mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2024-12-15 04:30:33 +00:00
fix sse implementations
This commit is contained in:
parent
a817d49e89
commit
38d4d8aa9a
@ -70,7 +70,7 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* a
|
|||||||
cPtr += 16;
|
cPtr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(unsigned int i = 0; i<(num_points % 16); ++i)
|
for(unsigned int i = sse_iters * 16; i < num_points; ++i)
|
||||||
{
|
{
|
||||||
*cPtr++ = (*aPtr++) + (*bPtr++);
|
*cPtr++ = (*aPtr++) + (*bPtr++);
|
||||||
}
|
}
|
||||||
@ -134,14 +134,14 @@ static inline void volk_gnsssdr_8i_x2_add_8i_a_sse2(char* cVector, const char* a
|
|||||||
|
|
||||||
cVal = _mm_add_epi8(aVal, bVal);
|
cVal = _mm_add_epi8(aVal, bVal);
|
||||||
|
|
||||||
_mm_store_si128((__m128i*)cPtr,cVal); // Store the results back into the C container
|
_mm_store_si128((__m128i*)cPtr, cVal); // Store the results back into the C container
|
||||||
|
|
||||||
aPtr += 16;
|
aPtr += 16;
|
||||||
bPtr += 16;
|
bPtr += 16;
|
||||||
cPtr += 16;
|
cPtr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(unsigned int i = 0; i<(num_points % 16); ++i)
|
for(unsigned int i = sse_iters * 16; i < num_points; ++i)
|
||||||
{
|
{
|
||||||
*cPtr++ = (*aPtr++) + (*bPtr++);
|
*cPtr++ = (*aPtr++) + (*bPtr++);
|
||||||
}
|
}
|
||||||
@ -163,7 +163,7 @@ static inline void volk_gnsssdr_8i_x2_add_8i_a_generic(char* cVector, const char
|
|||||||
const char* bPtr= bVector;
|
const char* bPtr= bVector;
|
||||||
unsigned int number = 0;
|
unsigned int number = 0;
|
||||||
|
|
||||||
for(number = 0; number < num_points; number++)
|
for(; number < num_points; number++)
|
||||||
{
|
{
|
||||||
*cPtr++ = (*aPtr++) + (*bPtr++);
|
*cPtr++ = (*aPtr++) + (*bPtr++);
|
||||||
}
|
}
|
||||||
|
@ -75,7 +75,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_avx(lv_8sc_t* cVector, const
|
|||||||
c += 16;
|
c += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int i = 0; i<(num_points % 16); ++i)
|
for (unsigned int i = sse_iters * 16; i < num_points; ++i)
|
||||||
{
|
{
|
||||||
*c++ = lv_conj(*a++);
|
*c++ = lv_conj(*a++);
|
||||||
}
|
}
|
||||||
@ -109,7 +109,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_ssse3(lv_8sc_t* cVector, con
|
|||||||
c += 8;
|
c += 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int i = 0; i<(num_points % 8); ++i)
|
for (unsigned int i = sse_iters * 8; i < num_points; ++i)
|
||||||
{
|
{
|
||||||
*c++ = lv_conj(*a++);
|
*c++ = lv_conj(*a++);
|
||||||
}
|
}
|
||||||
@ -146,7 +146,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_sse3(lv_8sc_t* cVector, cons
|
|||||||
c += 8;
|
c += 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int i = 0; i<(num_points % 8); ++i)
|
for (unsigned int i = sse_iters * 8; i < num_points; ++i)
|
||||||
{
|
{
|
||||||
*c++ = lv_conj(*a++);
|
*c++ = lv_conj(*a++);
|
||||||
}
|
}
|
||||||
@ -220,7 +220,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_avx(lv_8sc_t* cVector, const
|
|||||||
c += 16;
|
c += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int i = 0; i<(num_points % 16); ++i)
|
for (unsigned int i = sse_iters * 16; i < num_points; ++i)
|
||||||
{
|
{
|
||||||
*c++ = lv_conj(*a++);
|
*c++ = lv_conj(*a++);
|
||||||
}
|
}
|
||||||
@ -254,7 +254,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_ssse3(lv_8sc_t* cVector, con
|
|||||||
c += 8;
|
c += 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int i = 0; i<(num_points % 8); ++i)
|
for (unsigned int i = sse_iters * 8; i < num_points; ++i)
|
||||||
{
|
{
|
||||||
*c++ = lv_conj(*a++);
|
*c++ = lv_conj(*a++);
|
||||||
}
|
}
|
||||||
@ -291,7 +291,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, cons
|
|||||||
c += 8;
|
c += 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int i = 0; i<(num_points % 8); ++i)
|
for (unsigned int i = sse_iters * 8; i < num_points; ++i)
|
||||||
{
|
{
|
||||||
*c++ = lv_conj(*a++);
|
*c++ = lv_conj(*a++);
|
||||||
}
|
}
|
||||||
|
@ -90,7 +90,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse3(char* magnitudeV
|
|||||||
magnitudeVectorPtr += 16;
|
magnitudeVectorPtr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int i = 0; i<(num_points % 16); ++i)
|
for (unsigned int i = sse_iters * 16; i < num_points; ++i)
|
||||||
{
|
{
|
||||||
const char valReal = *complexVectorPtr++;
|
const char valReal = *complexVectorPtr++;
|
||||||
const char valImag = *complexVectorPtr++;
|
const char valImag = *complexVectorPtr++;
|
||||||
@ -226,7 +226,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeV
|
|||||||
magnitudeVectorPtr += 16;
|
magnitudeVectorPtr += 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int i = 0; i<(num_points % 16); ++i)
|
for (unsigned int i = sse_iters * 16; i < num_points; ++i)
|
||||||
{
|
{
|
||||||
const char valReal = *complexVectorPtr++;
|
const char valReal = *complexVectorPtr++;
|
||||||
const char valImag = *complexVectorPtr++;
|
const char valImag = *complexVectorPtr++;
|
||||||
|
@ -51,6 +51,7 @@
|
|||||||
*/
|
*/
|
||||||
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
|
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
|
||||||
{
|
{
|
||||||
|
unsigned int number = 0;
|
||||||
const unsigned int sse_iters = num_points / 8;
|
const unsigned int sse_iters = num_points / 8;
|
||||||
|
|
||||||
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
|
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
|
||||||
@ -65,7 +66,7 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector,
|
|||||||
imagy = _mm_and_si128 (imagy, mult1);
|
imagy = _mm_and_si128 (imagy, mult1);
|
||||||
realy = _mm_and_si128 (y, mult1);
|
realy = _mm_and_si128 (y, mult1);
|
||||||
|
|
||||||
for(unsigned int number = 0;number < sse_iters; number++)
|
for(; number < sse_iters; number++)
|
||||||
{
|
{
|
||||||
x = _mm_lddqu_si128((__m128i*)a);
|
x = _mm_lddqu_si128((__m128i*)a);
|
||||||
|
|
||||||
@ -92,7 +93,7 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector,
|
|||||||
c += 8;
|
c += 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int i = 0; i<(num_points % 8); ++i)
|
for (number = sse_iters * 8; number < num_points; ++number)
|
||||||
{
|
{
|
||||||
*c++ = (*a++) * scalar;
|
*c++ = (*a++) * scalar;
|
||||||
}
|
}
|
||||||
@ -164,6 +165,7 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_generic(lv_8sc_t* cVector,
|
|||||||
*/
|
*/
|
||||||
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
|
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
|
||||||
{
|
{
|
||||||
|
unsigned int number = 0;
|
||||||
const unsigned int sse_iters = num_points / 8;
|
const unsigned int sse_iters = num_points / 8;
|
||||||
|
|
||||||
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
|
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
|
||||||
@ -178,7 +180,7 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector,
|
|||||||
imagy = _mm_and_si128 (imagy, mult1);
|
imagy = _mm_and_si128 (imagy, mult1);
|
||||||
realy = _mm_and_si128 (y, mult1);
|
realy = _mm_and_si128 (y, mult1);
|
||||||
|
|
||||||
for(unsigned int number = 0;number < sse_iters; number++)
|
for(; number < sse_iters; number++)
|
||||||
{
|
{
|
||||||
x = _mm_load_si128((__m128i*)a);
|
x = _mm_load_si128((__m128i*)a);
|
||||||
|
|
||||||
@ -205,7 +207,7 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector,
|
|||||||
c += 8;
|
c += 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int i = 0; i<(num_points % 8); ++i)
|
for (number = sse_iters * 8; number < num_points; ++number)
|
||||||
{
|
{
|
||||||
*c++ = (*a++) * scalar;
|
*c++ = (*a++) * scalar;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user