1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2025-01-05 15:00:33 +00:00

fix sse implementation

This commit is contained in:
Carles Fernandez 2016-01-16 22:49:34 +01:00
parent 3d3a758ef2
commit cd80beb16c

View File

@ -55,7 +55,7 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, c
const unsigned char* a = aChar; const unsigned char* a = aChar;
const unsigned char* b = bChar; const unsigned char* b = bChar;
for(unsigned int number = 0;number < sse_iters; number++) for(unsigned int number = 0; number < sse_iters; number++)
{ {
x = _mm_lddqu_si128((__m128i*)a); x = _mm_lddqu_si128((__m128i*)a);
y = _mm_lddqu_si128((__m128i*)b); y = _mm_lddqu_si128((__m128i*)b);
@ -84,7 +84,7 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, c
c += 16; c += 16;
} }
for (unsigned int i = 0; i<(num_points % 16); ++i) for (unsigned int i = sse_iters * 16; i < num_points ; ++i)
{ {
*c++ = (*a++) * (*b++); *c++ = (*a++) * (*b++);
} }
@ -140,7 +140,7 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, c
const unsigned char* a = aChar; const unsigned char* a = aChar;
const unsigned char* b = bChar; const unsigned char* b = bChar;
for(unsigned int number = 0;number < sse_iters; number++) for(unsigned int number = 0; number < sse_iters; number++)
{ {
x = _mm_load_si128((__m128i*)a); x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b); y = _mm_load_si128((__m128i*)b);
@ -169,7 +169,7 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, c
c += 16; c += 16;
} }
for (unsigned int i = 0; i<(num_points % 16); ++i) for (unsigned int i = sse_iters * 16; i < num_points; ++i)
{ {
*c++ = (*a++) * (*b++); *c++ = (*a++) * (*b++);
} }