1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2024-12-14 12:10:34 +00:00

Fix SSE implementation: index the scalar tail loops from sse_iters * 16 up to num_points

This commit is contained in:
Carles Fernandez 2016-01-16 22:49:34 +01:00
parent 3d3a758ef2
commit cd80beb16c

View File

@ -55,7 +55,7 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, c
const unsigned char* a = aChar;
const unsigned char* b = bChar;
for(unsigned int number = 0;number < sse_iters; number++)
for(unsigned int number = 0; number < sse_iters; number++)
{
x = _mm_lddqu_si128((__m128i*)a);
y = _mm_lddqu_si128((__m128i*)b);
@ -84,7 +84,7 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, c
c += 16;
}
for (unsigned int i = 0; i<(num_points % 16); ++i)
for (unsigned int i = sse_iters * 16; i < num_points ; ++i)
{
*c++ = (*a++) * (*b++);
}
@ -140,7 +140,7 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, c
const unsigned char* a = aChar;
const unsigned char* b = bChar;
for(unsigned int number = 0;number < sse_iters; number++)
for(unsigned int number = 0; number < sse_iters; number++)
{
x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b);
@ -169,7 +169,7 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, c
c += 16;
}
for (unsigned int i = 0; i<(num_points % 16); ++i)
for (unsigned int i = sse_iters * 16; i < num_points; ++i)
{
*c++ = (*a++) * (*b++);
}