From a2429a851cabdc6c3bef54831fc40c94411e831d Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Sat, 16 Jan 2016 22:52:10 +0100 Subject: [PATCH] fix sse implementation --- .../volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h | 8 ++++---- .../volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h index 75be95579..af102dba0 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h @@ -58,7 +58,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result, { //r*a.r - i*a.i, i*a.r + r*a.i lv_16sc_t tmp = in_a[n] * in_b[n]; - result[0] = lv_cmake(sat_adds16i(lv_creal(result[0]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[0]), lv_cimag(tmp) )); + result[0] += lv_cmake(sat_adds16i(lv_creal(tmp), lv_creal(tmp)), sat_adds16i(lv_cimag(tmp), lv_cimag(tmp) )); } //printf("generic result = %i,%i", lv_creal(result[0]),lv_cimag(result[0])); } @@ -127,14 +127,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con for (int i = 0; i < 4; ++i) { - dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); + dotProduct += lv_cmake(sat_adds16i(lv_creal(dotProductVector[i]), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProductVector[i]), lv_cimag(dotProductVector[i]))); } } - for (unsigned int i = 0; i < (num_points % 4); ++i) + for (unsigned int i = sse_iters * 4; i < num_points; ++i) { lv_16sc_t tmp = (*_in_a++) * (*_in_b++); - dotProduct = lv_cmake( sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp))); + dotProduct += lv_cmake( sat_adds16i(lv_creal(tmp), lv_creal(tmp)), sat_adds16i(lv_cimag(tmp), lv_cimag(tmp))); } *_out = dotProduct; diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h index 1336824e0..d01b07c24 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h @@ -74,7 +74,6 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con lv_16sc_t* _out = out; for(unsigned int number = 0; number < sse_iters; number++) { - //std::complex memory structure: real part -> reinterpret_cast(a)[2*i] //imaginery part -> reinterpret_cast(a)[2*i + 1] // a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r] @@ -104,7 +103,7 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con _out += 4; } - for (unsigned int i = 0; i < (num_points % 4); ++i) + for (unsigned int i = sse_iters * 4; i < num_points; ++i) { *_out++ = (*_in_a++) * (*_in_b++); }