From c8f71204fcc90835217c5dfcc2c63b1380d9630b Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Tue, 31 May 2016 20:56:19 +0200 Subject: [PATCH 1/5] Make kernels compatible with c98 standard so the library can be built by more compilers --- .../volk_gnsssdr/CMakeLists.txt | 4 +- .../volk_gnsssdr_16ic_convert_32fc.h | 43 ++-- .../volk_gnsssdr_16ic_resampler_fast_16ic.h | 3 +- ..._gnsssdr_16ic_resamplerfastxnpuppet_16ic.h | 23 ++- ...volk_gnsssdr_16ic_resamplerxnpuppet_16ic.h | 64 +++--- .../volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h | 39 ++-- .../volk_gnsssdr_16ic_x2_dot_prod_16ic.h | 26 ++- .../volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h | 190 +++++++++--------- ...olk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h | 45 +++-- .../volk_gnsssdr_16ic_x2_multiply_16ic.h | 13 +- ...gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h | 189 +++++++++-------- ...sdr_16ic_x2_rotator_dotprodxnpuppet_16ic.h | 66 +++--- .../volk_gnsssdr_16ic_xn_resampler_16ic_xn.h | 96 +++++---- ...k_gnsssdr_16ic_xn_resampler_fast_16ic_xn.h | 6 +- .../volk_gnsssdr_32f_sincos_32fc.h | 7 +- .../volk_gnsssdr_32fc_convert_16ic.h | 32 +-- .../volk_gnsssdr_32fc_convert_8ic.h | 19 +- ...volk_gnsssdr_32fc_resamplerxnpuppet_32fc.h | 48 ++--- ...gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h | 124 +++++++----- ...sdr_32fc_x2_rotator_dotprodxnpuppet_32fc.h | 42 ++-- .../volk_gnsssdr_32fc_xn_resampler_32fc_xn.h | 93 +++++---- .../volk_gnsssdr_64f_accumulator_64f.h | 39 ++-- .../volk_gnsssdr_8i_accumulator_s8i.h | 21 +- .../volk_gnsssdr_8i_index_max_16u.h | 58 +++--- .../volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h | 38 ++-- .../volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h | 16 +- .../volk_gnsssdr_8ic_conjugate_8ic.h | 46 ++--- .../volk_gnsssdr_8ic_magnitude_squared_8i.h | 17 +- .../volk_gnsssdr_8ic_x2_dot_prod_8ic.h | 42 ++-- .../volk_gnsssdr_8ic_x2_multiply_8ic.h | 31 +-- .../volk_gnsssdr_8u_x2_multiply_8u.h | 16 +- .../volk_gnsssdr_s32f_sincos_32fc.h | 6 +- 32 files changed, 835 insertions(+), 667 deletions(-) diff --git 
a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/CMakeLists.txt b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/CMakeLists.txt index 425193c51..2e7097377 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/CMakeLists.txt +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/CMakeLists.txt @@ -27,8 +27,8 @@ enable_language(CXX) enable_language(C) enable_testing() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11 -Wall") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall") option(ENABLE_STRIP "Create a stripped volk_gnsssdr_profile binary (without shared libraries)" OFF) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_convert_32fc.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_convert_32fc.h index 23f125c12..4ace98414 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_convert_32fc.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_convert_32fc.h @@ -62,7 +62,8 @@ static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) { - for(unsigned int i = 0; i < num_points; i++) + unsigned int i; + for(i = 0; i < num_points; i++) { outputVector[i] = lv_cmake((float)lv_creal(inputVector[i]), (float)lv_cimag(inputVector[i])); } @@ -76,22 +77,19 @@ static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVecto static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 2; - + unsigned int i; const lv_16sc_t* _in = inputVector; lv_32fc_t* _out = outputVector; __m128 a; - for(unsigned int number = 0; number < sse_iters; number++) + + 
for(i = 0; i < sse_iters; i++) { - a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg + a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // load (2 byte imag, 2 byte real) x 2 into 128 bits reg _mm_store_ps((float*)_out, a); _in += 2; _out += 2; - //*_out++ = lv_cmake((float)lv_creal(*_in),(float)lv_cimag(*_in)); - //_in++; - //*_out++ = lv_cmake((float)lv_creal(*_in),(float)lv_cimag(*_in)); - //_in++; } - for (unsigned int i = 0; i < (num_points % 2); ++i) + for (i = 0; i < (num_points % 2); ++i) { *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in)); _in++; @@ -106,18 +104,19 @@ static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 2; - + unsigned int i; const lv_16sc_t* _in = inputVector; lv_32fc_t* _out = outputVector; __m128 a; - for(unsigned int number = 0; number < sse_iters; number++) + + for(i = 0; i < sse_iters; i++) { a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg _mm_storeu_ps((float*)_out, a); _in += 2; _out += 2; } - for (unsigned int i = 0; i < (num_points % 2); ++i) + for (i = 0; i < (num_points % 2); ++i) { *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in)); _in++; @@ -132,11 +131,12 @@ static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector static inline void volk_gnsssdr_16ic_convert_32fc_u_axv(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 4; - + unsigned int i; const 
lv_16sc_t* _in = inputVector; lv_32fc_t* _out = outputVector; __m256 a; - for(unsigned int number = 0; number < sse_iters; number++) + + for(i = 0; i < sse_iters; i++) { a = _mm256_set_ps((float)(lv_cimag(_in[3])), (float)(lv_creal(_in[3])), (float)(lv_cimag(_in[2])), (float)(lv_creal(_in[2])), (float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg _mm256_storeu_ps((float*)_out, a); @@ -144,7 +144,7 @@ static inline void volk_gnsssdr_16ic_convert_32fc_u_axv(lv_32fc_t* outputVector, _out += 4; } _mm256_zeroupper(); - for (unsigned int i = 0; i < (num_points % 4); ++i) + for(i = 0; i < (num_points % 4); ++i) { *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in)); _in++; @@ -158,11 +158,12 @@ static inline void volk_gnsssdr_16ic_convert_32fc_u_axv(lv_32fc_t* outputVector, static inline void volk_gnsssdr_16ic_convert_32fc_a_axv(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 4; - + unsigned int i; const lv_16sc_t* _in = inputVector; lv_32fc_t* _out = outputVector; __m256 a; - for(unsigned int number = 0; number < sse_iters; number++) + + for(i = 0; i < sse_iters; i++) { a = _mm256_set_ps((float)(lv_cimag(_in[3])), (float)(lv_creal(_in[3])), (float)(lv_cimag(_in[2])), (float)(lv_creal(_in[2])), (float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg _mm256_store_ps((float*)_out, a); @@ -170,7 +171,7 @@ static inline void volk_gnsssdr_16ic_convert_32fc_a_axv(lv_32fc_t* outputVector, _out += 4; } _mm256_zeroupper(); - for (unsigned int i = 0; i < (num_points % 4); ++i) + for(i = 0; i < (num_points % 4); ++i) { *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in)); _in++; @@ -185,7 +186,7 @@ static inline void 
volk_gnsssdr_16ic_convert_32fc_a_axv(lv_32fc_t* outputVector, static inline void volk_gnsssdr_16ic_convert_32fc_neon(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 2; - + unsigned int i; const lv_16sc_t* _in = inputVector; lv_32fc_t* _out = outputVector; @@ -193,7 +194,7 @@ static inline void volk_gnsssdr_16ic_convert_32fc_neon(lv_32fc_t* outputVector, int32x4_t a32x4; float32x4_t f32x4; - for(unsigned int number = 0; number < sse_iters; number++) + for(i = 0; i < sse_iters; i++) { a16x4 = vld1_s16((const int16_t*)_in); __builtin_prefetch(_in + 4); @@ -203,7 +204,7 @@ static inline void volk_gnsssdr_16ic_convert_32fc_neon(lv_32fc_t* outputVector, _in += 2; _out += 2; } - for (unsigned int i = 0; i < (num_points % 2); ++i) + for (i = 0; i < (num_points % 2); ++i) { *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in)); _in++; diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resampler_fast_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resampler_fast_16ic.h index 51a9cf9bd..38068f9a6 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resampler_fast_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resampler_fast_16ic.h @@ -71,8 +71,9 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_generic(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples) { int local_code_chip_index; + unsigned int n; //fesetround(FE_TONEAREST); - for (unsigned int n = 0; n < num_output_samples; n++) + for (n = 0; n < num_output_samples; n++) { // resample code for current tap local_code_chip_index = round(code_phase_step_chips * (float)n + rem_code_phase_chips - 0.5f); diff 
--git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic.h index d4409d6a4..cca1ab87d 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic.h @@ -47,10 +47,11 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_generic(lv_16sc_ float code_phase_step_chips = 0.1; int code_length_chips = 2046; int num_out_vectors = 3; + unsigned int n; float* rem_code_phase_chips = (float*)volk_gnsssdr_malloc(sizeof(float) * num_out_vectors, volk_gnsssdr_get_alignment()); - lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + + for(n = 0; n < num_out_vectors; n++) { rem_code_phase_chips[n] = -0.234; result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); @@ -59,7 +60,7 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_generic(lv_16sc_ memcpy((lv_16sc_t*)result, (lv_16sc_t*)result_aux[0], sizeof(lv_16sc_t) * num_points); volk_gnsssdr_free(rem_code_phase_chips); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -75,8 +76,10 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_a_sse2(lv_16sc_t float code_phase_step_chips = 0.1; int code_length_chips = 2046; int num_out_vectors = 3; + unsigned int n; float * rem_code_phase_chips = (float*)volk_gnsssdr_malloc(sizeof(float) * num_out_vectors, volk_gnsssdr_get_alignment()); lv_16sc_t** result_aux = 
(lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); + for(unsigned int n = 0; n < num_out_vectors; n++) { rem_code_phase_chips[n] = -0.234; @@ -86,7 +89,7 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_a_sse2(lv_16sc_t memcpy(result, result_aux[0], sizeof(lv_16sc_t) * num_points); volk_gnsssdr_free(rem_code_phase_chips); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -102,9 +105,11 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_u_sse2(lv_16sc_t float code_phase_step_chips = 0.1; int code_length_chips = 2046; int num_out_vectors = 3; + unsigned int n; float * rem_code_phase_chips = (float*)volk_gnsssdr_malloc(sizeof(float) * num_out_vectors, volk_gnsssdr_get_alignment()); lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + + for(n = 0; n < num_out_vectors; n++) { rem_code_phase_chips[n] = -0.234; result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); @@ -113,7 +118,7 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_u_sse2(lv_16sc_t memcpy(result, result_aux[0], sizeof(lv_16sc_t) * num_points); volk_gnsssdr_free(rem_code_phase_chips); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -129,9 +134,11 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_neon(lv_16sc_t* float code_phase_step_chips = 0.1; int code_length_chips = 2046; int num_out_vectors = 3; + unsigned int n; float * rem_code_phase_chips = (float*)volk_gnsssdr_malloc(sizeof(float) * num_out_vectors, volk_gnsssdr_get_alignment()); lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, 
volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + + for(n = 0; n < num_out_vectors; n++) { rem_code_phase_chips[n] = -0.234; result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); @@ -140,7 +147,7 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_neon(lv_16sc_t* memcpy(result, result_aux[0], sizeof(lv_16sc_t) * num_points); volk_gnsssdr_free(rem_code_phase_chips); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resamplerxnpuppet_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resamplerxnpuppet_16ic.h index f4ca84d87..106ad85b7 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resamplerxnpuppet_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resamplerxnpuppet_16ic.h @@ -47,12 +47,12 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_generic(lv_16sc_t* r float code_phase_step_chips = -0.6; int code_length_chips = 2046; int num_out_vectors = 3; + unsigned int n; float rem_code_phase_chips = -0.234; - float shifts_chips[3] = { -0.1, 0.0, 0.1 }; - lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -61,7 +61,7 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_generic(lv_16sc_t* r memcpy((lv_16sc_t*)result, (lv_16sc_t*)result_aux[0], sizeof(lv_16sc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; 
n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -78,11 +78,11 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse3(lv_16sc_t* re int code_length_chips = 2046; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; float shifts_chips[3] = { -0.1, 0.0, 0.1 }; - lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -91,7 +91,7 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse3(lv_16sc_t* re memcpy((lv_16sc_t*)result, (lv_16sc_t*)result_aux[0], sizeof(lv_16sc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -107,11 +107,11 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse3(lv_16sc_t* re int code_length_chips = 2046; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; float shifts_chips[3] = { -0.1, 0.0, 0.1 }; - lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -120,7 +120,7 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse3(lv_16sc_t* re memcpy((lv_16sc_t*)result, (lv_16sc_t*)result_aux[0], sizeof(lv_16sc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -137,11 +137,11 @@ static inline void 
volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse4_1(lv_16sc_t* int code_length_chips = 2046; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; float shifts_chips[3] = { -0.1, 0.0, 0.1 }; - lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -150,7 +150,7 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse4_1(lv_16sc_t* memcpy((lv_16sc_t*)result, (lv_16sc_t*)result_aux[0], sizeof(lv_16sc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -167,11 +167,11 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse4_1(lv_16sc_t* int code_length_chips = 2046; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; float shifts_chips[3] = { -0.1, 0.0, 0.1 }; - lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -180,7 +180,7 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse4_1(lv_16sc_t* memcpy((lv_16sc_t*)result, (lv_16sc_t*)result_aux[0], sizeof(lv_16sc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -197,11 +197,11 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_avx(lv_16sc_t* res int code_length_chips = 2046; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; 
float shifts_chips[3] = { -0.1, 0.0, 0.1 }; - lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -210,7 +210,7 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_avx(lv_16sc_t* res memcpy((lv_16sc_t*)result, (lv_16sc_t*)result_aux[0], sizeof(lv_16sc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -227,11 +227,11 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_avx(lv_16sc_t* res int code_length_chips = 2046; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; float shifts_chips[3] = { -0.1, 0.0, 0.1 }; - lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -240,7 +240,7 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_avx(lv_16sc_t* res memcpy((lv_16sc_t*)result, (lv_16sc_t*)result_aux[0], sizeof(lv_16sc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -257,11 +257,11 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_neon(lv_16sc_t* resu int code_length_chips = 2046; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; float shifts_chips[3] = { -0.1, 0.0, 0.1 }; - lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); 
- for(unsigned int n = 0; n < num_out_vectors; n++) + + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -270,7 +270,7 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_neon(lv_16sc_t* resu memcpy((lv_16sc_t*)result, (lv_16sc_t*)result_aux[0], sizeof(lv_16sc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h index 6b0bb7679..74cb9fc7a 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h @@ -139,6 +139,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic_reload(lv_16s static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) { const unsigned int sse_iters = num_points / 4; + unsigned int number; __m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128i c1, c2, result; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; @@ -151,14 +152,13 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); const lv_16sc_t* _in = inVector; - lv_16sc_t* _out = outVector; __m128 yl, yh, tmp1, tmp2, tmp3; lv_16sc_t tmp16; lv_32fc_t tmp32; - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { a = _mm_set_ps((float)(lv_cimag(_in[1])), 
(float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg //complex 32fc multiplication b=a*two_phase_acc_reg @@ -221,7 +221,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out _mm_store_ps((float*)two_phase_acc, two_phase_acc_reg); (*phase) = two_phase_acc[0]; - for (unsigned int i = sse_iters * 4; i < num_points; ++i) + for (number = sse_iters * 4; number < num_points; ++number) { tmp16 = *_in++; tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); @@ -241,6 +241,8 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc { const unsigned int sse_iters = num_points / 4; const unsigned int ROTATOR_RELOAD = 512; + unsigned int n; + unsigned int j; __m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128i c1, c2, result; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; @@ -260,9 +262,9 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc lv_16sc_t tmp16; lv_32fc_t tmp32; - for (unsigned int n = 0; n < sse_iters / ROTATOR_RELOAD; n++) + for (n = 0; n < sse_iters / ROTATOR_RELOAD; n++) { - for (unsigned int j = 0; j < ROTATOR_RELOAD; j++) + for (j = 0; j < ROTATOR_RELOAD; j++) { a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg //complex 32fc multiplication b=a*two_phase_acc_reg @@ -319,7 +321,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2); } - for (unsigned int j = 0; j < sse_iters % ROTATOR_RELOAD; j++) + for (j = 0; j < sse_iters % ROTATOR_RELOAD; j++) { a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg //complex 
32fc multiplication b=a*two_phase_acc_reg @@ -372,7 +374,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc _mm_store_ps((float*)two_phase_acc, two_phase_acc_reg); (*phase) = two_phase_acc[0]; - for (unsigned int i = sse_iters * 4; i < num_points; ++i) + for (n = sse_iters * 4; n < num_points; ++n) { tmp16 = *_in++; tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); @@ -391,6 +393,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) { const unsigned int sse_iters = num_points / 4; + unsigned int number; __m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128i c1, c2, result; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; @@ -410,7 +413,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out lv_16sc_t tmp16; lv_32fc_t tmp32; - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg //complex 32fc multiplication b=a*two_phase_acc_reg @@ -473,7 +476,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out _mm_store_ps((float*)two_phase_acc, two_phase_acc_reg); (*phase) = two_phase_acc[0]; - for (unsigned int i = sse_iters * 4; i < num_points; ++i) + for (number = sse_iters * 4; number < num_points; ++number) { tmp16 = *_in++; tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); @@ -492,6 +495,8 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(lv_16sc { const unsigned int sse_iters = num_points / 4; unsigned int ROTATOR_RELOAD = 512; + unsigned int 
n; + unsigned int j; __m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128i c1, c2, result; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; @@ -511,9 +516,9 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(lv_16sc lv_16sc_t tmp16; lv_32fc_t tmp32; - for (unsigned int n = 0; n < sse_iters / ROTATOR_RELOAD; n++) + for (n = 0; n < sse_iters / ROTATOR_RELOAD; n++) { - for (unsigned int j = 0; j < ROTATOR_RELOAD; j++) + for (j = 0; j < ROTATOR_RELOAD; j++) { a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg //complex 32fc multiplication b=a*two_phase_acc_reg @@ -570,7 +575,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(lv_16sc two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2); } - for (unsigned int j = 0; j < sse_iters % ROTATOR_RELOAD; j++) + for (j = 0; j < sse_iters % ROTATOR_RELOAD; j++) { a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg //complex 32fc multiplication b=a*two_phase_acc_reg @@ -623,7 +628,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(lv_16sc _mm_store_ps((float*)two_phase_acc, two_phase_acc_reg); (*phase) = two_phase_acc[0]; - for (unsigned int i = sse_iters * 4; i < num_points; ++i) + for (n = sse_iters * 4; n < num_points; ++n) { tmp16 = *_in++; tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); @@ -773,6 +778,8 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(lv_16sc_t unsigned int i = 0; const unsigned int neon_iters = num_points / 4; const unsigned int ROTATOR_RELOAD = 512; + unsigned int n; + unsigned int j; lv_16sc_t tmp16_; lv_32fc_t tmp32_; @@ -809,9 +816,9 @@ static inline void 
volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(lv_16sc_t if (neon_iters > 0) { - for (unsigned int n = 0; n < neon_iters / ROTATOR_RELOAD; n++) + for (n = 0; n < neon_iters / ROTATOR_RELOAD; n++) { - for (unsigned int j = 0; j < ROTATOR_RELOAD; j++) + for (j = 0; j < ROTATOR_RELOAD; j++) { /* load 4 complex numbers (int 16 bits each component) */ tmp16 = vld2_s16((int16_t*)_in); @@ -880,7 +887,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(lv_16sc_t _phase_imag = vld1q_f32(____phase_imag); } - for (unsigned int j = 0; j < neon_iters % ROTATOR_RELOAD; j++) + for (j = 0; j < neon_iters % ROTATOR_RELOAD; j++) { /* load 4 complex numbers (int 16 bits each component) */ tmp16 = vld2_s16((int16_t*)_in); diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h index 1d1c0beea..fa0b92f99 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h @@ -69,7 +69,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) { result[0] = lv_cmake((int16_t)0, (int16_t)0); - for (unsigned int n = 0; n < num_points; n++) + unsigned int n; + for (n = 0; n < num_points; n++) { lv_16sc_t tmp = in_a[n] * in_b[n]; result[0] = lv_cmake(sat_adds16i(lv_creal(result[0]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[0]), lv_cimag(tmp) )); @@ -87,7 +88,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0); const unsigned int sse_iters = num_points / 4; - + unsigned int number; const lv_16sc_t* _in_a = in_a; const lv_16sc_t* 
_in_b = in_b; lv_16sc_t* _out = out; @@ -103,7 +104,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { // a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r] a = _mm_load_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg @@ -137,13 +138,13 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con _mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector - for (int i = 0; i < 4; ++i) + for (number = 0; number < 4; ++number) { - dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[number])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[number]))); } } - for (unsigned int i = 0; i < (num_points % 4); ++i) + for (number = 0; number < (num_points % 4); ++number) { lv_16sc_t tmp = (*_in_a++) * (*_in_b++); dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp))); @@ -168,6 +169,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con const lv_16sc_t* _in_b = in_b; lv_16sc_t* _out = out; unsigned int i; + unsigned int number; if (sse_iters > 0) { @@ -180,7 +182,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255); - for(unsigned int number = 0; number < 
sse_iters; number++) + for(number = 0; number < sse_iters; number++) { //std::complex memory structure: real part -> reinterpret_cast(a)[2*i] //imaginery part -> reinterpret_cast(a)[2*i + 1] @@ -246,6 +248,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_axv2(lv_16sc_t* out, con const lv_16sc_t* _in_b = in_b; lv_16sc_t* _out = out; unsigned int i; + unsigned int number; if (avx_iters > 0) { @@ -258,7 +261,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_axv2(lv_16sc_t* out, con mask_imag = _mm256_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); mask_real = _mm256_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255); - for(unsigned int number = 0; number < avx_iters; number++) + for(number = 0; number < avx_iters; number++) { a = _mm256_loadu_si256((__m256i*)_in_a); __builtin_prefetch(_in_a + 16); @@ -322,6 +325,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_axv2(lv_16sc_t* out, con const lv_16sc_t* _in_b = in_b; lv_16sc_t* _out = out; unsigned int i; + unsigned int number; if (avx_iters > 0) { @@ -334,7 +338,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_axv2(lv_16sc_t* out, con mask_imag = _mm256_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); mask_real = _mm256_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255); - for(unsigned int number = 0; number < avx_iters; number++) + for(number = 0; number < avx_iters; number++) { a = _mm256_load_si256((__m256i*)_in_a); __builtin_prefetch(_in_a + 16); @@ -438,9 +442,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const } vst2_s16((int16_t*)accum_result, accumulator); - for (unsigned int i = 0; i < 
4; ++i) + for (number = 0; number < 4; ++number) { - dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(accum_result[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(accum_result[i]))); + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(accum_result[number])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(accum_result[number]))); } *out = dotProduct; diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h index dfdf13d3a..19157f8d2 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h @@ -70,10 +70,12 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) { - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + int n_vec; + unsigned int n; + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { result[n_vec] = lv_cmake(0,0); - for (unsigned int n = 0; n < num_points; n++) + for (n = 0; n < num_points; n++) { //r*a.r - i*a.i, i*a.r + r*a.i //result[n_vec]+=in_common[n]*in_a[n_vec][n]; @@ -90,10 +92,12 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* resu static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic_sat(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) { - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + int n_vec; + unsigned int n; + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { result[n_vec] = lv_cmake(0,0); - for (unsigned int n = 0; n < num_points; n++) + for (n = 0; n < num_points; n++) { lv_16sc_t tmp = 
lv_cmake(sat_adds16i(sat_muls16i(lv_creal(in_common[n]), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(in_common[n]), lv_cimag(in_a[n_vec][n]))), sat_adds16i(sat_muls16i(lv_creal(in_common[n]), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(in_common[n]), lv_creal(in_a[n_vec][n])))); @@ -111,7 +115,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic_sat(lv_16sc_t* static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) { lv_16sc_t dotProduct = lv_cmake(0,0); - + int n_vec; + unsigned int index; const unsigned int sse_iters = num_points / 4; const lv_16sc_t** _in_a = in_a; @@ -125,7 +130,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* resul __m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); __m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm_setzero_si128(); imagcacc[n_vec] = _mm_setzero_si128(); @@ -136,14 +141,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* resul mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255); - for(unsigned int number = 0; number < sse_iters; number++) + for(index = 0; index < sse_iters; index++) { // b[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r] b = _mm_load_si128((__m128i*)_in_common); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg __builtin_prefetch(_in_common + 8); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { - a = _mm_load_si128((__m128i*)&(_in_a[n_vec][number*4])); //load (2 byte 
imag, 2 byte real) x 4 into 128 bits reg + a = _mm_load_si128((__m128i*)&(_in_a[n_vec][index*4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, .... @@ -160,12 +165,11 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* resul realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real); imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag); - } _in_common += 4; } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real); imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag); @@ -174,10 +178,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* resul _mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 4; ++i) + for (index = 0; index < 4; ++index) { - dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), - sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[index])), + sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[index]))); } _out[n_vec] = dotProduct; } @@ -185,11 +189,11 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* resul volk_gnsssdr_free(imagcacc); } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { - for(unsigned int n = sse_iters * 4; n < num_points; n++) + for(index = sse_iters * 4; index < num_points; index++) { - lv_16sc_t tmp = in_common[n] * in_a[n_vec][n]; + lv_16sc_t tmp = in_common[index] * in_a[n_vec][index]; _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp))); @@ -205,7 +209,8 @@ static inline void 
volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* resul static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) { lv_16sc_t dotProduct = lv_cmake(0,0); - + int n_vec; + unsigned int index; const unsigned int sse_iters = num_points / 4; const lv_16sc_t** _in_a = in_a; @@ -219,7 +224,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* resul __m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); __m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm_setzero_si128(); imagcacc[n_vec] = _mm_setzero_si128(); @@ -230,14 +235,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* resul mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255); - for(unsigned int number = 0; number < sse_iters; number++) + for(index = 0; index < sse_iters; index++) { // b[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r] b = _mm_loadu_si128((__m128i*)_in_common); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg __builtin_prefetch(_in_common + 8); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { - a = _mm_loadu_si128((__m128i*)&(_in_a[n_vec][number*4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg + a = _mm_loadu_si128((__m128i*)&(_in_a[n_vec][index*4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, .... 
@@ -258,7 +263,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* resul _in_common += 4; } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real); imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag); @@ -267,10 +272,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* resul _mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 4; ++i) + for (index = 0; index < 4; ++index) { - dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), - sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[index])), + sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[index]))); } _out[n_vec] = dotProduct; } @@ -278,11 +283,11 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* resul volk_gnsssdr_free(imagcacc); } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { - for(unsigned int n = sse_iters * 4; n < num_points; n++) + for(index = sse_iters * 4; index < num_points; index++) { - lv_16sc_t tmp = in_common[n] * in_a[n_vec][n]; + lv_16sc_t tmp = in_common[index] * in_a[n_vec][index]; _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp))); @@ -298,7 +303,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* resul static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_avx2(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) { lv_16sc_t dotProduct = lv_cmake(0,0); - + int n_vec; + unsigned int index; const unsigned int sse_iters = num_points / 
8; const lv_16sc_t** _in_a = in_a; @@ -312,7 +318,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_avx2(lv_16sc_t* resul __m256i* realcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment()); __m256i* imagcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment()); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm256_setzero_si256(); imagcacc[n_vec] = _mm256_setzero_si256(); @@ -323,13 +329,13 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_avx2(lv_16sc_t* resul mask_imag = _mm256_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); mask_real = _mm256_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255); - for(unsigned int number = 0; number < sse_iters; number++) + for(index = 0; index < sse_iters; index++) { b = _mm256_load_si256((__m256i*)_in_common); __builtin_prefetch(_in_common + 16); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { - a = _mm256_load_si256((__m256i*)&(_in_a[n_vec][number*8])); + a = _mm256_load_si256((__m256i*)&(_in_a[n_vec][index*8])); c = _mm256_mullo_epi16(a, b); @@ -350,7 +356,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_avx2(lv_16sc_t* resul _in_common += 8; } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm256_and_si256(realcacc[n_vec], mask_real); imagcacc[n_vec] = _mm256_and_si256(imagcacc[n_vec], mask_imag); @@ -359,10 +365,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_avx2(lv_16sc_t* resul _mm256_store_si256((__m256i*)dotProductVector, a); // Store the results back into the dot product vector dotProduct = 
lv_cmake(0,0); - for (int i = 0; i < 8; ++i) + for (index = 0; index < 8; ++index) { - dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), - sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[index])), + sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[index]))); } _out[n_vec] = dotProduct; } @@ -371,11 +377,11 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_avx2(lv_16sc_t* resul } _mm256_zeroupper(); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { - for(unsigned int n = sse_iters * 8; n < num_points; n++) + for(index = sse_iters * 8; index < num_points; index++) { - lv_16sc_t tmp = in_common[n] * in_a[n_vec][n]; + lv_16sc_t tmp = in_common[index] * in_a[n_vec][index]; _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp))); @@ -393,7 +399,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_avx2(lv_16sc_t* resul lv_16sc_t dotProduct = lv_cmake(0,0); const unsigned int sse_iters = num_points / 8; - + int n_vec; + unsigned int index; const lv_16sc_t** _in_a = in_a; const lv_16sc_t* _in_common = in_common; lv_16sc_t* _out = result; @@ -405,7 +412,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_avx2(lv_16sc_t* resul __m256i* realcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment()); __m256i* imagcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment()); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm256_setzero_si256(); imagcacc[n_vec] = _mm256_setzero_si256(); @@ -416,13 +423,13 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_avx2(lv_16sc_t* resul mask_imag = _mm256_set_epi8(255, 
255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); mask_real = _mm256_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255); - for(unsigned int number = 0; number < sse_iters; number++) + for(index = 0; index < sse_iters; index++) { b = _mm256_loadu_si256((__m256i*)_in_common); __builtin_prefetch(_in_common + 16); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { - a = _mm256_loadu_si256((__m256i*)&(_in_a[n_vec][number*8])); + a = _mm256_loadu_si256((__m256i*)&(_in_a[n_vec][index*8])); c = _mm256_mullo_epi16(a, b); @@ -443,7 +450,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_avx2(lv_16sc_t* resul _in_common += 8; } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm256_and_si256(realcacc[n_vec], mask_real); imagcacc[n_vec] = _mm256_and_si256(imagcacc[n_vec], mask_imag); @@ -452,10 +459,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_avx2(lv_16sc_t* resul _mm256_store_si256((__m256i*)dotProductVector, a); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 8; ++i) + for (index = 0; index < 8; ++index) { - dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), - sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[index])), + sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[index]))); } _out[n_vec] = dotProduct; } @@ -466,9 +473,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_avx2(lv_16sc_t* resul for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) { - for(unsigned int n = sse_iters * 8; n < num_points; n++) + for(index = sse_iters * 8; index < 
num_points; index++) { - lv_16sc_t tmp = in_common[n] * in_a[n_vec][n]; + lv_16sc_t tmp = in_common[index] * in_a[n_vec][index]; _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp))); @@ -484,7 +491,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_avx2(lv_16sc_t* resul static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) { lv_16sc_t dotProduct = lv_cmake(0,0); - + int n_vec; + unsigned int index; const unsigned int neon_iters = num_points / 4; const lv_16sc_t** _in_a = in_a; @@ -501,20 +509,20 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result, int16x4x2_t tmp_real, tmp_imag; - for(int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for(n_vec = 0; n_vec < num_a_vectors; n_vec++) { accumulator[n_vec].val[0] = vdup_n_s16(0); accumulator[n_vec].val[1] = vdup_n_s16(0); } - for(unsigned int number = 0; number < neon_iters; number++) + for(index = 0; index < neon_iters; index++) { b_val = vld2_s16((int16_t*)_in_common); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg __builtin_prefetch(_in_common + 8); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { - a_val = vld2_s16((int16_t*)&(_in_a[n_vec][number*4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg - //__builtin_prefetch(&_in_a[n_vec][number*4] + 8); + a_val = vld2_s16((int16_t*)&(_in_a[n_vec][index*4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg + //__builtin_prefetch(&_in_a[n_vec][index*4] + 8); // multiply the real*real and imag*imag to get real result // a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r @@ -537,25 +545,25 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result, _in_common += 4; } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < 
num_a_vectors; n_vec++) { vst2_s16((int16_t*)dotProductVector, accumulator[n_vec]); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 4; ++i) + for (index = 0; index < 4; ++index) { - dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), - sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[index])), + sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[index]))); } _out[n_vec] = dotProduct; } volk_gnsssdr_free(accumulator); } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { - for(unsigned int n = neon_iters * 4; n < num_points; n++) + for(index = neon_iters * 4; index < num_points; index++) { - lv_16sc_t tmp = in_common[n] * in_a[n_vec][n]; + lv_16sc_t tmp = in_common[index] * in_a[n_vec][index]; _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp))); @@ -573,7 +581,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(lv_16sc_t* res lv_16sc_t dotProduct = lv_cmake(0,0); const unsigned int neon_iters = num_points / 4; - + int n_vec; + unsigned int index; const lv_16sc_t** _in_a = in_a; const lv_16sc_t* _in_common = in_common; lv_16sc_t* _out = result; @@ -586,19 +595,19 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(lv_16sc_t* res int16x4x2_t* accumulator = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); - for(int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for(n_vec = 0; n_vec < num_a_vectors; n_vec++) { accumulator[n_vec].val[0] = vdup_n_s16(0); accumulator[n_vec].val[1] = vdup_n_s16(0); } - for(unsigned int number = 0; number < neon_iters; number++) + for(index = 0; index < neon_iters; index++) { b_val = vld2_s16((int16_t*)_in_common); //load (2 byte 
imag, 2 byte real) x 4 into 128 bits reg __builtin_prefetch(_in_common + 8); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { - a_val = vld2_s16((int16_t*)&(_in_a[n_vec][number*4])); + a_val = vld2_s16((int16_t*)&(_in_a[n_vec][index*4])); tmp.val[0] = vmul_s16(a_val.val[0], b_val.val[0]); tmp.val[1] = vmul_s16(a_val.val[1], b_val.val[0]); @@ -613,25 +622,25 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(lv_16sc_t* res _in_common += 4; } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { vst2_s16((int16_t*)dotProductVector, accumulator[n_vec]); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 4; ++i) + for (index = 0; index < 4; ++index) { - dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), - sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[index])), + sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[index]))); } _out[n_vec] = dotProduct; } volk_gnsssdr_free(accumulator); } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { - for(unsigned int n = neon_iters * 4; n < num_points; n++) + for(index = neon_iters * 4; index < num_points; index++) { - lv_16sc_t tmp = in_common[n] * in_a[n_vec][n]; + lv_16sc_t tmp = in_common[index] * in_a[n_vec][index]; _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp))); @@ -649,7 +658,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_optvma(lv_16sc_t* lv_16sc_t dotProduct = lv_cmake(0,0); const unsigned int neon_iters = num_points / 4; - + int n_vec; + unsigned int index; const lv_16sc_t** _in_a = in_a; const lv_16sc_t* _in_common = in_common; lv_16sc_t* _out = 
result; @@ -663,7 +673,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_optvma(lv_16sc_t* int16x4x2_t* accumulator1 = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); int16x4x2_t* accumulator2 = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); - for(int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for(n_vec = 0; n_vec < num_a_vectors; n_vec++) { accumulator1[n_vec].val[0] = vdup_n_s16(0); accumulator1[n_vec].val[1] = vdup_n_s16(0); @@ -671,13 +681,13 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_optvma(lv_16sc_t* accumulator2[n_vec].val[1] = vdup_n_s16(0); } - for(unsigned int number = 0; number < neon_iters; number++) + for(index = 0; index < neon_iters; index++) { b_val = vld2_s16((int16_t*)_in_common); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg __builtin_prefetch(_in_common + 8); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { - a_val = vld2_s16((int16_t*)&(_in_a[n_vec][number*4])); + a_val = vld2_s16((int16_t*)&(_in_a[n_vec][index*4])); accumulator1[n_vec].val[0] = vmla_s16(accumulator1[n_vec].val[0], a_val.val[0], b_val.val[0]); accumulator1[n_vec].val[1] = vmla_s16(accumulator1[n_vec].val[1], a_val.val[0], b_val.val[1]); @@ -687,20 +697,20 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_optvma(lv_16sc_t* _in_common += 4; } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { accumulator1[n_vec].val[0] = vqadd_s16(accumulator1[n_vec].val[0], accumulator2[n_vec].val[0]); accumulator1[n_vec].val[1] = vqadd_s16(accumulator1[n_vec].val[1], accumulator2[n_vec].val[1]); } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { vst2_s16((int16_t*)dotProductVector, accumulator1[n_vec]); // Store the results back into the dot product vector 
dotProduct = lv_cmake(0,0); - for (int i = 0; i < 4; ++i) + for (index = 0; index < 4; ++index) { - dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), - sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[index])), + sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[index]))); } _out[n_vec] = dotProduct; } @@ -708,11 +718,11 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_optvma(lv_16sc_t* volk_gnsssdr_free(accumulator2); } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { - for(unsigned int n = neon_iters * 4; n < num_points; n++) + for(index = neon_iters * 4; index < num_points; index++) { - lv_16sc_t tmp = in_common[n] * in_a[n_vec][n]; + lv_16sc_t tmp = in_common[index] * in_a[n_vec][index]; _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp))); diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h index e68c5ac9a..549fff25d 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h @@ -46,7 +46,8 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_generic(lv_16sc_t* { int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + unsigned int n; + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * 
num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); @@ -54,7 +55,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_generic(lv_16sc_t* volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -69,7 +70,8 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_generic_sat(lv_16sc { int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + unsigned int n; + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); @@ -77,7 +79,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_generic_sat(lv_16sc volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic_sat(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -92,7 +94,8 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_a_sse2(lv_16sc_t* r { int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + unsigned int n; + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); @@ -100,7 +103,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_a_sse2(lv_16sc_t* r 
volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -116,7 +119,8 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_u_sse2(lv_16sc_t* r { int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + unsigned int n; + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t)*num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t)*num_points); @@ -124,7 +128,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_u_sse2(lv_16sc_t* r volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -140,7 +144,8 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_a_avx2(lv_16sc_t* r { int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + unsigned int n; + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t)*num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t)*num_points); @@ -148,7 +153,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_a_avx2(lv_16sc_t* r volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_avx2(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -164,7 +169,8 @@ 
static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_u_avx2(lv_16sc_t* r { int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + unsigned int n; + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t)*num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t)*num_points); @@ -172,7 +178,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_u_avx2(lv_16sc_t* r volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_avx2(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -188,7 +194,8 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon(lv_16sc_t* res { int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + unsigned int n; + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t)*num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t)*num_points); @@ -196,7 +203,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon(lv_16sc_t* res volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -212,7 +219,8 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon_vma(lv_16sc_t* { int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < 
num_a_vectors; n++) + unsigned int n; + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t)*num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t)*num_points); @@ -220,7 +228,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon_vma(lv_16sc_t* volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -235,7 +243,8 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon_optvma(lv_16sc { int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + unsigned int n; + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t)*num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t)*num_points); @@ -243,7 +252,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon_optvma(lv_16sc volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_optvma(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h index aead40de8..c2d05c368 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h @@ -65,7 +65,8 @@ static 
inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) { - for (unsigned int n = 0; n < num_points; n++) + unsigned int n; + for (n = 0; n < num_points; n++) { //r*a.r - i*a.i, i*a.r + r*a.i result[n] = in_a[n] * in_b[n]; @@ -81,6 +82,7 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result, static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) { const unsigned int sse_iters = num_points / 4; + unsigned int number; __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, result; mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); @@ -89,7 +91,7 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con const lv_16sc_t* _in_a = in_a; const lv_16sc_t* _in_b = in_b; lv_16sc_t* _out = out; - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { //std::complex memory structure: real part -> reinterpret_cast(a)[2*i] //imaginery part -> reinterpret_cast(a)[2*i + 1] @@ -120,7 +122,7 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con _out += 4; } - for (unsigned int i = sse_iters * 4; i < num_points; ++i) + for (number = sse_iters * 4; number < num_points; ++number) { *_out++ = (*_in_a++) * (*_in_b++); } @@ -134,6 +136,7 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) { const unsigned int sse_iters = num_points / 4; + unsigned int number; __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result; mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 
0); @@ -142,7 +145,7 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, con const lv_16sc_t* _in_a = in_a; const lv_16sc_t* _in_b = in_b; lv_16sc_t* _out = out; - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { //std::complex memory structure: real part -> reinterpret_cast(a)[2*i] //imaginery part -> reinterpret_cast(a)[2*i + 1] @@ -173,7 +176,7 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, con _out += 4; } - for (unsigned int i = sse_iters * 4; i < num_points; ++i) + for (number = sse_iters * 4; number < num_points; ++number) { *_out++ = (*_in_a++) * (*_in_b++); } diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h index 20d1b1bec..71fe27938 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h @@ -82,11 +82,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc { lv_16sc_t tmp16; lv_32fc_t tmp32; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + int n_vec; + unsigned int n; + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { result[n_vec] = lv_cmake(0,0); } - for (unsigned int n = 0; n < num_points; n++) + for (n = 0; n < num_points; n++) { tmp16 = *in_common++; //if(n<10 || n >= 8108) printf("generic phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); @@ -105,7 +107,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc } (*phase) *= phase_inc; - for (int n_vec = 0; n_vec < 
num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { lv_16sc_t tmp = tmp16 * in_a[n_vec][n]; //lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(tmp16), lv_cimag(in_a[n_vec][n]))) , sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(tmp16), lv_creal(in_a[n_vec][n])))); @@ -123,21 +125,24 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic_reload( { lv_16sc_t tmp16; lv_32fc_t tmp32; + int n_vec; + unsigned int n; + unsigned int j; const unsigned int ROTATOR_RELOAD = 256; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { result[n_vec] = lv_cmake(0,0); } - for (unsigned int n = 0; n < num_points / ROTATOR_RELOAD; n++) + for (n = 0; n < num_points / ROTATOR_RELOAD; n++) { - for (unsigned int j = 0; j < ROTATOR_RELOAD; j++) + for (j = 0; j < ROTATOR_RELOAD; j++) { tmp16 = *in_common++; //if(n<10 || n >= 8108) printf("generic phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); (*phase) *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { lv_16sc_t tmp = tmp16 * in_a[n_vec][n * ROTATOR_RELOAD + j]; //lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(tmp16), lv_cimag(in_a[n_vec][n]))) , sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(tmp16), lv_creal(in_a[n_vec][n])))); @@ -153,13 +158,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic_reload( #endif } - for (unsigned int j = 0; j < num_points % ROTATOR_RELOAD; j++) + for (j = 0; j < num_points % ROTATOR_RELOAD; j++) { tmp16 = *in_common++; //if(n<10 || n >= 
8108) printf("generic phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); (*phase) *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { lv_16sc_t tmp = tmp16 * in_a[n_vec][ (num_points / ROTATOR_RELOAD) * ROTATOR_RELOAD + j ]; //lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(tmp16), lv_cimag(in_a[n_vec][n]))) , sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(tmp16), lv_creal(in_a[n_vec][n])))); @@ -179,7 +184,9 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ lv_16sc_t dotProduct = lv_cmake(0,0); const unsigned int sse_iters = num_points / 4; - + int n_vec; + unsigned int number; + unsigned int n; const lv_16sc_t** _in_a = in_a; const lv_16sc_t* _in_common = in_common; lv_16sc_t* _out = result; @@ -189,7 +196,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ __m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); __m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm_setzero_si128(); imagcacc[n_vec] = _mm_setzero_si128(); @@ -215,7 +222,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ lv_16sc_t tmp16; lv_32fc_t tmp32; - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { // Phase rotation on operand in_common starts here: //printf("generic phase %i: %f,%f\n", n*4,lv_creal(*phase),lv_cimag(*phase)); @@ -264,7 +271,7 @@ static 
inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ //next two samples _in_common += 2; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a = _mm_load_si128((__m128i*)&(_in_a[n_vec][number*4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg @@ -295,7 +302,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ } } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real); imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag); @@ -324,14 +331,14 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ //(*phase) = lv_cmake((float*)two_phase_acc[0], (float*)two_phase_acc[1]); (*phase) = two_phase_acc[0]; - for(unsigned int n = sse_iters * 4; n < num_points; n++) + for(n = sse_iters * 4; n < num_points; n++) { tmp16 = in_common[n]; //printf("a_sse phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); (*phase) *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { lv_16sc_t tmp = tmp16 * in_a[n_vec][n]; //lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(tmp16), lv_cimag(in_a[n_vec][n]))) , sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(tmp16), lv_creal(in_a[n_vec][n])))); @@ -352,6 +359,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l const unsigned int sse_iters = num_points / 4; const unsigned int ROTATOR_RELOAD = 128; + int n_vec; + unsigned int number; + unsigned int j; + unsigned int n; const lv_16sc_t** _in_a = in_a; const lv_16sc_t* 
_in_common = in_common; @@ -362,7 +373,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l __m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); __m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm_setzero_si128(); imagcacc[n_vec] = _mm_setzero_si128(); @@ -388,9 +399,9 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l lv_16sc_t tmp16; lv_32fc_t tmp32; - for (unsigned int number = 0; number < sse_iters / ROTATOR_RELOAD; ++number) + for (number = 0; number < sse_iters / ROTATOR_RELOAD; ++number) { - for (unsigned int j = 0; j < ROTATOR_RELOAD; j++) + for (j = 0; j < ROTATOR_RELOAD; j++) { // Phase rotation on operand in_common starts here: //printf("generic phase %i: %f,%f\n", n*4,lv_creal(*phase),lv_cimag(*phase)); @@ -439,7 +450,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l //next two samples _in_common += 2; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a = _mm_load_si128((__m128i*)&(_in_a[n_vec][(number * ROTATOR_RELOAD + j) * 4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg @@ -468,7 +479,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2); } - for (unsigned int j = 0; j < sse_iters % ROTATOR_RELOAD; j++) + for (j = 0; j < sse_iters % ROTATOR_RELOAD; j++) { pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg //complex 32fc multiplication b=a*two_phase_acc_reg @@ -515,7 +526,7 @@ static inline 
void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l //next two samples _in_common += 2; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a = _mm_load_si128((__m128i*)&(_in_a[n_vec][((sse_iters / ROTATOR_RELOAD) * ROTATOR_RELOAD + j) * 4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg @@ -537,7 +548,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l } } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real); imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag); @@ -567,14 +578,14 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l //(*phase) = lv_cmake((float*)two_phase_acc[0], (float*)two_phase_acc[1]); (*phase) = two_phase_acc[0]; - for(unsigned int n = sse_iters * 4; n < num_points; n++) + for(n = sse_iters * 4; n < num_points; n++) { tmp16 = in_common[n]; //printf("a_sse phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); (*phase) *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { lv_16sc_t tmp = tmp16 * in_a[n_vec][n]; //lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(tmp16), lv_cimag(in_a[n_vec][n]))) , sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(tmp16), lv_creal(in_a[n_vec][n])))); @@ -595,7 +606,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ lv_16sc_t dotProduct = lv_cmake(0,0); const unsigned int sse_iters = num_points / 4; - + int n_vec; + unsigned int number; + unsigned int j; + unsigned int n; const 
lv_16sc_t** _in_a = in_a; const lv_16sc_t* _in_common = in_common; @@ -605,7 +619,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ __m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); __m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm_setzero_si128(); imagcacc[n_vec] = _mm_setzero_si128(); @@ -631,7 +645,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ lv_16sc_t tmp16; lv_32fc_t tmp32; - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { // Phase rotation on operand in_common starts here: pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg @@ -680,7 +694,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ //next two samples _in_common += 2; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a = _mm_loadu_si128((__m128i*)&(_in_a[n_vec][number*4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg @@ -711,7 +725,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ } } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real); imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag); @@ -720,10 +734,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ _mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector dotProduct = 
lv_cmake(0,0); - for (int i = 0; i < 4; ++i) + for (j = 0; j < 4; ++j) { - dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), - sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[j])), + sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[j]))); } _out[n_vec] = dotProduct; } @@ -733,13 +747,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ _mm_store_ps((float*)two_phase_acc, two_phase_acc_reg); (*phase) = two_phase_acc[0]; - for(unsigned int n = sse_iters * 4; n < num_points; n++) + for(n = sse_iters * 4; n < num_points; n++) { tmp16 = in_common[n]; tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); (*phase) *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { lv_16sc_t tmp = tmp16 * in_a[n_vec][n]; _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), @@ -759,6 +773,9 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_ const lv_16sc_t** _in_a = in_a; const lv_16sc_t* _in_common = in_common; lv_16sc_t* _out = result; + int n_vec; + unsigned int number; + unsigned int n; lv_16sc_t tmp16; lv_32fc_t tmp32; @@ -769,7 +786,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_ __m256i* realcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment()); __m256i* imagcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment()); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm256_setzero_si256(); imagcacc[n_vec] = _mm256_setzero_si256(); @@ -793,7 +810,7 @@ static inline void 
volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_ __m128 yl, yh, tmp1, tmp2, tmp3; - for(unsigned int number = 0; number < avx2_iters; number++) + for(number = 0; number < avx2_iters; number++) { a = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg //complex 32fc multiplication b=a*two_phase_acc_reg @@ -880,7 +897,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_ result2 = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic _in_common += 2; b2 = _mm256_insertf128_si256(_mm256_castsi128_si256(result1), (result2), 1); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a2 = _mm256_load_si256((__m256i*)&(_in_a[n_vec][number * 8])); @@ -911,7 +928,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_ } } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm256_and_si256(realcacc[n_vec], mask_real); imagcacc[n_vec] = _mm256_and_si256(imagcacc[n_vec], mask_imag); @@ -920,10 +937,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_ _mm256_store_si256((__m256i*)dotProductVector, a2); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 8; ++i) + for (number = 0; number < 8; ++number) { - dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), - sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[number])), + sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[number]))); } _out[n_vec] = dotProduct; } @@ -935,13 +952,13 @@ static inline void 
volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_ _mm_store_ps((float*)two_phase_acc, two_phase_acc_reg); (*phase) = two_phase_acc[0]; - for(unsigned int n = avx2_iters * 8; n < num_points; n++) + for(n = avx2_iters * 8; n < num_points; n++) { tmp16 = in_common[n]; tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); (*phase) *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { lv_16sc_t tmp = tmp16 * in_a[n_vec][n]; _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), @@ -964,7 +981,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l const lv_16sc_t** _in_a = in_a; const lv_16sc_t* _in_common = in_common; lv_16sc_t* _out = result; - + int n_vec; + unsigned int number; + unsigned int n; + unsigned int j; lv_16sc_t tmp16; lv_32fc_t tmp32; @@ -974,7 +994,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l __m256i* realcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment()); __m256i* imagcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment()); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm256_setzero_si256(); imagcacc[n_vec] = _mm256_setzero_si256(); @@ -998,9 +1018,9 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l __m128 yl, yh, tmp1, tmp2, tmp3; - for (unsigned int number = 0; number < avx2_iters / ROTATOR_RELOAD; ++number) + for (number = 0; number < avx2_iters / ROTATOR_RELOAD; ++number) { - for (unsigned int j = 0; j < ROTATOR_RELOAD; j++) + for (j = 0; j < ROTATOR_RELOAD; j++) { a = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), 
(float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg //complex 32fc multiplication b=a*two_phase_acc_reg @@ -1087,7 +1107,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l result2 = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic _in_common += 2; b2 = _mm256_insertf128_si256(_mm256_castsi128_si256(result1), (result2), 1); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a2 = _mm256_load_si256((__m256i*)&(_in_a[n_vec][(number * ROTATOR_RELOAD + j) * 8])); @@ -1116,7 +1136,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2); } - for (unsigned int j = 0; j < avx2_iters % ROTATOR_RELOAD; j++) + for (j = 0; j < avx2_iters % ROTATOR_RELOAD; j++) { a = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg //complex 32fc multiplication b=a*two_phase_acc_reg @@ -1203,7 +1223,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l result2 = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic _in_common += 2; b2 = _mm256_insertf128_si256(_mm256_castsi128_si256(result1), (result2), 1); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a2 = _mm256_load_si256((__m256i*)&(_in_a[n_vec][((avx2_iters / ROTATOR_RELOAD) * ROTATOR_RELOAD + j) * 8])); @@ -1225,7 +1245,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l } } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { realcacc[n_vec] = _mm256_and_si256(realcacc[n_vec], mask_real); imagcacc[n_vec] = _mm256_and_si256(imagcacc[n_vec], mask_imag); @@ 
-1234,10 +1254,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l _mm256_store_si256((__m256i*)dotProductVector, a2); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 8; ++i) + for (j = 0; j < 8; ++j) { - dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), - sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[j])), + sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[j]))); } _out[n_vec] = dotProduct; } @@ -1248,13 +1268,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l _mm_store_ps((float*)two_phase_acc, two_phase_acc_reg); (*phase) = two_phase_acc[0]; - for(unsigned int n = avx2_iters * 8; n < num_points; n++) + for(n = avx2_iters * 8; n < num_points; n++) { tmp16 = in_common[n]; tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); (*phase) *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { lv_16sc_t tmp = tmp16 * in_a[n_vec][n]; _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), @@ -1276,7 +1296,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* const lv_16sc_t** _in_a = in_a; const lv_16sc_t* _in_common = in_common; lv_16sc_t* _out = result; - + int n_vec; + int i; + unsigned int number; + unsigned int n; lv_16sc_t tmp16_, tmp; lv_32fc_t tmp32_; @@ -1315,13 +1338,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* int16x4x2_t* accumulator = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); - for(int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for(n_vec = 0; n_vec < 
num_a_vectors; n_vec++) { accumulator[n_vec].val[0] = vdup_n_s16(0); accumulator[n_vec].val[1] = vdup_n_s16(0); } - for(unsigned int number = 0; number < neon_iters; number++) + for(number = 0; number < neon_iters; number++) { /* load 4 complex numbers (int 16 bits each component) */ tmp16 = vld2_s16((int16_t*)_in_common); @@ -1370,7 +1393,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* _phase_real = vsubq_f32(tmp32_real.val[0], tmp32_real.val[1]); _phase_imag = vaddq_f32(tmp32_imag.val[0], tmp32_imag.val[1]); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a_val = vld2_s16((int16_t*)&(_in_a[n_vec][number*4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg //__builtin_prefetch(&_in_a[n_vec][number*4] + 8); @@ -1410,11 +1433,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* } } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { vst2_s16((int16_t*)dotProductVector, accumulator[n_vec]); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 4; ++i) + for (i = 0; i < 4; ++i) { dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); @@ -1428,13 +1451,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* (*phase) = lv_cmake((float32_t)__phase_real[0], (float32_t)__phase_imag[0]); } - for (unsigned int n = neon_iters * 4; n < num_points; n++) + for (n = neon_iters * 4; n < num_points; n++) { tmp16_ = in_common[n]; //printf("neon phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); tmp32_ = lv_cmake((float32_t)lv_creal(tmp16_), (float32_t)lv_cimag(tmp16_)) * (*phase); tmp16_ = lv_cmake((int16_t)rintf(lv_creal(tmp32_)), (int16_t)rintf(lv_cimag(tmp32_))); (*phase) *= phase_inc; - for (int n_vec = 
0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { tmp = tmp16_ * in_a[n_vec][n]; _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp))); @@ -1456,7 +1479,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s const lv_16sc_t** _in_a = in_a; const lv_16sc_t* _in_common = in_common; lv_16sc_t* _out = result; - + int n_vec; + int i; + unsigned int number; + unsigned int n; lv_16sc_t tmp16_, tmp; lv_32fc_t tmp32_; @@ -1495,13 +1521,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s int16x4x2_t* accumulator = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); - for(int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for(n_vec = 0; n_vec < num_a_vectors; n_vec++) { accumulator[n_vec].val[0] = vdup_n_s16(0); accumulator[n_vec].val[1] = vdup_n_s16(0); } - for(unsigned int number = 0; number < neon_iters; number++) + for(number = 0; number < neon_iters; number++) { /* load 4 complex numbers (int 16 bits each component) */ tmp16 = vld2_s16((int16_t*)_in_common); @@ -1581,7 +1607,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a_val = vld2_s16((int16_t*)&(_in_a[n_vec][number*4])); @@ -1597,11 +1623,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s } } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { vst2_s16((int16_t*)dotProductVector, accumulator[n_vec]); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 4; ++i) + for (i = 0; i < 4; ++i) { dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProduct), 
lv_cimag(dotProductVector[i]))); @@ -1616,7 +1642,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s (*phase) = lv_cmake((float32_t)__phase_real[0], (float32_t)__phase_imag[0]); } - for (unsigned int n = neon_iters * 4; n < num_points; n++) + for (n = neon_iters * 4; n < num_points; n++) { tmp16_ = in_common[n]; //printf("neon phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); tmp32_ = lv_cmake((float32_t)lv_creal(tmp16_), (float32_t)lv_cimag(tmp16_)) * (*phase); @@ -1644,7 +1670,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_ const lv_16sc_t** _in_a = in_a; const lv_16sc_t* _in_common = in_common; lv_16sc_t* _out = result; - + int n_vec; + int i; + unsigned int number; + unsigned int n; lv_16sc_t tmp16_, tmp; lv_32fc_t tmp32_; @@ -1683,7 +1712,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_ int16x4x2_t* accumulator1 = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); int16x4x2_t* accumulator2 = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); - for(int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for(n_vec = 0; n_vec < num_a_vectors; n_vec++) { accumulator1[n_vec].val[0] = vdup_n_s16(0); accumulator1[n_vec].val[1] = vdup_n_s16(0); @@ -1691,7 +1720,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_ accumulator2[n_vec].val[1] = vdup_n_s16(0); } - for(unsigned int number = 0; number < neon_iters; number++) + for(number = 0; number < neon_iters; number++) { /* load 4 complex numbers (int 16 bits each component) */ b_val = vld2_s16((int16_t*)_in_common); @@ -1759,7 +1788,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_ _phase_imag = vld1q_f32(____phase_imag); } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a_val = 
vld2_s16((int16_t*)&(_in_a[n_vec][number*4])); @@ -1770,16 +1799,16 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_ accumulator2[n_vec].val[1] = vmla_s16(accumulator2[n_vec].val[1], a_val.val[1], b_val.val[0]); } } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { accumulator1[n_vec].val[0] = vqadd_s16(accumulator1[n_vec].val[0], accumulator2[n_vec].val[0]); accumulator1[n_vec].val[1] = vqadd_s16(accumulator1[n_vec].val[1], accumulator2[n_vec].val[1]); } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { vst2_s16((int16_t*)dotProductVector, accumulator1[n_vec]); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 4; ++i) + for (i = 0; i < 4; ++i) { dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); @@ -1795,7 +1824,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_ (*phase) = lv_cmake((float32_t)__phase_real[0], (float32_t)__phase_imag[0]); } - for (unsigned int n = neon_iters * 4; n < num_points; n++) + for (n = neon_iters * 4; n < num_points; n++) { tmp16_ = in_common[n]; //printf("neon phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); tmp32_ = lv_cmake((float32_t)lv_creal(tmp16_), (float32_t)lv_cimag(tmp16_)) * (*phase); diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic.h index 7327812c2..cf002bf6c 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic.h +++ 
b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic.h @@ -50,17 +50,17 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_generic(lv_ phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); } volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(result, local_code, phase_inc[0], phase,(const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -80,17 +80,17 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_generic_rel phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); } volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic_reload(result, local_code, phase_inc[0], phase,(const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < 
num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -110,10 +110,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3(lv_1 phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); @@ -121,7 +121,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3(lv_1 volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -141,10 +141,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3_relo phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); @@ -152,7 +152,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3_relo 
volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -172,10 +172,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_sse3(lv_1 phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); @@ -183,7 +183,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_sse3(lv_1 volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -203,10 +203,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_avx2(lv_1 phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); 
memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); @@ -214,7 +214,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_avx2(lv_1 volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -234,10 +234,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_avx2_relo phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); @@ -245,7 +245,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_avx2_relo volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -265,10 +265,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_avx2(lv_1 phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < 
num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); @@ -276,7 +276,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_avx2(lv_1 volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -296,10 +296,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_avx2_relo phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); @@ -307,7 +307,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_avx2_relo volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -327,10 +327,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon(lv_16s phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int 
num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); @@ -338,7 +338,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon(lv_16s volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -358,10 +358,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon_vma(lv phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); @@ -369,7 +369,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon_vma(lv volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h 
b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h index 6c22a2b89..4e59aa1d1 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h @@ -74,9 +74,11 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) { int local_code_chip_index; - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + int current_correlator_tap; + int n; + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { - for (int n = 0; n < num_points; n++) + for (n = 0; n < num_points; n++) { // resample code for current tap local_code_chip_index = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); @@ -97,7 +99,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse4_1(lv_16sc_t** r { lv_16sc_t** _result = result; const unsigned int quarterPoints = num_points / 4; - + int current_correlator_tap; + unsigned int n; + unsigned int k; const __m128 fours = _mm_set1_ps(4.0f); const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips); const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips); @@ -111,12 +115,12 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse4_1(lv_16sc_t** r __m128i local_code_chip_index_reg, aux_i, negatives, i; __m128 aux, aux2, shifts_chips_reg, c, cTrunc, base; - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; 
current_correlator_tap < num_out_vectors; current_correlator_tap++) { shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]); aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); - for(unsigned int n = 0; n < quarterPoints; n++) + for(n = 0; n < quarterPoints; n++) { aux = _mm_mul_ps(code_phase_step_chips_reg, indexn); aux = _mm_add_ps(aux, aux2); @@ -134,13 +138,13 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse4_1(lv_16sc_t** r aux_i = _mm_and_si128(code_length_chips_reg_i, negatives); local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); - for(unsigned int k = 0; k < 4; ++k) + for(k = 0; k < 4; ++k) { _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; } indexn = _mm_add_ps(indexn, fours); } - for(unsigned int n = quarterPoints * 4; n < num_points; n++) + for(n = quarterPoints * 4; n < num_points; n++) { // resample code for current tap local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); @@ -161,7 +165,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse4_1(lv_16sc_t** r { lv_16sc_t** _result = result; const unsigned int quarterPoints = num_points / 4; - + int current_correlator_tap; + unsigned int n; + unsigned int k; const __m128 fours = _mm_set1_ps(4.0f); const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips); const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips); @@ -175,12 +181,12 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse4_1(lv_16sc_t** r __m128i local_code_chip_index_reg, aux_i, negatives, i; __m128 aux, aux2, shifts_chips_reg, c, cTrunc, base; - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for 
(current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]); aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); - for(unsigned int n = 0; n < quarterPoints; n++) + for(n = 0; n < quarterPoints; n++) { aux = _mm_mul_ps(code_phase_step_chips_reg, indexn); aux = _mm_add_ps(aux, aux2); @@ -198,13 +204,13 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse4_1(lv_16sc_t** r aux_i = _mm_and_si128(code_length_chips_reg_i, negatives); local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); - for(unsigned int k = 0; k < 4; ++k) + for(k = 0; k < 4; ++k) { _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; } indexn = _mm_add_ps(indexn, fours); } - for(unsigned int n = quarterPoints * 4; n < num_points; n++) + for(n = quarterPoints * 4; n < num_points; n++) { // resample code for current tap local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); @@ -224,8 +230,10 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse4_1(lv_16sc_t** r static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse3(lv_16sc_t** result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) { lv_16sc_t** _result = result; - const unsigned int quarterPoints = num_points / 4; - + const unsigned int quarterPoints = num_points / 4; + int current_correlator_tap; + unsigned int n; + unsigned int k; const __m128 ones = _mm_set1_ps(1.0f); const __m128 fours = _mm_set1_ps(4.0f); const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips); @@ -240,12 +248,12 
@@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse3(lv_16sc_t** res __m128i local_code_chip_index_reg, aux_i, negatives, i; __m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base; - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]); aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); - for(unsigned int n = 0; n < quarterPoints; n++) + for(n = 0; n < quarterPoints; n++) { aux = _mm_mul_ps(code_phase_step_chips_reg, indexn); aux = _mm_add_ps(aux, aux2); @@ -266,13 +274,13 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse3(lv_16sc_t** res aux_i = _mm_and_si128(code_length_chips_reg_i, negatives); local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); - for(unsigned int k = 0; k < 4; ++k) + for(k = 0; k < 4; ++k) { _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; } indexn = _mm_add_ps(indexn, fours); } - for(unsigned int n = quarterPoints * 4; n < num_points; n++) + for(n = quarterPoints * 4; n < num_points; n++) { // resample code for current tap local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); @@ -293,7 +301,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse3(lv_16sc_t** res { lv_16sc_t** _result = result; const unsigned int quarterPoints = num_points / 4; - + int current_correlator_tap; + unsigned int n; + unsigned int k; const __m128 ones = _mm_set1_ps(1.0f); const __m128 fours = _mm_set1_ps(4.0f); const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips); @@ -308,12 +318,12 @@ static 
inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse3(lv_16sc_t** res __m128i local_code_chip_index_reg, aux_i, negatives, i; __m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base; - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]); aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); - for(unsigned int n = 0; n < quarterPoints; n++) + for(n = 0; n < quarterPoints; n++) { aux = _mm_mul_ps(code_phase_step_chips_reg, indexn); aux = _mm_add_ps(aux, aux2); @@ -334,13 +344,13 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse3(lv_16sc_t** res aux_i = _mm_and_si128(code_length_chips_reg_i, negatives); local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); - for(unsigned int k = 0; k < 4; ++k) + for(k = 0; k < 4; ++k) { _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; } indexn = _mm_add_ps(indexn, fours); } - for(unsigned int n = quarterPoints * 4; n < num_points; n++) + for(n = quarterPoints * 4; n < num_points; n++) { // resample code for current tap local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); @@ -360,8 +370,10 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse3(lv_16sc_t** res static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_avx(lv_16sc_t** result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) { lv_16sc_t** _result = result; - const unsigned int avx_iters = 
num_points / 8; - + const unsigned int avx_iters = num_points / 8; + int current_correlator_tap; + unsigned int n; + unsigned int k; const __m256 eights = _mm256_set1_ps(8.0f); const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips); const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips); @@ -376,12 +388,12 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_avx(lv_16sc_t** resu __m256i local_code_chip_index_reg, i; __m256 aux, aux2, aux3, shifts_chips_reg, c, cTrunc, base, negatives, indexn; - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { shifts_chips_reg = _mm256_set1_ps((float)shifts_chips[current_correlator_tap]); aux2 = _mm256_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); indexn = n0; - for(unsigned int n = 0; n < avx_iters; n++) + for(n = 0; n < avx_iters; n++) { __builtin_prefetch(&_result[current_correlator_tap][8 * n + 7], 1, 0); __builtin_prefetch(&local_code_chip_index[8], 1, 3); @@ -405,7 +417,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_avx(lv_16sc_t** resu local_code_chip_index_reg = _mm256_cvttps_epi32(aux); _mm256_store_si256((__m256i*)local_code_chip_index, local_code_chip_index_reg); - for(unsigned int k = 0; k < 8; ++k) + for(k = 0; k < 8; ++k) { _result[current_correlator_tap][n * 8 + k] = local_code[local_code_chip_index[k]]; } @@ -415,7 +427,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_avx(lv_16sc_t** resu _mm256_zeroupper(); for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { - for(unsigned int n = avx_iters * 8; n < num_points; n++) + for(n = avx_iters * 8; n < num_points; n++) { // resample code for current tap local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - 
rem_code_phase_chips); @@ -436,7 +448,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_avx(lv_16sc_t** resu { lv_16sc_t** _result = result; const unsigned int avx_iters = num_points / 8; - + int current_correlator_tap; + unsigned int n; + unsigned int k; const __m256 eights = _mm256_set1_ps(8.0f); const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips); const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips); @@ -451,12 +465,12 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_avx(lv_16sc_t** resu __m256i local_code_chip_index_reg, i; __m256 aux, aux2, aux3, shifts_chips_reg, c, cTrunc, base, negatives, indexn; - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { shifts_chips_reg = _mm256_set1_ps((float)shifts_chips[current_correlator_tap]); aux2 = _mm256_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); indexn = n0; - for(unsigned int n = 0; n < avx_iters; n++) + for(n = 0; n < avx_iters; n++) { __builtin_prefetch(&_result[current_correlator_tap][8 * n + 7], 1, 0); __builtin_prefetch(&local_code_chip_index[8], 1, 3); @@ -480,7 +494,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_avx(lv_16sc_t** resu local_code_chip_index_reg = _mm256_cvttps_epi32(aux); _mm256_store_si256((__m256i*)local_code_chip_index, local_code_chip_index_reg); - for(unsigned int k = 0; k < 8; ++k) + for(k = 0; k < 8; ++k) { _result[current_correlator_tap][n * 8 + k] = local_code[local_code_chip_index[k]]; } @@ -488,9 +502,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_avx(lv_16sc_t** resu } } _mm256_zeroupper(); - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { - 
for(unsigned int n = avx_iters * 8; n < num_points; n++) + for(n = avx_iters * 8; n < num_points; n++) { // resample code for current tap local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); @@ -530,13 +544,15 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal); reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal); // this refinement is required! float32x4_t n0 = vld1q_f32((float*)vec); - - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + int current_correlator_tap; + unsigned int n; + unsigned int k; + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { shifts_chips_reg = vdupq_n_f32((float)shifts_chips[current_correlator_tap]); aux2 = vsubq_f32(shifts_chips_reg, rem_code_phase_chips_reg); indexn = n0; - for(unsigned int n = 0; n < neon_iters; n++) + for(n = 0; n < neon_iters; n++) { __builtin_prefetch(&_result[current_correlator_tap][4 * n + 3], 1, 0); __builtin_prefetch(&local_code_chip_index[4]); @@ -564,13 +580,13 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul vst1q_s32((int32_t*)local_code_chip_index, local_code_chip_index_reg); - for(unsigned int k = 0; k < 4; ++k) + for(k = 0; k < 4; ++k) { _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; } indexn = vaddq_f32(indexn, fours); } - for(unsigned int n = neon_iters * 4; n < num_points; n++) + for(n = neon_iters * 4; n < num_points; n++) { __builtin_prefetch(&_result[current_correlator_tap][n], 1, 0); // resample code for current tap diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn.h 
b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn.h index 5771bbcac..937a9ef78 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn.h @@ -74,9 +74,11 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_generic(lv_16sc_t { int local_code_chip_index; //fesetround(FE_TONEAREST); - for (int current_vector = 0; current_vector < num_out_vectors; current_vector++) + int current_vector; + unsigned int n; + for (current_vector = 0; current_vector < num_out_vectors; current_vector++) { - for (unsigned int n = 0; n < num_output_samples; n++) + for (n = 0; n < num_output_samples; n++) { // resample code for current tap local_code_chip_index = round(code_phase_step_chips * (float)(n) + rem_code_phase_chips[current_vector] - 0.5f); diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_sincos_32fc.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_sincos_32fc.h index 5e89da9af..792350d88 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_sincos_32fc.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_sincos_32fc.h @@ -560,7 +560,8 @@ static inline void volk_gnsssdr_32f_sincos_32fc_u_sse2(lv_32fc_t* out, const flo static inline void volk_gnsssdr_32f_sincos_32fc_generic(lv_32fc_t* out, const float* in, unsigned int num_points) { float _in; - for(unsigned int i = 0; i < num_points; i++) + unsigned int i; + for(i = 0; i < num_points; i++) { _in = *in++; *out++ = lv_cmake((float)cos(_in), (float)sin(_in) ); @@ -584,8 +585,8 @@ static inline void volk_gnsssdr_32f_sincos_32fc_generic_fxpt(lv_32fc_t* out, con const 
int32_t Nbits = 10; const int32_t diffbits = bitlength - Nbits; uint32_t ux; - - for(unsigned int i = 0; i < num_points; i++) + unsigned int i; + for(i = 0; i < num_points; i++) { _in = *in++; d = (int32_t)floor(_in / TWO_PI + 0.5); diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h index bab43ea7d..831ae99d3 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h @@ -70,7 +70,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector float* inputVectorPtr = (float*)inputVector; int16_t* outputVectorPtr = (int16_t*)outputVector; float aux; - + unsigned int i; const float min_val = (float)SHRT_MIN; const float max_val = (float)SHRT_MAX; @@ -80,7 +80,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector const __m128 vmin_val = _mm_set_ps1(min_val); const __m128 vmax_val = _mm_set_ps1(max_val); - for(unsigned int i = 0; i < sse_iters; i++) + for(i = 0; i < sse_iters; i++) { inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4; inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4; @@ -99,7 +99,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector outputVectorPtr += 8; } - for(unsigned int i = sse_iters * 8; i < num_points * 2; i++) + for(i = sse_iters * 8; i < num_points * 2; i++) { aux = *inputVectorPtr++; if(aux > max_val) @@ -122,6 +122,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, float* inputVectorPtr = (float*)inputVector; int16_t* outputVectorPtr = (int16_t*)outputVector; float aux; + unsigned int i; const float min_val = (float)SHRT_MIN; const float 
max_val = (float)SHRT_MAX; @@ -132,7 +133,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, const __m128 vmin_val = _mm_set_ps1(min_val); const __m128 vmax_val = _mm_set_ps1(max_val); - for(unsigned int i = 0;i < sse_iters; i++) + for(i = 0;i < sse_iters; i++) { inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4; inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4; @@ -151,7 +152,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, outputVectorPtr += 8; } - for(unsigned int i = sse_iters * 8; i < num_points*2; i++) + for(i = sse_iters * 8; i < num_points*2; i++) { aux = *inputVectorPtr++; if(aux > max_val) @@ -174,6 +175,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector float* inputVectorPtr = (float*)inputVector; int16_t* outputVectorPtr = (int16_t*)outputVector; float aux; + unsigned int i; const float min_val = (float)SHRT_MIN; const float max_val = (float)SHRT_MAX; @@ -184,7 +186,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector const __m128 vmin_val = _mm_set_ps1(min_val); const __m128 vmax_val = _mm_set_ps1(max_val); - for(unsigned int i = 0; i < sse_iters; i++) + for(i = 0; i < sse_iters; i++) { inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4; inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4; @@ -203,7 +205,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector outputVectorPtr += 8; } - for(unsigned int i = sse_iters * 8; i < num_points * 2; i++) + for(i = sse_iters * 8; i < num_points * 2; i++) { aux = *inputVectorPtr++; if(aux > max_val) @@ -225,7 +227,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, const float min_val = (float)SHRT_MIN; const float max_val = (float)SHRT_MAX; float aux; - + unsigned int i; float* inputVectorPtr = (float*)inputVector; int16_t* 
outputVectorPtr = (int16_t*)outputVector; @@ -235,7 +237,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, const __m128 vmin_val = _mm_set_ps1(min_val); const __m128 vmax_val = _mm_set_ps1(max_val); - for(unsigned int i = 0;i < sse_iters; i++) + for(i = 0; i < sse_iters; i++) { inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4; inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4; @@ -254,7 +256,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, outputVectorPtr += 8; } - for(unsigned int i = sse_iters * 8; i < num_points * 2; i++) + for(i = sse_iters * 8; i < num_points * 2; i++) { aux = *inputVectorPtr++; if(aux > max_val) @@ -280,7 +282,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector, const float min_val_f = (float)SHRT_MIN; const float max_val_f = (float)SHRT_MAX; float32_t aux; - + unsigned int i; const float32x4_t min_val = vmovq_n_f32(min_val_f); const float32x4_t max_val = vmovq_n_f32(max_val_f); float32x4_t half = vdupq_n_f32(0.5f); @@ -290,7 +292,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector, int16x4_t intInputVal1, intInputVal2; int16x8_t res; - for(unsigned int i = 0; i < neon_iters; i++) + for(i = 0; i < neon_iters; i++) { a = vld1q_f32((const float32_t*)(inputVectorPtr)); inputVectorPtr += 4; b = vld1q_f32((const float32_t*)(inputVectorPtr)); inputVectorPtr += 4; @@ -318,7 +320,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector, outputVectorPtr += 8; } - for(unsigned int i = neon_iters * 8; i < num_points * 2; i++) + for(i = neon_iters * 8; i < num_points * 2; i++) { aux = *inputVectorPtr++; if(aux > max_val_f) @@ -341,8 +343,8 @@ static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVecto const float min_val = (float)SHRT_MIN; const float max_val = (float)SHRT_MAX; float aux; - - for(unsigned int i = 0; i < num_points * 
2; i++) + unsigned int i; + for(i = 0; i < num_points * 2; i++) { aux = *inputVectorPtr++; if(aux > max_val) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h index 6f1de0c05..b04b1072f 100755 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h @@ -71,8 +71,8 @@ static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector, const float min_val = (float)SCHAR_MIN; const float max_val = (float)SCHAR_MAX; float aux; - - for(unsigned int i = 0; i < num_points * 2; i++) + unsigned int i; + for(i = 0; i < num_points * 2; i++) { aux = *inputVectorPtr++ * max_val; if(aux > max_val) @@ -98,6 +98,7 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector, const float min_val = (float)SCHAR_MIN; const float max_val = (float)SCHAR_MAX; float aux; + unsigned int i; __m128 inputVal1, inputVal2, inputVal3, inputVal4; __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4; @@ -106,7 +107,7 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector, const __m128 vmin_val = _mm_set_ps1(min_val); const __m128 vmax_val = _mm_set_ps1(max_val); - for(unsigned int i = 0; i < sse_iters; i++) + for(i = 0; i < sse_iters; i++) { inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4; inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4; @@ -137,7 +138,7 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector, outputVectorPtr += 16; } - for(unsigned int i = sse_iters * 16; i < num_points * 2; i++) + for(i = sse_iters * 16; i < num_points * 2; i++) { aux = *inputVectorPtr++ * max_val; if(aux > max_val) @@ -163,6 +164,7 @@ 
static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, const float min_val = (float)SCHAR_MIN; const float max_val = (float)SCHAR_MAX; float aux; + unsigned int i; __m128 inputVal1, inputVal2, inputVal3, inputVal4; __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4; @@ -171,7 +173,7 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, const __m128 vmin_val = _mm_set_ps1(min_val); const __m128 vmax_val = _mm_set_ps1(max_val); - for(unsigned int i = 0; i < sse_iters; i++) + for(i = 0; i < sse_iters; i++) { inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4; inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4; @@ -202,7 +204,7 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, outputVectorPtr += 16; } - for(unsigned int i = sse_iters * 16; i < num_points * 2; i++) + for(i = sse_iters * 16; i < num_points * 2; i++) { aux = *inputVectorPtr++ * max_val; if(aux > max_val) @@ -227,6 +229,7 @@ static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, co const float32_t max_val_f = (float32_t)SCHAR_MAX; const float32_t min_val_f = (float32_t)SCHAR_MIN; float32_t aux; + unsigned int i; const float32x4_t min_val = vmovq_n_f32(min_val_f); const float32x4_t max_val = vmovq_n_f32(max_val_f); @@ -240,7 +243,7 @@ static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, co int8x8_t res8_1, res8_2; int8x16_t outputVal; - for(unsigned int i = 0; i < neon_iters; i++) + for(i = 0; i < neon_iters; i++) { a = vld1q_f32((const float32_t*)inputVectorPtr); inputVectorPtr += 4; a = vmulq_f32(a, max_val); @@ -290,7 +293,7 @@ static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, co outputVectorPtr += 16; } - for(unsigned int i = neon_iters * 16; i < num_points * 2; i++) + for(i = neon_iters * 16; i < num_points * 2; i++) { aux = *inputVectorPtr++ * max_val_f; if(aux > max_val_f) diff --git 
a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_resamplerxnpuppet_32fc.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_resamplerxnpuppet_32fc.h index dc310f9ff..c109a1be3 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_resamplerxnpuppet_32fc.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_resamplerxnpuppet_32fc.h @@ -50,11 +50,11 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_generic(lv_32fc_t* r int code_length_chips = 1023; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; float shifts_chips[3] = { -0.1, 0.0, 0.1 }; lv_32fc_t** result_aux = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -63,7 +63,7 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_generic(lv_32fc_t* r memcpy((lv_32fc_t*)result, (lv_32fc_t*)result_aux[0], sizeof(lv_32fc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -80,11 +80,11 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_sse3(lv_32fc_t* re int code_length_chips = 1023; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; float shifts_chips[3] = { -0.1, 0.0, 0.1 }; lv_32fc_t** result_aux = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, 
volk_gnsssdr_get_alignment()); } @@ -93,7 +93,7 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_sse3(lv_32fc_t* re memcpy((lv_32fc_t*)result, (lv_32fc_t*)result_aux[0], sizeof(lv_32fc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -109,11 +109,11 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_sse3(lv_32fc_t* re int code_length_chips = 1023; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; float shifts_chips[3] = { -0.1, 0.0, 0.1 }; lv_32fc_t** result_aux = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -122,7 +122,7 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_sse3(lv_32fc_t* re memcpy((lv_32fc_t*)result, (lv_32fc_t*)result_aux[0], sizeof(lv_32fc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -139,11 +139,11 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_sse4_1(lv_32fc_t* int code_length_chips = 1023; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; float shifts_chips[3] = { -0.1, 0.0, 0.1 }; lv_32fc_t** result_aux = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -152,7 +152,7 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_sse4_1(lv_32fc_t* memcpy((lv_32fc_t*)result, 
(lv_32fc_t*)result_aux[0], sizeof(lv_32fc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -168,11 +168,11 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_sse4_1(lv_32fc_t* int code_length_chips = 1023; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; float shifts_chips[3] = { -0.1, 0.0, 0.1 }; lv_32fc_t** result_aux = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -181,7 +181,7 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_sse4_1(lv_32fc_t* memcpy((lv_32fc_t*)result, (lv_32fc_t*)result_aux[0], sizeof(lv_32fc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -197,11 +197,11 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_avx(lv_32fc_t* res int code_length_chips = 1023; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; float shifts_chips[3] = { -0.1, 0.0, 0.1 }; lv_32fc_t** result_aux = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -210,7 +210,7 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_avx(lv_32fc_t* res memcpy((lv_32fc_t*)result, (lv_32fc_t*)result_aux[0], sizeof(lv_32fc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { 
volk_gnsssdr_free(result_aux[n]); } @@ -226,11 +226,11 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_avx(lv_32fc_t* res int code_length_chips = 1023; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; float shifts_chips[3] = { -0.1, 0.0, 0.1 }; lv_32fc_t** result_aux = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -239,7 +239,7 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_avx(lv_32fc_t* res memcpy((lv_32fc_t*)result, (lv_32fc_t*)result_aux[0], sizeof(lv_32fc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } @@ -255,11 +255,11 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_neon(lv_32fc_t* resu int code_length_chips = 1023; int num_out_vectors = 3; float rem_code_phase_chips = -0.234; - + unsigned int n; float shifts_chips[3] = { -0.1, 0.0, 0.1 }; lv_32fc_t** result_aux = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { result_aux[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, volk_gnsssdr_get_alignment()); } @@ -268,7 +268,7 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_neon(lv_32fc_t* resu memcpy((lv_32fc_t*)result, (lv_32fc_t*)result_aux[0], sizeof(lv_32fc_t) * num_points); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { volk_gnsssdr_free(result_aux[n]); } diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h 
b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h index 46d22dfe4..0dff83cfe 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h @@ -81,11 +81,13 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_generic(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_32fc_t** in_a, int num_a_vectors, unsigned int num_points) { lv_32fc_t tmp32_1, tmp32_2; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + int n_vec; + unsigned int n; + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { result[n_vec] = lv_cmake(0,0); } - for (unsigned int n = 0; n < num_points; n++) + for (n = 0; n < num_points; n++) { tmp32_1 = *in_common++ * (*phase);//if(n<10 || n >= 8108) printf("generic phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); @@ -102,7 +104,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_generic(lv_32fc } (*phase) *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { tmp32_2 = tmp32_1 * in_a[n_vec][n]; result[n_vec] += tmp32_2; @@ -119,18 +121,21 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_generic_reload( { lv_32fc_t tmp32_1, tmp32_2; const unsigned int ROTATOR_RELOAD = 256; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + int n_vec; + unsigned int n; + unsigned int j; + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { result[n_vec] = lv_cmake(0,0); } - for (unsigned int n = 0; n < num_points / ROTATOR_RELOAD; n++) + for (n = 0; n < num_points / ROTATOR_RELOAD; n++) { - for (unsigned int j = 0; j < ROTATOR_RELOAD; j++) + for (j = 0; j < ROTATOR_RELOAD; j++) { tmp32_1 = *in_common++ * (*phase); (*phase) *= 
phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { tmp32_2 = tmp32_1 * in_a[n_vec][n * ROTATOR_RELOAD + j]; result[n_vec] += tmp32_2; @@ -145,11 +150,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_generic_reload( #endif } - for (unsigned int j = 0; j < num_points % ROTATOR_RELOAD; j++) + for (j = 0; j < num_points % ROTATOR_RELOAD; j++) { tmp32_1 = *in_common++ * (*phase); (*phase) *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { tmp32_2 = tmp32_1 * in_a[n_vec][(num_points / ROTATOR_RELOAD) * ROTATOR_RELOAD + j]; result[n_vec] += tmp32_2; @@ -167,7 +172,10 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_ lv_32fc_t dotProduct = lv_cmake(0,0); lv_32fc_t tmp32_1, tmp32_2; const unsigned int sse_iters = num_points / 2; - + int n_vec; + int i; + unsigned int number; + unsigned int n; const lv_32fc_t** _in_a = in_a; const lv_32fc_t* _in_common = in_common; @@ -175,7 +183,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_ __m128* acc = (__m128*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128), volk_gnsssdr_get_alignment()); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { acc[n_vec] = _mm_setzero_ps(); } @@ -195,7 +203,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_ const __m128 ylp = _mm_moveldup_ps(two_phase_inc_reg); const __m128 yhp = _mm_movehdup_ps(two_phase_inc_reg); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { // Phase rotation on operand in_common starts here: a = _mm_loadu_ps((float*)_in_common); @@ -217,7 +225,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_ //next two samples _in_common += 2; - for (int n_vec = 0; n_vec < num_a_vectors; 
n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a = _mm_loadu_ps((float*)&(_in_a[n_vec][number*2])); tmp1 = _mm_mul_ps(a, yl); @@ -237,11 +245,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_ } } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { _mm_store_ps((float*)dotProductVector, acc[n_vec]); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 2; ++i) + for (i = 0; i < 2; ++i) { dotProduct = dotProduct + dotProductVector[i]; } @@ -252,11 +260,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_ _mm_store_ps((float*)two_phase_acc, two_phase_acc_reg); (*phase) = two_phase_acc[0]; - for(unsigned int n = sse_iters * 2; n < num_points; n++) + for(n = sse_iters * 2; n < num_points; n++) { tmp32_1 = in_common[n] * (*phase); (*phase) *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { tmp32_2 = tmp32_1 * in_a[n_vec][n]; result[n_vec] += tmp32_2; @@ -273,7 +281,10 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_ lv_32fc_t dotProduct = lv_cmake(0,0); lv_32fc_t tmp32_1, tmp32_2; const unsigned int sse_iters = num_points / 2; - + int n_vec; + int i; + unsigned int n; + unsigned int number; const lv_32fc_t** _in_a = in_a; const lv_32fc_t* _in_common = in_common; @@ -281,7 +292,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_ __m128* acc = (__m128*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128), volk_gnsssdr_get_alignment()); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { acc[n_vec] = _mm_setzero_ps(); } @@ -301,7 +312,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_ const __m128 ylp = _mm_moveldup_ps(two_phase_inc_reg); const __m128 yhp = 
_mm_movehdup_ps(two_phase_inc_reg); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { // Phase rotation on operand in_common starts here: a = _mm_load_ps((float*)_in_common); @@ -323,7 +334,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_ //next two samples _in_common += 2; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a = _mm_load_ps((float*)&(_in_a[n_vec][number*2])); tmp1 = _mm_mul_ps(a, yl); @@ -343,11 +354,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_ } } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { _mm_store_ps((float*)dotProductVector, acc[n_vec]); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 2; ++i) + for (i = 0; i < 2; ++i) { dotProduct = dotProduct + dotProductVector[i]; } @@ -358,11 +369,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_ _mm_store_ps((float*)two_phase_acc, two_phase_acc_reg); (*phase) = two_phase_acc[0]; - for(unsigned int n = sse_iters * 2; n < num_points; n++) + for(n = sse_iters * 2; n < num_points; n++) { tmp32_1 = in_common[n] * (*phase); (*phase) *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { tmp32_2 = tmp32_1 * in_a[n_vec][n]; result[n_vec] += tmp32_2; @@ -379,7 +390,10 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t lv_32fc_t dotProduct = lv_cmake(0,0); lv_32fc_t tmp32_1, tmp32_2; const unsigned int avx_iters = num_points / 4; - + int n_vec; + int i; + unsigned int number; + unsigned int n; const lv_32fc_t** _in_a = in_a; const lv_32fc_t* _in_common = in_common; lv_32fc_t _phase = (*phase); @@ -388,7 +402,7 @@ static inline void 
volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t __m256* acc = (__m256*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256), volk_gnsssdr_get_alignment()); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { acc[n_vec] = _mm256_setzero_ps(); result[n_vec] = lv_cmake(0, 0); @@ -417,7 +431,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t const __m256 ylp = _mm256_moveldup_ps(four_phase_inc_reg); const __m256 yhp = _mm256_movehdup_ps(four_phase_inc_reg); - for(unsigned int number = 0; number < avx_iters; number++) + for(number = 0; number < avx_iters; number++) { // Phase rotation on operand in_common starts here: a = _mm256_loadu_ps((float*)_in_common); @@ -439,7 +453,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t //next two samples _in_common += 4; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a = _mm256_loadu_ps((float*)&(_in_a[n_vec][number * 4])); tmp1 = _mm256_mul_ps(a, yl); @@ -459,11 +473,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t } } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { _mm256_store_ps((float*)dotProductVector, acc[n_vec]); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 4; ++i) + for (i = 0; i < 4; ++i) { dotProduct = dotProduct + dotProductVector[i]; } @@ -481,11 +495,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t _phase = four_phase_acc[0]; _mm256_zeroupper(); - for(unsigned int n = avx_iters * 4; n < num_points; n++) + for(n = avx_iters * 4; n < num_points; n++) { tmp32_1 = *_in_common++ * _phase; _phase *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { tmp32_2 = tmp32_1 * 
_in_a[n_vec][n]; result[n_vec] += tmp32_2; @@ -503,7 +517,10 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t lv_32fc_t dotProduct = lv_cmake(0,0); lv_32fc_t tmp32_1, tmp32_2; const unsigned int avx_iters = num_points / 4; - + int n_vec; + int i; + unsigned int number; + unsigned int n; const lv_32fc_t** _in_a = in_a; const lv_32fc_t* _in_common = in_common; lv_32fc_t _phase = (*phase); @@ -512,7 +529,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t __m256* acc = (__m256*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256), volk_gnsssdr_get_alignment()); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { acc[n_vec] = _mm256_setzero_ps(); result[n_vec] = lv_cmake(0, 0); @@ -521,7 +538,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t // phase rotation registers __m256 a, four_phase_acc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z; - __attribute__((aligned(32))) lv_32fc_t four_phase_inc[4]; + __VOLK_ATTR_ALIGNED(32) lv_32fc_t four_phase_inc[4]; const lv_32fc_t phase_inc2 = phase_inc * phase_inc; const lv_32fc_t phase_inc3 = phase_inc2 * phase_inc; const lv_32fc_t phase_inc4 = phase_inc3 * phase_inc; @@ -531,7 +548,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t four_phase_inc[3] = phase_inc4; const __m256 four_phase_inc_reg = _mm256_load_ps((float*)four_phase_inc); - __attribute__((aligned(32))) lv_32fc_t four_phase_acc[4]; + __VOLK_ATTR_ALIGNED(32) lv_32fc_t four_phase_acc[4]; four_phase_acc[0] = _phase; four_phase_acc[1] = _phase * phase_inc; four_phase_acc[2] = _phase * phase_inc2; @@ -541,7 +558,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t const __m256 ylp = _mm256_moveldup_ps(four_phase_inc_reg); const __m256 yhp = _mm256_movehdup_ps(four_phase_inc_reg); - for(unsigned int number = 0; number < avx_iters; number++) + for(number = 
0; number < avx_iters; number++) { // Phase rotation on operand in_common starts here: a = _mm256_load_ps((float*)_in_common); @@ -563,7 +580,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t //next two samples _in_common += 4; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a = _mm256_load_ps((float*)&(_in_a[n_vec][number * 4])); tmp1 = _mm256_mul_ps(a, yl); @@ -583,11 +600,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t } } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { _mm256_store_ps((float*)dotProductVector, acc[n_vec]); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 4; ++i) + for (i = 0; i < 4; ++i) { dotProduct = dotProduct + dotProductVector[i]; } @@ -605,11 +622,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t _phase = four_phase_acc[0]; _mm256_zeroupper(); - for(unsigned int n = avx_iters * 4; n < num_points; n++) + for(n = avx_iters * 4; n < num_points; n++) { tmp32_1 = *_in_common++ * _phase; _phase *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { tmp32_2 = tmp32_1 * _in_a[n_vec][n]; result[n_vec] += tmp32_2; @@ -626,7 +643,10 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_32fc_t** in_a, int num_a_vectors, unsigned int num_points) { const unsigned int neon_iters = num_points / 4; - + int n_vec; + int i; + unsigned int number; + unsigned int n; const lv_32fc_t** _in_a = in_a; const lv_32fc_t* _in_common = in_common; lv_32fc_t* _out = result; @@ -665,7 +685,7 @@ static inline void
volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t* float32x4x2_t* accumulator1 = (float32x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(float32x4x2_t), volk_gnsssdr_get_alignment()); float32x4x2_t* accumulator2 = (float32x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(float32x4x2_t), volk_gnsssdr_get_alignment()); - for(int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for(n_vec = 0; n_vec < num_a_vectors; n_vec++) { accumulator1[n_vec].val[0] = vdupq_n_f32(0.0f); accumulator1[n_vec].val[1] = vdupq_n_f32(0.0f); @@ -673,7 +693,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t* accumulator2[n_vec].val[1] = vdupq_n_f32(0.0f); } - for(unsigned int number = 0; number < neon_iters; number++) + for(number = 0; number < neon_iters; number++) { /* load 4 complex numbers (float 32 bits each component) */ b_val = vld2q_f32((float32_t*)_in_common); @@ -715,7 +735,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t* _phase_imag = vld1q_f32(____phase_imag); } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { a_val = vld2q_f32((float32_t*)&(_in_a[n_vec][number * 4])); @@ -726,16 +746,16 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t* accumulator2[n_vec].val[1] = vmlaq_f32(accumulator2[n_vec].val[1], a_val.val[1], b_val.val[0]); } } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { accumulator1[n_vec].val[0] = vaddq_f32(accumulator1[n_vec].val[0], accumulator2[n_vec].val[0]); accumulator1[n_vec].val[1] = vaddq_f32(accumulator1[n_vec].val[1], accumulator2[n_vec].val[1]); } - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { vst2q_f32((float32_t*)dotProductVector, accumulator1[n_vec]); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 4; ++i) + for 
(i = 0; i < 4; ++i) { dotProduct = dotProduct + dotProductVector[i]; } @@ -750,11 +770,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t* _phase = lv_cmake((float32_t)__phase_real[0], (float32_t)__phase_imag[0]); } - for(unsigned int n = neon_iters * 4; n < num_points; n++) + for(n = neon_iters * 4; n < num_points; n++) { tmp32_1 = in_common[n] * _phase; _phase *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { tmp32_2 = tmp32_1 * in_a[n_vec][n]; _out[n_vec] += tmp32_2; diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc.h index bbd07d8ec..3072542cf 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc.h @@ -50,17 +50,17 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc_generic(lv_ phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_32fc_t** in_a = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_32fc_t*)in_a[n], (lv_32fc_t*)in, sizeof(lv_32fc_t) * num_points); } volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_generic_reload(result, local_code, phase_inc[0], phase, (const lv_32fc_t**) in_a, num_a_vectors, 
num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -80,17 +80,17 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc_generic_rel phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_32fc_t** in_a = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_32fc_t*)in_a[n], (lv_32fc_t*)in, sizeof(lv_32fc_t) * num_points); } volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_generic_reload(result, local_code, phase_inc[0], phase, (const lv_32fc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -110,17 +110,17 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc_u_sse3(lv_3 phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_32fc_t** in_a = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_32fc_t*)in_a[n], (lv_32fc_t*)in, sizeof(lv_32fc_t) * num_points); } volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(result, local_code, phase_inc[0], phase, (const lv_32fc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 
0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -140,17 +140,17 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc_a_sse3(lv_3 phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_32fc_t** in_a = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_32fc_t*)in_a[n], (lv_32fc_t*)in, sizeof(lv_32fc_t) * num_points); } volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(result, local_code, phase_inc[0], phase, (const lv_32fc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -170,17 +170,17 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc_u_avx(lv_32 phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_32fc_t** in_a = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_32fc_t*)in_a[n], (lv_32fc_t*)in, sizeof(lv_32fc_t) * num_points); } volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(result, local_code, phase_inc[0], phase, (const lv_32fc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n 
< num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -200,17 +200,17 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc_a_avx(lv_32 phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_32fc_t** in_a = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_32fc_t*)in_a[n], (lv_32fc_t*)in, sizeof(lv_32fc_t) * num_points); } volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(result, local_code, phase_inc[0], phase, (const lv_32fc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { volk_gnsssdr_free(in_a[n]); } @@ -230,17 +230,17 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc_neon(lv_32f phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); lv_32fc_t phase_inc[1]; phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); - + unsigned int n; int num_a_vectors = 3; lv_32fc_t** in_a = (lv_32fc_t**)volk_gnsssdr_malloc(sizeof(lv_32fc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { in_a[n] = (lv_32fc_t*)volk_gnsssdr_malloc(sizeof(lv_32fc_t) * num_points, volk_gnsssdr_get_alignment()); memcpy((lv_32fc_t*)in_a[n], (lv_32fc_t*)in, sizeof(lv_32fc_t) * num_points); } volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(result, local_code, phase_inc[0], phase, (const lv_32fc_t**) in_a, num_a_vectors, num_points); - for(unsigned int n = 0; n < num_a_vectors; n++) + for(n = 0; n < num_a_vectors; n++) { 
volk_gnsssdr_free(in_a[n]); } diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_xn_resampler_32fc_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_xn_resampler_32fc_xn.h index 3089c9db3..66cebdcbf 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_xn_resampler_32fc_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_xn_resampler_32fc_xn.h @@ -74,9 +74,11 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_generic(lv_32fc_t** result, const lv_32fc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) { int local_code_chip_index; - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + int current_correlator_tap; + int n; + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { - for (int n = 0; n < num_points; n++) + for (n = 0; n < num_points; n++) { // resample code for current tap local_code_chip_index = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); @@ -97,7 +99,9 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_sse3(lv_32fc_t** res { lv_32fc_t** _result = result; const unsigned int quarterPoints = num_points / 4; - + int current_correlator_tap; + unsigned int n; + unsigned int k; const __m128 ones = _mm_set1_ps(1.0f); const __m128 fours = _mm_set1_ps(4.0f); const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips); @@ -112,12 +116,12 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_sse3(lv_32fc_t** res __m128i local_code_chip_index_reg, aux_i, negatives, i; __m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base; - for 
(int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]); aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); - for(unsigned int n = 0; n < quarterPoints; n++) + for(n = 0; n < quarterPoints; n++) { aux = _mm_mul_ps(code_phase_step_chips_reg, indexn); aux = _mm_add_ps(aux, aux2); @@ -138,13 +142,13 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_sse3(lv_32fc_t** res aux_i = _mm_and_si128(code_length_chips_reg_i, negatives); local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); - for(unsigned int k = 0; k < 4; ++k) + for(k = 0; k < 4; ++k) { _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; } indexn = _mm_add_ps(indexn, fours); } - for(unsigned int n = quarterPoints * 4; n < num_points; n++) + for(n = quarterPoints * 4; n < num_points; n++) { // resample code for current tap local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); @@ -165,7 +169,9 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_sse3(lv_32fc_t** res { lv_32fc_t** _result = result; const unsigned int quarterPoints = num_points / 4; - + int current_correlator_tap; + unsigned int n; + unsigned int k; const __m128 ones = _mm_set1_ps(1.0f); const __m128 fours = _mm_set1_ps(4.0f); const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips); @@ -180,12 +186,12 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_sse3(lv_32fc_t** res __m128i local_code_chip_index_reg, aux_i, negatives, i; __m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base; - for (int 
current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]); aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); - for(unsigned int n = 0; n < quarterPoints; n++) + for(n = 0; n < quarterPoints; n++) { aux = _mm_mul_ps(code_phase_step_chips_reg, indexn); aux = _mm_add_ps(aux, aux2); @@ -206,13 +212,13 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_sse3(lv_32fc_t** res aux_i = _mm_and_si128(code_length_chips_reg_i, negatives); local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); - for(unsigned int k = 0; k < 4; ++k) + for(k = 0; k < 4; ++k) { _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; } indexn = _mm_add_ps(indexn, fours); } - for(unsigned int n = quarterPoints * 4; n < num_points; n++) + for(n = quarterPoints * 4; n < num_points; n++) { // resample code for current tap local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); @@ -232,7 +238,9 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_sse4_1(lv_32fc_t** r { lv_32fc_t** _result = result; const unsigned int quarterPoints = num_points / 4; - + int current_correlator_tap; + unsigned int n; + unsigned int k; const __m128 fours = _mm_set1_ps(4.0f); const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips); const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips); @@ -246,12 +254,12 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_sse4_1(lv_32fc_t** r __m128i local_code_chip_index_reg, aux_i, negatives, i; __m128 aux, aux2, shifts_chips_reg, c, cTrunc, 
base; - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]); aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); - for(unsigned int n = 0; n < quarterPoints; n++) + for(n = 0; n < quarterPoints; n++) { aux = _mm_mul_ps(code_phase_step_chips_reg, indexn); aux = _mm_add_ps(aux, aux2); @@ -269,13 +277,13 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_sse4_1(lv_32fc_t** r aux_i = _mm_and_si128(code_length_chips_reg_i, negatives); local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); - for(unsigned int k = 0; k < 4; ++k) + for(k = 0; k < 4; ++k) { _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; } indexn = _mm_add_ps(indexn, fours); } - for(unsigned int n = quarterPoints * 4; n < num_points; n++) + for(n = quarterPoints * 4; n < num_points; n++) { // resample code for current tap local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); @@ -296,7 +304,9 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_sse4_1(lv_32fc_t** r { lv_32fc_t** _result = result; const unsigned int quarterPoints = num_points / 4; - + int current_correlator_tap; + unsigned int n; + unsigned int k; const __m128 fours = _mm_set1_ps(4.0f); const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips); const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips); @@ -310,12 +320,12 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_sse4_1(lv_32fc_t** r __m128i local_code_chip_index_reg, aux_i, negatives, i; __m128 aux, aux2, 
shifts_chips_reg, c, cTrunc, base; - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]); aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); - for(unsigned int n = 0; n < quarterPoints; n++) + for(n = 0; n < quarterPoints; n++) { aux = _mm_mul_ps(code_phase_step_chips_reg, indexn); aux = _mm_add_ps(aux, aux2); @@ -333,13 +343,13 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_sse4_1(lv_32fc_t** r aux_i = _mm_and_si128(code_length_chips_reg_i, negatives); local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); - for(unsigned int k = 0; k < 4; ++k) + for(k = 0; k < 4; ++k) { _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; } indexn = _mm_add_ps(indexn, fours); } - for(unsigned int n = quarterPoints * 4; n < num_points; n++) + for(n = quarterPoints * 4; n < num_points; n++) { // resample code for current tap local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); @@ -360,7 +370,9 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_avx(lv_32fc_t** resu { lv_32fc_t** _result = result; const unsigned int avx_iters = num_points / 8; - + int current_correlator_tap; + unsigned int n; + unsigned int k; const __m256 eights = _mm256_set1_ps(8.0f); const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips); const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips); @@ -375,12 +387,12 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_avx(lv_32fc_t** resu __m256i local_code_chip_index_reg, i; __m256 aux, 
aux2, aux3, shifts_chips_reg, c, cTrunc, base, negatives, indexn; - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { shifts_chips_reg = _mm256_set1_ps((float)shifts_chips[current_correlator_tap]); aux2 = _mm256_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); indexn = n0; - for(unsigned int n = 0; n < avx_iters; n++) + for(n = 0; n < avx_iters; n++) { __builtin_prefetch(&_result[current_correlator_tap][8 * n + 7], 1, 0); __builtin_prefetch(&local_code_chip_index[8], 1, 3); @@ -404,7 +416,7 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_avx(lv_32fc_t** resu local_code_chip_index_reg = _mm256_cvttps_epi32(aux); _mm256_store_si256((__m256i*)local_code_chip_index, local_code_chip_index_reg); - for(unsigned int k = 0; k < 8; ++k) + for(k = 0; k < 8; ++k) { _result[current_correlator_tap][n * 8 + k] = local_code[local_code_chip_index[k]]; } @@ -412,9 +424,9 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_avx(lv_32fc_t** resu } } _mm256_zeroupper(); - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { - for(unsigned int n = avx_iters * 8; n < num_points; n++) + for(n = avx_iters * 8; n < num_points; n++) { // resample code for current tap local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); @@ -435,7 +447,9 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_avx(lv_32fc_t** resu { lv_32fc_t** _result = result; const unsigned int avx_iters = num_points / 8; - + int current_correlator_tap; + unsigned int n; + unsigned int k; const __m256 eights = _mm256_set1_ps(8.0f); const __m256 rem_code_phase_chips_reg = 
_mm256_set1_ps(rem_code_phase_chips); const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips); @@ -450,12 +464,12 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_avx(lv_32fc_t** resu __m256i local_code_chip_index_reg, i; __m256 aux, aux2, aux3, shifts_chips_reg, c, cTrunc, base, negatives, indexn; - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { shifts_chips_reg = _mm256_set1_ps((float)shifts_chips[current_correlator_tap]); aux2 = _mm256_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); indexn = n0; - for(unsigned int n = 0; n < avx_iters; n++) + for(n = 0; n < avx_iters; n++) { __builtin_prefetch(&_result[current_correlator_tap][8 * n + 7], 1, 0); __builtin_prefetch(&local_code_chip_index[8], 1, 3); @@ -479,7 +493,7 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_avx(lv_32fc_t** resu local_code_chip_index_reg = _mm256_cvttps_epi32(aux); _mm256_store_si256((__m256i*)local_code_chip_index, local_code_chip_index_reg); - for(unsigned int k = 0; k < 8; ++k) + for(k = 0; k < 8; ++k) { _result[current_correlator_tap][n * 8 + k] = local_code[local_code_chip_index[k]]; } @@ -487,9 +501,9 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_avx(lv_32fc_t** resu } } _mm256_zeroupper(); - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { - for(unsigned int n = avx_iters * 8; n < num_points; n++) + for(n = avx_iters * 8; n < num_points; n++) { // resample code for current tap local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); @@ -511,6 +525,9 @@ static inline void 
volk_gnsssdr_32fc_xn_resampler_32fc_xn_neon(lv_32fc_t** resul { lv_32fc_t** _result = result; const unsigned int neon_iters = num_points / 4; + int current_correlator_tap; + unsigned int n; + unsigned int k; const int32x4_t ones = vdupq_n_s32(1); const float32x4_t fours = vdupq_n_f32(4.0f); const float32x4_t rem_code_phase_chips_reg = vdupq_n_f32(rem_code_phase_chips); @@ -531,12 +548,12 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_neon(lv_32fc_t** resul reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal); // this refinement is required! float32x4_t n0 = vld1q_f32((float*)vec); - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { shifts_chips_reg = vdupq_n_f32((float)shifts_chips[current_correlator_tap]); aux2 = vsubq_f32(shifts_chips_reg, rem_code_phase_chips_reg); indexn = n0; - for(unsigned int n = 0; n < neon_iters; n++) + for(n = 0; n < neon_iters; n++) { __builtin_prefetch(&_result[current_correlator_tap][4 * n + 3], 1, 0); __builtin_prefetch(&local_code_chip_index[4]); @@ -564,13 +581,13 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_neon(lv_32fc_t** resul vst1q_s32((int32_t*)local_code_chip_index, local_code_chip_index_reg); - for(unsigned int k = 0; k < 4; ++k) + for(k = 0; k < 4; ++k) { _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; } indexn = vaddq_f32(indexn, fours); } - for(unsigned int n = neon_iters * 4; n < num_points; n++) + for(n = neon_iters * 4; n < num_points; n++) { __builtin_prefetch(&_result[current_correlator_tap][n], 1, 0); // resample code for current tap diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h 
index c5985ac3c..e1d577c1e 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h @@ -65,14 +65,15 @@ static inline void volk_gnsssdr_64f_accumulator_64f_u_avx(double* result, const { double returnValue = 0; const unsigned int sse_iters = num_points / 4; - + unsigned int number; + unsigned int i; const double* aPtr = inputBuffer; __VOLK_ATTR_ALIGNED(32) double tempBuffer[4]; __m256d accumulator = _mm256_setzero_pd(); __m256d aVal = _mm256_setzero_pd(); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { aVal = _mm256_loadu_pd(aPtr); accumulator = _mm256_add_pd(accumulator, aVal); @@ -81,12 +82,12 @@ static inline void volk_gnsssdr_64f_accumulator_64f_u_avx(double* result, const _mm256_storeu_pd((double*)tempBuffer, accumulator); - for(unsigned int i = 0; i < 4; ++i) + for(i = 0; i < 4; ++i) { returnValue += tempBuffer[i]; } - for(unsigned int i = 0; i < (num_points % 4); ++i) + for(i = 0; i < (num_points % 4); ++i) { returnValue += (*aPtr++); } @@ -103,14 +104,15 @@ static inline void volk_gnsssdr_64f_accumulator_64f_u_sse3(double* result,const { double returnValue = 0; const unsigned int sse_iters = num_points / 2; - + unsigned int number; + unsigned int i; const double* aPtr = inputBuffer; __VOLK_ATTR_ALIGNED(16) double tempBuffer[2]; __m128d accumulator = _mm_setzero_pd(); __m128d aVal = _mm_setzero_pd(); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { aVal = _mm_loadu_pd(aPtr); accumulator = _mm_add_pd(accumulator, aVal); @@ -119,12 +121,12 @@ static inline void volk_gnsssdr_64f_accumulator_64f_u_sse3(double* result,const _mm_storeu_pd((double*)tempBuffer, accumulator); - for(unsigned int i = 0; i < 2; ++i) + for(i = 0; i < 2; ++i) { returnValue += tempBuffer[i]; 
} - for(unsigned int i = 0; i < (num_points % 2); ++i) + for(i = 0; i < (num_points % 2); ++i) { returnValue += (*aPtr++); } @@ -140,8 +142,9 @@ static inline void volk_gnsssdr_64f_accumulator_64f_generic(double* result,const { const double* aPtr = inputBuffer; double returnValue = 0; + unsigned int number; - for(unsigned int number = 0;number < num_points; number++) + for(number = 0; number < num_points; number++) { returnValue += (*aPtr++); } @@ -157,14 +160,15 @@ static inline void volk_gnsssdr_64f_accumulator_64f_a_avx(double* result,const d { double returnValue = 0; const unsigned int sse_iters = num_points / 4; - + unsigned int number; + unsigned int i; const double* aPtr = inputBuffer; __VOLK_ATTR_ALIGNED(32) double tempBuffer[4]; __m256d accumulator = _mm256_setzero_pd(); __m256d aVal = _mm256_setzero_pd(); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { aVal = _mm256_load_pd(aPtr); accumulator = _mm256_add_pd(accumulator, aVal); @@ -173,12 +177,12 @@ static inline void volk_gnsssdr_64f_accumulator_64f_a_avx(double* result,const d _mm256_store_pd((double*)tempBuffer, accumulator); - for(unsigned int i = 0; i < 4; ++i) + for(i = 0; i < 4; ++i) { returnValue += tempBuffer[i]; } - for(unsigned int i = 0; i < (num_points % 4); ++i) + for(i = 0; i < (num_points % 4); ++i) { returnValue += (*aPtr++); } @@ -195,14 +199,15 @@ static inline void volk_gnsssdr_64f_accumulator_64f_a_sse3(double* result,const { double returnValue = 0; const unsigned int sse_iters = num_points / 2; - + unsigned int number; + unsigned int i; const double* aPtr = inputBuffer; __VOLK_ATTR_ALIGNED(16) double tempBuffer[2]; __m128d accumulator = _mm_setzero_pd(); __m128d aVal = _mm_setzero_pd(); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { aVal = _mm_load_pd(aPtr); accumulator = _mm_add_pd(accumulator, aVal); @@ -211,12 +216,12 @@ static inline void 
volk_gnsssdr_64f_accumulator_64f_a_sse3(double* result,const _mm_store_pd((double*)tempBuffer, accumulator); - for(unsigned int i = 0; i < 2; ++i) + for(i = 0; i < 2; ++i) { returnValue += tempBuffer[i]; } - for(unsigned int i = 0; i < (num_points % 2); ++i) + for(i = 0; i < (num_points % 2); ++i) { returnValue += (*aPtr++); } diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h index 5f70310ed..99588fca5 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h @@ -66,14 +66,15 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_u_sse3(char* result, const ch { char returnValue = 0; const unsigned int sse_iters = num_points / 16; - + unsigned int number; + unsigned int i; const char* aPtr = inputBuffer; __VOLK_ATTR_ALIGNED(16) char tempBuffer[16]; __m128i accumulator = _mm_setzero_si128(); __m128i aVal = _mm_setzero_si128(); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { aVal = _mm_lddqu_si128((__m128i*)aPtr); accumulator = _mm_add_epi8(accumulator, aVal); @@ -81,12 +82,12 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_u_sse3(char* result, const ch } _mm_storeu_si128((__m128i*)tempBuffer, accumulator); - for(unsigned int i = 0; i < 16; ++i) + for(i = 0; i < 16; ++i) { returnValue += tempBuffer[i]; } - for(unsigned int i = 0; i < (num_points % 16); ++i) + for(i = 0; i < (num_points % 16); ++i) { returnValue += (*aPtr++); } @@ -102,8 +103,8 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_generic(char* result, const c { const char* aPtr = inputBuffer; char returnValue = 0; - - for(unsigned int number = 0;number < num_points; number++) + 
unsigned int number; + for(number = 0;number < num_points; number++) { returnValue += (*aPtr++); } @@ -119,6 +120,8 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const ch { char returnValue = 0; const unsigned int sse_iters = num_points / 16; + unsigned int number; + unsigned int i; const char* aPtr = inputBuffer; @@ -126,7 +129,7 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const ch __m128i accumulator = _mm_setzero_si128(); __m128i aVal = _mm_setzero_si128(); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { aVal = _mm_load_si128((__m128i*)aPtr); accumulator = _mm_add_epi8(accumulator, aVal); @@ -134,12 +137,12 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const ch } _mm_store_si128((__m128i*)tempBuffer,accumulator); - for(unsigned int i = 0; i < 16; ++i) + for(i = 0; i < 16; ++i) { returnValue += tempBuffer[i]; } - for(unsigned int i = 0; i < (num_points % 16); ++i) + for(i = 0; i < (num_points % 16); ++i) { returnValue += (*aPtr++); } diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h index c70ea75d5..75ad588d2 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h @@ -66,7 +66,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, con if(num_points > 0) { const unsigned int sse_iters = num_points / 32; - + unsigned int number; + unsigned int i; char* basePtr = (char*)src0; char* inputPtr = (char*)src0; char max = src0[0]; @@ -78,7 +79,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, con ones = 
_mm256_set1_epi8(0xFF); maxValues = _mm_set1_epi8(max); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { currentValues = _mm256_lddqu_si256((__m256i*)inputPtr); @@ -95,7 +96,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, con { _mm256_storeu_si256((__m256i*)&currentValuesBuffer, currentValues); - for(unsigned int i = 0; i < 32; i++) + for(i = 0; i < 32; i++) { if(currentValuesBuffer[i] > max) { @@ -109,7 +110,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, con inputPtr += 32; } - for(unsigned int i = 0; i<(num_points % 32); ++i) + for(i = 0; i<(num_points % 32); ++i) { if(src0[i] > max) { @@ -132,7 +133,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, if(num_points > 0) { const unsigned int sse_iters = num_points / 16; - + unsigned int number; + unsigned int i = 0; char* basePtr = (char*)src0; char* inputPtr = (char*)src0; char max = src0[0]; @@ -142,7 +144,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, maxValues = _mm_set1_epi8(max); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { currentValues = _mm_lddqu_si128((__m128i*)inputPtr); @@ -152,7 +154,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, { _mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues); - for(unsigned int i = 0; i < 16; i++) + for(i = 0; i < 16; i++) { if(currentValuesBuffer[i] > max) { @@ -166,7 +168,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, inputPtr += 16; } - for(unsigned int i = 0; i<(num_points % 16); ++i) + for(i = 0; i<(num_points % 16); ++i) { if(src0[i] > max) { @@ -189,7 +191,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, co if(num_points > 0) { const unsigned int sse_iters = num_points / 16; - + unsigned int number; 
+ unsigned int i; char* basePtr = (char*)src0; char* inputPtr = (char*)src0; char max = src0[0]; @@ -200,7 +203,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, co maxValues = _mm_set1_epi8(max); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { currentValues = _mm_loadu_si128((__m128i*)inputPtr); compareResults = _mm_cmpgt_epi8(maxValues, currentValues); @@ -210,7 +213,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, co { _mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues); mask = ~mask; - unsigned int i = 0; + i = 0; while (mask > 0) { if ((mask & 1) == 1) @@ -229,7 +232,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, co inputPtr += 16; } - for(unsigned int i = 0; i<(num_points % 16); ++i) + for(i = 0; i<(num_points % 16); ++i) { if(src0[i] > max) { @@ -252,8 +255,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, c { char max = src0[0]; unsigned int index = 0; - - for(unsigned int i = 1; i < num_points; ++i) + unsigned int i; + for(i = 1; i < num_points; ++i) { if(src0[i] > max) { @@ -276,7 +279,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, con if(num_points > 0) { const unsigned int sse_iters = num_points / 32; - + unsigned int number; + unsigned int i; char* basePtr = (char*)src0; char* inputPtr = (char*)src0; char max = src0[0]; @@ -288,7 +292,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, con ones = _mm256_set1_epi8(0xFF); maxValues = _mm_set1_epi8(max); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { currentValues = _mm256_load_si256((__m256i*)inputPtr); @@ -305,7 +309,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, con { _mm256_store_si256((__m256i*)&currentValuesBuffer, currentValues); - 
for(unsigned int i = 0; i < 32; i++) + for(i = 0; i < 32; i++) { if(currentValuesBuffer[i] > max) { @@ -319,7 +323,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, con inputPtr += 32; } - for(unsigned int i = 0; i<(num_points % 32); ++i) + for(i = 0; i<(num_points % 32); ++i) { if(src0[i] > max) { @@ -342,7 +346,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, if(num_points > 0) { const unsigned int sse_iters = num_points / 16; - + unsigned int number; + unsigned int i; char* basePtr = (char*)src0; char* inputPtr = (char*)src0; char max = src0[0]; @@ -352,7 +357,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, maxValues = _mm_set1_epi8(max); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { currentValues = _mm_load_si128((__m128i*)inputPtr); @@ -362,7 +367,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, { _mm_store_si128((__m128i*)&currentValuesBuffer, currentValues); - for(unsigned int i = 0; i < 16; i++) + for(i = 0; i < 16; i++) { if(currentValuesBuffer[i] > max) { @@ -376,7 +381,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, inputPtr += 16; } - for(unsigned int i = 0; i<(num_points % 16); ++i) + for(i = 0; i<(num_points % 16); ++i) { if(src0[i] > max) { @@ -399,7 +404,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, co if(num_points > 0) { const unsigned int sse_iters = num_points / 16; - + unsigned int number; + unsigned int i; char* basePtr = (char*)src0; char* inputPtr = (char*)src0; char max = src0[0]; @@ -410,7 +416,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, co maxValues = _mm_set1_epi8(max); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { currentValues = _mm_load_si128((__m128i*)inputPtr); 
compareResults = _mm_cmpgt_epi8(maxValues, currentValues); @@ -420,7 +426,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, co { _mm_store_si128((__m128i*)&currentValuesBuffer, currentValues); mask = ~mask; - unsigned int i = 0; + i = 0; while (mask > 0) { if ((mask & 1) == 1) @@ -439,7 +445,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, co inputPtr += 16; } - for(unsigned int i = 0; i<(num_points % 16); ++i) + for(i = 0; i<(num_points % 16); ++i) { if(src0[i] > max) { diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h index 22d9c193a..2e3bad400 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h @@ -66,7 +66,8 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse4_1(char* target, const char* sr if(num_points > 0) { const unsigned int sse_iters = num_points / 16; - + unsigned int number; + unsigned int i; char* inputPtr = (char*)src0; char max = src0[0]; __VOLK_ATTR_ALIGNED(16) char maxValuesBuffer[16]; @@ -74,7 +75,7 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse4_1(char* target, const char* sr maxValues = _mm_set1_epi8(max); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { currentValues = _mm_loadu_si128((__m128i*)inputPtr); compareResults = _mm_cmpgt_epi8(maxValues, currentValues); @@ -84,7 +85,7 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse4_1(char* target, const char* sr _mm_storeu_si128((__m128i*)maxValuesBuffer, maxValues); - for(unsigned int i = 0; i<16; ++i) + for(i = 0; i < 16; ++i) { if(maxValuesBuffer[i] > max) { @@ -92,7 +93,7 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse4_1(char* target, 
const char* sr } } - for(unsigned int i = sse_iters * 16; i< num_points; ++i) + for(i = sse_iters * 16; i < num_points; ++i) { if(src0[i] > max) { @@ -114,7 +115,8 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse2(char* target, const char* src0 if(num_points > 0) { const unsigned int sse_iters = num_points / 16; - + unsigned int number; + unsigned int i; char* inputPtr = (char*)src0; char max = src0[0]; unsigned short mask; @@ -123,7 +125,7 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse2(char* target, const char* src0 maxValues = _mm_set1_epi8(max); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { currentValues = _mm_loadu_si128((__m128i*)inputPtr); compareResults = _mm_cmpgt_epi8(maxValues, currentValues); @@ -133,7 +135,7 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse2(char* target, const char* src0 { _mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues); mask = ~mask; - int i = 0; + i = 0; while (mask > 0) { if ((mask & 1) == 1) @@ -151,7 +153,7 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse2(char* target, const char* src0 inputPtr += 16; } - for(unsigned int i = sse_iters * 16; i < num_points; ++i) + for(i = sse_iters * 16; i < num_points; ++i) { if(src0[i] > max) { @@ -172,8 +174,8 @@ static inline void volk_gnsssdr_8i_max_s8i_generic(char* target, const char* src if(num_points > 0) { char max = src0[0]; - - for(unsigned int i = 1; i < num_points; ++i) + unsigned int i; + for(i = 1; i < num_points; ++i) { if(src0[i] > max) { @@ -195,7 +197,8 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse4_1(char* target, const char* sr if(num_points > 0) { const unsigned int sse_iters = num_points / 16; - + unsigned int number; + unsigned int i; char* inputPtr = (char*)src0; char max = src0[0]; __VOLK_ATTR_ALIGNED(16) char maxValuesBuffer[16]; @@ -203,7 +206,7 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse4_1(char* target, const char* sr maxValues = _mm_set1_epi8(max); - for(unsigned 
int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { currentValues = _mm_load_si128((__m128i*)inputPtr); compareResults = _mm_cmpgt_epi8(maxValues, currentValues); @@ -213,7 +216,7 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse4_1(char* target, const char* sr _mm_store_si128((__m128i*)maxValuesBuffer, maxValues); - for(unsigned int i = 0; i<16; ++i) + for(i = 0; i < 16; ++i) { if(maxValuesBuffer[i] > max) { @@ -221,7 +224,7 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse4_1(char* target, const char* sr } } - for(unsigned int i = sse_iters * 16; i < num_points; ++i) + for(i = sse_iters * 16; i < num_points; ++i) { if(src0[i] > max) { @@ -243,7 +246,8 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse2(char* target, const char* src0 if(num_points > 0) { const unsigned int sse_iters = num_points / 16; - + unsigned int number; + unsigned int i; char* inputPtr = (char*)src0; char max = src0[0]; unsigned short mask; @@ -252,7 +256,7 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse2(char* target, const char* src0 maxValues = _mm_set1_epi8(max); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { currentValues = _mm_load_si128((__m128i*)inputPtr); compareResults = _mm_cmpgt_epi8(maxValues, currentValues); @@ -280,7 +284,7 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse2(char* target, const char* src0 inputPtr += 16; } - for(unsigned int i = sse_iters * 16; i < num_points; ++i) + for(i = sse_iters * 16; i < num_points; ++i) { if(src0[i] > max) { diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h index 453c7c2b4..54460a3a2 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h +++ 
b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h @@ -64,14 +64,15 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* aVector, const char* bVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 16; - + unsigned int number; + unsigned int i; char* cPtr = cVector; const char* aPtr = aVector; const char* bPtr = bVector; __m128i aVal, bVal, cVal; - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { aVal = _mm_loadu_si128((__m128i*)aPtr); bVal = _mm_loadu_si128((__m128i*)bPtr); @@ -85,7 +86,7 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* a cPtr += 16; } - for(unsigned int i = sse_iters * 16; i < num_points; ++i) + for(i = sse_iters * 16; i < num_points; ++i) { *cPtr++ = (*aPtr++) + (*bPtr++); } @@ -100,7 +101,7 @@ static inline void volk_gnsssdr_8i_x2_add_8i_generic(char* cVector, const char* char* cPtr = cVector; const char* aPtr = aVector; const char* bPtr = bVector; - unsigned int number = 0; + unsigned int number; for(number = 0; number < num_points; number++) { @@ -116,14 +117,15 @@ static inline void volk_gnsssdr_8i_x2_add_8i_generic(char* cVector, const char* static inline void volk_gnsssdr_8i_x2_add_8i_a_sse2(char* cVector, const char* aVector, const char* bVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 16; - + unsigned int number; + unsigned int i; char* cPtr = cVector; const char* aPtr = aVector; const char* bPtr = bVector; __m128i aVal, bVal, cVal; - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { aVal = _mm_load_si128((__m128i*)aPtr); bVal = _mm_load_si128((__m128i*)bPtr); @@ -137,7 +139,7 @@ static inline void volk_gnsssdr_8i_x2_add_8i_a_sse2(char* cVector, const char* a cPtr += 16; } - for(unsigned int i = sse_iters * 16; i < num_points; ++i) + for(i = sse_iters * 
16; i < num_points; ++i) { *cPtr++ = (*aPtr++) + (*bPtr++); } diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h index 33e659717..b49952c9b 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h @@ -65,7 +65,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_avx(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 16; - + unsigned int i; lv_8sc_t* c = cVector; const lv_8sc_t* a = aVector; @@ -74,7 +74,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_avx(lv_8sc_t* cVector, const __m256 conjugator1 = _mm256_castsi256_ps(_mm256_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255)); __m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); - for (unsigned int i = 0; i < sse_iters; ++i) + for (i = 0; i < sse_iters; ++i) { tmp = _mm256_loadu_ps((float*)a); tmp = _mm256_xor_ps(tmp, conjugator1); @@ -90,7 +90,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_avx(lv_8sc_t* cVector, const c += 16; } - for (unsigned int i = sse_iters * 16; i < num_points; ++i) + for (i = sse_iters * 16; i < num_points; ++i) { *c++ = lv_conj(*a++); } @@ -104,14 +104,14 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_avx(lv_8sc_t* cVector, const static inline void volk_gnsssdr_8ic_conjugate_8ic_u_ssse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 8; - + unsigned int i; lv_8sc_t* c = cVector; const lv_8sc_t* a = aVector; __m128i tmp; __m128i conjugator = _mm_setr_epi8(1, -1, 1, 
-1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1); - for (unsigned int i = 0; i < sse_iters; ++i) + for (i = 0; i < sse_iters; ++i) { tmp = _mm_lddqu_si128((__m128i*)a); tmp = _mm_sign_epi8(tmp, conjugator); @@ -120,7 +120,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_ssse3(lv_8sc_t* cVector, con c += 8; } - for (unsigned int i = sse_iters * 8; i < num_points; ++i) + for (i = sse_iters * 8; i < num_points; ++i) { *c++ = lv_conj(*a++); } @@ -135,7 +135,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_ssse3(lv_8sc_t* cVector, con static inline void volk_gnsssdr_8ic_conjugate_8ic_u_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 8; - + unsigned int i; lv_8sc_t* c = cVector; const lv_8sc_t* a = aVector; __m128i tmp; @@ -143,7 +143,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_sse3(lv_8sc_t* cVector, cons __m128i conjugator1 = _mm_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); __m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); - for (unsigned int i = 0; i < sse_iters; ++i) + for (i = 0; i < sse_iters; ++i) { tmp = _mm_lddqu_si128((__m128i*)a); tmp = _mm_xor_si128(tmp, conjugator1); @@ -153,7 +153,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_sse3(lv_8sc_t* cVector, cons c += 8; } - for (unsigned int i = sse_iters * 8; i < num_points; ++i) + for (i = sse_iters * 8; i < num_points; ++i) { *c++ = lv_conj(*a++); } @@ -168,7 +168,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_generic(lv_8sc_t* cVector, con { lv_8sc_t* cPtr = cVector; const lv_8sc_t* aPtr = aVector; - unsigned int number = 0; + unsigned int number; for(number = 0; number < num_points; number++) { @@ -184,7 +184,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_generic(lv_8sc_t* cVector, con static inline void volk_gnsssdr_8ic_conjugate_8ic_a_avx(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points) { const 
unsigned int sse_iters = num_points / 16; - + unsigned int i; lv_8sc_t* c = cVector; const lv_8sc_t* a = aVector; @@ -193,7 +193,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_avx(lv_8sc_t* cVector, const __m256 conjugator1 = _mm256_castsi256_ps(_mm256_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255)); __m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); - for (unsigned int i = 0; i < sse_iters; ++i) + for (i = 0; i < sse_iters; ++i) { tmp = _mm256_load_ps((float*)a); tmp = _mm256_xor_ps(tmp, conjugator1); @@ -209,7 +209,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_avx(lv_8sc_t* cVector, const c += 16; } - for (unsigned int i = sse_iters * 16; i < num_points; ++i) + for (i = sse_iters * 16; i < num_points; ++i) { *c++ = lv_conj(*a++); } @@ -223,14 +223,13 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_avx(lv_8sc_t* cVector, const static inline void volk_gnsssdr_8ic_conjugate_8ic_a_ssse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 8; - + unsigned int i; lv_8sc_t* c = cVector; const lv_8sc_t* a = aVector; __m128i tmp; - __m128i conjugator = _mm_setr_epi8(1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1); - for (unsigned int i = 0; i < sse_iters; ++i) + for (i = 0; i < sse_iters; ++i) { tmp = _mm_load_si128((__m128i*)a); tmp = _mm_sign_epi8(tmp, conjugator); @@ -239,11 +238,10 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_ssse3(lv_8sc_t* cVector, con c += 8; } - for (unsigned int i = sse_iters * 8; i < num_points; ++i) + for (i = sse_iters * 8; i < num_points; ++i) { *c++ = lv_conj(*a++); } - } #endif /* LV_HAVE_SSSE3 */ @@ -254,7 +252,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_ssse3(lv_8sc_t* cVector, con static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int 
num_points) { const unsigned int sse_iters = num_points / 8; - + unsigned int i; lv_8sc_t* c = cVector; const lv_8sc_t* a = aVector; __m128i tmp; @@ -262,7 +260,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, cons __m128i conjugator1 = _mm_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); __m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); - for (unsigned int i = 0; i < sse_iters; ++i) + for (i = 0; i < sse_iters; ++i) { tmp = _mm_load_si128((__m128i*)a); tmp = _mm_xor_si128(tmp, conjugator1); @@ -272,7 +270,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, cons c += 8; } - for (unsigned int i = sse_iters * 8; i < num_points; ++i) + for (i = sse_iters * 8; i < num_points; ++i) { *c++ = lv_conj(*a++); } @@ -297,12 +295,12 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_orc(lv_8sc_t* cVector, const static inline void volk_gnsssdr_8ic_conjugate_8ic_neon(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 8; + unsigned int i; lv_8sc_t* c = cVector; const lv_8sc_t* a = aVector; - int8x8x2_t a_val; - for (unsigned int i = 0; i < sse_iters; ++i) + for (i = 0; i < sse_iters; ++i) { a_val = vld2_s8((const int8_t*)a); __builtin_prefetch(a + 16); @@ -312,7 +310,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_neon(lv_8sc_t* cVector, const c += 8; } - for (unsigned int i = sse_iters * 8; i < num_points; ++i) + for (i = sse_iters * 8; i < num_points; ++i) { *c++ = lv_conj(*a++); } diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h index ebf7b763f..7152b0f29 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h +++ 
b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h @@ -65,7 +65,8 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse3(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 16; - + unsigned int number; + unsigned int i; const char* complexVectorPtr = (char*)complexVector; char* magnitudeVectorPtr = magnitudeVector; @@ -77,7 +78,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse3(char* magnitudeV maska = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0); maskb = _mm_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); - for(unsigned int number = 0;number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { avector = _mm_lddqu_si128((__m128i*)complexVectorPtr); avectorlo = _mm_unpacklo_epi8 (avector, zero); @@ -104,7 +105,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse3(char* magnitudeV magnitudeVectorPtr += 16; } - for (unsigned int i = sse_iters * 16; i < num_points; ++i) + for (i = sse_iters * 16; i < num_points; ++i) { const char valReal = *complexVectorPtr++; const char valImag = *complexVectorPtr++; @@ -160,8 +161,8 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_generic(char* magnitude { const char* complexVectorPtr = (char*)complexVector; char* magnitudeVectorPtr = magnitudeVector; - - for(unsigned int number = 0; number < num_points; number++) + unsigned int number; + for(number = 0; number < num_points; number++) { const char real = *complexVectorPtr++; const char imag = *complexVectorPtr++; @@ -180,6 +181,8 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeV const char* complexVectorPtr = (char*)complexVector; char* magnitudeVectorPtr = magnitudeVector; + unsigned int number; + unsigned int i; __m128i zero, result8; __m128i avector, 
avectorhi, avectorlo, avectorlomult, avectorhimult, aadded, maska; @@ -189,7 +192,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeV maska = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0); maskb = _mm_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); - for(unsigned int number = 0;number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { avector = _mm_load_si128((__m128i*)complexVectorPtr); avectorlo = _mm_unpacklo_epi8 (avector, zero); @@ -216,7 +219,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeV magnitudeVectorPtr += 16; } - for (unsigned int i = sse_iters * 16; i < num_points; ++i) + for (i = sse_iters * 16; i < num_points; ++i) { const char valReal = *complexVectorPtr++; const char valImag = *complexVectorPtr++; diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h index 643c93cbb..b9da5957c 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h @@ -116,13 +116,14 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con { lv_8sc_t dotProduct; memset(&dotProduct, 0x0, 2*sizeof(char)); - + unsigned int number; + unsigned int i; const lv_8sc_t* a = in_a; const lv_8sc_t* b = in_b; const unsigned int sse_iters = num_points/8; - if (sse_iters>0) + if (sse_iters > 0) { __m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc; @@ -130,7 +131,7 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* 
result, con realcacc = _mm_setzero_si128(); imagcacc = _mm_setzero_si128(); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { x = _mm_loadu_si128((__m128i*)a); y = _mm_loadu_si128((__m128i*)b); @@ -168,13 +169,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con _mm_storeu_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector - for (int i = 0; i < 8; ++i) + for (i = 0; i < 8; ++i) { dotProduct += dotProductVector[i]; } } - for (unsigned int i = sse_iters * 8; i < num_points; ++i) + for (i = sse_iters * 8; i < num_points; ++i) { dotProduct += (*a++) * (*b++); } @@ -192,13 +193,14 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c { lv_8sc_t dotProduct; memset(&dotProduct, 0x0, 2*sizeof(char)); - + unsigned int number; + unsigned int i; const lv_8sc_t* a = in_a; const lv_8sc_t* b = in_b; const unsigned int sse_iters = num_points/8; - if (sse_iters>0) + if (sse_iters > 0) { __m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc; @@ -206,7 +208,7 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c realcacc = _mm_setzero_si128(); imagcacc = _mm_setzero_si128(); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { x = _mm_lddqu_si128((__m128i*)a); y = _mm_lddqu_si128((__m128i*)b); @@ -242,13 +244,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c _mm_storeu_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector - for (unsigned int i = 0; i < 8; ++i) + for (i = 0; i < 8; ++i) { dotProduct += dotProductVector[i]; } } - for (unsigned int i = sse_iters * 8; i < num_points; ++i) + for (i = sse_iters * 8; i < num_points; ++i) { dotProduct 
+= (*a++) * (*b++); } @@ -266,13 +268,14 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, con { lv_8sc_t dotProduct; memset(&dotProduct, 0x0, 2*sizeof(char)); - + unsigned int number; + unsigned int i; const lv_8sc_t* a = in_a; const lv_8sc_t* b = in_b; const unsigned int sse_iters = num_points/8; - if (sse_iters>0) + if (sse_iters > 0) { __m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc; @@ -280,7 +283,7 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, con realcacc = _mm_setzero_si128(); imagcacc = _mm_setzero_si128(); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { x = _mm_load_si128((__m128i*)a); y = _mm_load_si128((__m128i*)b); @@ -318,13 +321,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, con _mm_store_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector - for (unsigned int i = 0; i < 8; ++i) + for (i = 0; i < 8; ++i) { dotProduct += dotProductVector[i]; } } - for (unsigned int i = sse_iters * 8; i < num_points; ++i) + for (i = sse_iters * 8; i < num_points; ++i) { dotProduct += (*a++) * (*b++); } @@ -341,7 +344,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c { lv_8sc_t dotProduct; memset(&dotProduct, 0x0, 2*sizeof(char)); - + unsigned int number; + unsigned int i; const lv_8sc_t* a = in_a; const lv_8sc_t* b = in_b; @@ -355,7 +359,7 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c realcacc = _mm_setzero_si128(); imagcacc = _mm_setzero_si128(); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { x = _mm_load_si128((__m128i*)a); y = _mm_load_si128((__m128i*)b); @@ -391,13 +395,13 @@ static inline void 
volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c _mm_store_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector - for (unsigned int i = 0; i < 8; ++i) + for (i = 0; i < 8; ++i) { dotProduct += dotProductVector[i]; } } - for (unsigned int i = sse_iters * 8; i < num_points; ++i) + for (i = sse_iters * 8; i < num_points; ++i) { dotProduct += (*a++) * (*b++); } diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h index 8d7ad6573..185ffa39f 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h @@ -66,7 +66,8 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 8; - + unsigned int number; + unsigned int i; __m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc; lv_8sc_t* c = cVector; const lv_8sc_t* a = aVector; @@ -74,7 +75,7 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { x = _mm_loadu_si128((__m128i*)a); y = _mm_loadu_si128((__m128i*)b); @@ -107,7 +108,7 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co c += 8; } - for (unsigned int i = sse_iters * 8; i < num_points; ++i) + for (i = sse_iters * 8; i < num_points; ++i) { *c++ = (*a++) * (*b++); } @@ -121,7 +122,8 
@@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 8; - + unsigned int number; + unsigned int i; __m128i x, y; __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc; lv_8sc_t* c = cVector; @@ -131,7 +133,7 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, _mm_setzero_si128(); mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { x = _mm_lddqu_si128((__m128i*)a); y = _mm_lddqu_si128((__m128i*)b); @@ -162,7 +164,7 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, c += 8; } - for (unsigned int i = sse_iters * 8; i < num_points; ++i) + for (i = sse_iters * 8; i < num_points; ++i) { *c++ = (*a++) * (*b++); } @@ -177,8 +179,9 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, c lv_8sc_t* cPtr = cVector; const lv_8sc_t* aPtr = aVector; const lv_8sc_t* bPtr = bVector; + unsigned int number; - for(unsigned int number = 0; number < num_points; number++) + for(number = 0; number < num_points; number++) { *cPtr++ = (*aPtr++) * (*bPtr++); } @@ -192,7 +195,8 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, c static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 8; - + unsigned int number; + unsigned int i; __m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc; lv_8sc_t* c 
= cVector; const lv_8sc_t* a = aVector; @@ -200,7 +204,7 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, co mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { x = _mm_load_si128((__m128i*)a); y = _mm_load_si128((__m128i*)b); @@ -233,7 +237,7 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, co c += 8; } - for (unsigned int i = sse_iters * 8; i < num_points; ++i) + for (i = sse_iters * 8; i < num_points; ++i) { *c++ = (*a++) * (*b++); } @@ -247,7 +251,8 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, co static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 8; - + unsigned int number; + unsigned int i; __m128i x, y; __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc; lv_8sc_t* c = cVector; @@ -257,7 +262,7 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, _mm_setzero_si128(); mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { x = _mm_load_si128((__m128i*)a); y = _mm_load_si128((__m128i*)b); @@ -288,7 +293,7 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, c += 8; } - for (unsigned int i = sse_iters * 8; i < num_points; ++i) + for (i = sse_iters * 8; i < num_points; ++i) { *c++ = (*a++) * (*b++); } diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h 
b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h index d6531666d..156f83a48 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h @@ -65,13 +65,15 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points) { const unsigned int sse_iters = num_points / 16; + unsigned int number; + unsigned int i; __m128i x, y, x1, x2, y1, y2, mult1, x1_mult_y1, x2_mult_y2, tmp, tmp1, tmp2, totalc; unsigned char* c = cChar; const unsigned char* a = aChar; const unsigned char* b = bChar; - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { x = _mm_lddqu_si128((__m128i*)a); y = _mm_lddqu_si128((__m128i*)b); @@ -100,7 +102,7 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, c c += 16; } - for (unsigned int i = sse_iters * 16; i < num_points ; ++i) + for (i = sse_iters * 16; i < num_points ; ++i) { *c++ = (*a++) * (*b++); } @@ -114,8 +116,9 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_generic(unsigned char* cChar, unsigned char* cPtr = cChar; const unsigned char* aPtr = aChar; const unsigned char* bPtr = bChar; + unsigned int number; - for(unsigned int number = 0; number < num_points; number++) + for(number = 0; number < num_points; number++) { *cPtr++ = (*aPtr++) * (*bPtr++); } @@ -129,13 +132,14 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_generic(unsigned char* cChar, static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points) { const unsigned int sse_iters = num_points / 16; - + unsigned int number; + unsigned int i; __m128i x, y, x1, x2, y1, 
y2, mult1, x1_mult_y1, x2_mult_y2, tmp, tmp1, tmp2, totalc; unsigned char* c = cChar; const unsigned char* a = aChar; const unsigned char* b = bChar; - for(unsigned int number = 0; number < sse_iters; number++) + for(number = 0; number < sse_iters; number++) { x = _mm_load_si128((__m128i*)a); y = _mm_load_si128((__m128i*)b); @@ -164,7 +168,7 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, c c += 16; } - for (unsigned int i = sse_iters * 16; i < num_points; ++i) + for (i = sse_iters * 16; i < num_points; ++i) { *c++ = (*a++) * (*b++); } diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h index a5ef7efbe..0b60682ce 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h @@ -385,7 +385,8 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_u_sse2(lv_32fc_t* out, const fl static inline void volk_gnsssdr_s32f_sincos_32fc_generic(lv_32fc_t* out, const float phase_inc, float* phase, unsigned int num_points) { float _phase = (*phase); - for(unsigned int i = 0; i < num_points; i++) + unsigned int i; + for(i = 0; i < num_points; i++) { *out++ = lv_cmake((float)cos(_phase), (float)sin(_phase) ); _phase += phase_inc; @@ -402,6 +403,7 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_generic(lv_32fc_t* out, const f static inline void volk_gnsssdr_s32f_sincos_32fc_generic_fxpt(lv_32fc_t* out, const float phase_inc, float* phase, unsigned int num_points) { float _in, s, c; + unsigned int i; int32_t x, sin_index, cos_index, d; const float PI = 3.14159265358979323846; const float TWO_TO_THE_31_DIV_PI = 2147483648.0 / PI; @@ -411,7 +413,7 @@ static inline void 
volk_gnsssdr_s32f_sincos_32fc_generic_fxpt(lv_32fc_t* out, co const int32_t diffbits = bitlength - Nbits; uint32_t ux; float _phase = (*phase); - for(unsigned int i = 0; i < num_points; i++) + for(i = 0; i < num_points; i++) { _in = _phase; d = (int32_t)floor(_in / TWO_PI + 0.5); From ce04d6889c327939e82b9870184dbc46d78b8ce3 Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Tue, 31 May 2016 20:58:59 +0200 Subject: [PATCH 2/5] fix error --- .../volk_gnsssdr/volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic.h index cca1ab87d..bc4c2faa8 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic.h @@ -80,7 +80,7 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_a_sse2(lv_16sc_t float * rem_code_phase_chips = (float*)volk_gnsssdr_malloc(sizeof(float) * num_out_vectors, volk_gnsssdr_get_alignment()); lv_16sc_t** result_aux = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); - for(unsigned int n = 0; n < num_out_vectors; n++) + for(n = 0; n < num_out_vectors; n++) { rem_code_phase_chips[n] = -0.234; result_aux[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); From ef6c5dd7bb68fcbcd1390c6d5aae84d897df7261 Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Tue, 31 May 2016 21:03:33 +0200 Subject: [PATCH 3/5] fix errors --- .../volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git 
a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h index 71fe27938..ea1a864b0 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h @@ -185,6 +185,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ const unsigned int sse_iters = num_points / 4; int n_vec; + int i; unsigned int number; unsigned int n; const lv_16sc_t** _in_a = in_a; @@ -311,7 +312,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ _mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 4; ++i) + for (i = 0; i < 4; ++i) { dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); @@ -331,7 +332,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ //(*phase) = lv_cmake((float*)two_phase_acc[0], (float*)two_phase_acc[1]); (*phase) = two_phase_acc[0]; - for(n = sse_iters * 4; n < num_points; n++) + for(n = sse_iters * 4; n < num_points; n++) { tmp16 = in_common[n]; //printf("a_sse phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); @@ -360,6 +361,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l const unsigned int sse_iters = num_points / 4; const unsigned int ROTATOR_RELOAD = 128; int n_vec; + int i; unsigned int number; unsigned int j; unsigned int n; @@ -557,7 +559,7 @@ static inline void 
volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l _mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector dotProduct = lv_cmake(0,0); - for (int i = 0; i < 4; ++i) + for (i = 0; i < 4; ++i) { dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); @@ -1648,7 +1650,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s tmp32_ = lv_cmake((float32_t)lv_creal(tmp16_), (float32_t)lv_cimag(tmp16_)) * (*phase); tmp16_ = lv_cmake((int16_t)rintf(lv_creal(tmp32_)), (int16_t)rintf(lv_cimag(tmp32_))); (*phase) *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { tmp = tmp16_ * in_a[n_vec][n]; _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp))); @@ -1830,7 +1832,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_ tmp32_ = lv_cmake((float32_t)lv_creal(tmp16_), (float32_t)lv_cimag(tmp16_)) * (*phase); tmp16_ = lv_cmake((int16_t)rintf(lv_creal(tmp32_)), (int16_t)rintf(lv_cimag(tmp32_))); (*phase) *= phase_inc; - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { tmp = tmp16_ * in_a[n_vec][n]; _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp))); From 2abb774f6e3b3f9ff9f1a9b8ef3bdb34dffcd92b Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Tue, 31 May 2016 21:06:28 +0200 Subject: [PATCH 4/5] fix error --- .../volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h 
b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h index 4e59aa1d1..cab07d997 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h @@ -425,7 +425,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_avx(lv_16sc_t** resu } } _mm256_zeroupper(); - for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) + for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) { for(n = avx_iters * 8; n < num_points; n++) { From 2f339d2ee7602b88565be8bdec8fe2e48cf4079c Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Tue, 31 May 2016 21:08:53 +0200 Subject: [PATCH 5/5] fix error --- .../volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h index 19157f8d2..196936d56 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h @@ -471,9 +471,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_avx2(lv_16sc_t* resul } _mm256_zeroupper(); - for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + for (n_vec = 0; n_vec < num_a_vectors; n_vec++) { - for(index = sse_iters * 8; index < num_points; index++) + for(index = sse_iters * 8; index < num_points; index++) { lv_16sc_t tmp = in_common[index] * in_a[n_vec][index];