From 833fe313c72edd32cfefcf741ebcccad2fb2fd94 Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Sun, 31 Jan 2016 18:13:03 +0100 Subject: [PATCH] Improving documentation --- .../volk_gnsssdr_16ic_convert_32fc.h | 24 +++ .../volk_gnsssdr_16ic_resampler_16ic.h | 43 +++- .../volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h | 33 +++ .../volk_gnsssdr_16ic_x2_dot_prod_16ic.h | 75 ++++--- .../volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h | 54 +++-- .../volk_gnsssdr_16ic_x2_multiply_16ic.h | 31 ++- .../volk_gnsssdr_16ic_xn_resampler_16ic_xn.h | 54 ++++- .../volk_gnsssdr_32fc_convert_16ic.h | 55 ++--- .../volk_gnsssdr_32fc_convert_8ic.h | 36 ++-- .../volk_gnsssdr_8ic_x2_dot_prod_8ic.h | 79 ++++---- .../volk_gnsssdr_8ic_x2_multiply_8ic.h | 189 +++++++++--------- 11 files changed, 447 insertions(+), 226 deletions(-) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_convert_32fc.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_convert_32fc.h index 9125b293c..9a8400f7f 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_convert_32fc.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_convert_32fc.h @@ -38,6 +38,12 @@ #ifdef LV_HAVE_GENERIC +/*! + \brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component. + \param[out] outputVector The complex 32-bit float output data buffer + \param[in] inputVector The complex 16-bit integer input data buffer + \param[in] num_points The number of data values to be converted + */ static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) { for(unsigned int i = 0; i < num_points; i++) @@ -50,6 +56,12 @@ static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVecto #ifdef LV_HAVE_SSE2 #include +/*! 
+ \brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component. + \param[out] outputVector The complex 32-bit float output data buffer + \param[in] inputVector The complex 16-bit integer input data buffer + \param[in] num_points The number of data values to be converted + */ static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 2; @@ -80,6 +92,12 @@ static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector #ifdef LV_HAVE_SSE2 #include +/*! + \brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component. + \param[out] outputVector The complex 32-bit float output data buffer + \param[in] inputVector The complex 16-bit integer input data buffer + \param[in] num_points The number of data values to be converted + */ static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 2; @@ -110,6 +128,12 @@ static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector #ifdef LV_HAVE_NEON #include +/*! + \brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component. 
+ \param[out] outputVector The complex 32-bit float output data buffer + \param[in] inputVector The complex 16-bit integer input data buffer + \param[in] num_points The number of data values to be converted + */ static inline void volk_gnsssdr_16ic_convert_32fc_neon(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) { const unsigned int sse_iters = num_points / 2; diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resampler_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resampler_16ic.h index 5afd62f61..5d2f4283a 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resampler_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resampler_16ic.h @@ -47,12 +47,15 @@ //int round_int( float r ) { // return (r > 0.0) ? (r + 0.5) : (r - 0.5); //} + /*! - \brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector - \param cVector The vector where the result will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied - \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector + \brief Resamples a complex vector (16-bit integer each component) + \param[out] result The vector where the result will be stored + \param[in] local_code One of the vectors to be multiplied + \param[in] rem_code_phase_chips Remnant code phase [chips] + \param[in] code_phase_step_chips Phase increment per sample [chips/sample] + \param[in] code_length_chips Code length in chips + \param[in] num_output_samples Number of samples to be processed */ static inline void volk_gnsssdr_16ic_resampler_16ic_generic(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float 
code_phase_step_chips, int code_length_chips, unsigned int num_output_samples) { @@ -73,6 +76,16 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_generic(lv_16sc_t* result, c #ifdef LV_HAVE_SSE2 #include + +/*! + \brief Resamples a complex vector (16-bit integer each component) + \param[out] result The vector where the result will be stored + \param[in] local_code One of the vectors to be multiplied + \param[in] rem_code_phase_chips Remnant code phase [chips] + \param[in] code_phase_step_chips Phase increment per sample [chips/sample] + \param[in] code_length_chips Code length in chips + \param[in] num_output_samples Number of samples to be processed + */ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float) { _MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO @@ -155,6 +168,15 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co #ifdef LV_HAVE_SSE2 #include +/*! 
+ \brief Resamples a complex vector (16-bit integer each component) + \param[out] result The vector where the result will be stored + \param[in] local_code The vector containing the local code to be resampled + \param[in] rem_code_phase_chips Remnant code phase [chips] + \param[in] code_phase_step_chips Phase increment per sample [chips/sample] + \param[in] code_length_chips Code length in chips + \param[in] num_output_samples Number of samples to be processed + */ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float) { _MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO @@ -235,6 +257,16 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co #ifdef LV_HAVE_NEON #include + +/*! + \brief Resamples a complex vector (16-bit integer each component) + \param[out] result The vector where the result will be stored + \param[in] local_code The vector containing the local code to be resampled + \param[in] rem_code_phase_chips Remnant code phase [chips] + \param[in] code_phase_step_chips Phase increment per sample [chips/sample] + \param[in] code_length_chips Code length in chips + \param[in] num_output_samples Number of samples to be processed + */ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float) { unsigned int number; @@ -281,7 +313,6 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons { _code_phase_out = vmulq_f32(_code_phase_step_chips, _4output_index); //compute the code phase point with the phase step _code_phase_out_with_offset = 
vaddq_f32(_code_phase_out, _rem_code_phase); //add the phase offset - //_code_phase_out_int = _mm_cvtps_epi32(_code_phase_out_with_offset); //convert to integer int32x4_t = f(float32x4_t) sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(_code_phase_out_with_offset), 31))); PlusHalf = vaddq_f32(_code_phase_out_with_offset, half); Round = vsubq_f32(PlusHalf, sign); diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h index 21b54779d..cd3000466 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h @@ -45,6 +45,14 @@ #ifdef LV_HAVE_GENERIC +/*! + \brief Rotates a complex vector (16-bit integer samples each component) + \param[out] outVector Rotated vector + \param[in] inVector Vector to be rotated + \param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad)) + \param[in,out] phase Initial / final phase + \param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result + */ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) { unsigned int i = 0; @@ -76,6 +84,14 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* ou #ifdef LV_HAVE_SSE3 #include +/*! 
+ \brief Rotates a complex vector (16-bit integer samples each component) + \param[out] outVector Rotated vector + \param[in] inVector Vector to be rotated + \param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad)) + \param[in,out] phase Initial / final phase + \param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result + */ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) { const unsigned int sse_iters = num_points / 4; @@ -164,6 +180,14 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out #ifdef LV_HAVE_SSE3 #include +/*! + \brief Rotates a complex vector (16-bit integer samples each component) + \param[out] outVector Rotated vector + \param[in] inVector Vector to be rotated + \param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad)) + \param[in,out] phase Initial / final phase + \param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result + */ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) { const unsigned int sse_iters = num_points / 4; @@ -209,6 +233,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out //next two samples _in += 2; a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg + __builtin_prefetch(_in + 8); //complex 32fc multiplication b=a*two_phase_acc_reg yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di @@ -252,6 +277,14 @@ static 
inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out #ifdef LV_HAVE_NEON #include +/*! + \brief Rotates a complex vector (16-bit integer samples each component) + \param[out] outVector Rotated vector + \param[in] inVector Vector to be rotated + \param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad)) + \param[in,out] phase Initial / final phase + \param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result + */ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) { unsigned int i = 0; diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h index 344d1c73e..f50194f9c 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h @@ -42,12 +42,13 @@ #ifdef LV_HAVE_GENERIC + /*! - \brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector - \param cVector The vector where the accumulated result will be stored - \param aVector One of the vectors to be multiplied and accumulated - \param bVector One of the vectors to be multiplied and accumulated - \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector + \brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type. 
+ \param[out] result Value of the accumulated result + \param[in] in_a One of the vectors to be multiplied and accumulated + \param[in] in_b One of the vectors to be multiplied and accumulated + \param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into result */ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) { @@ -64,6 +65,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result, #ifdef LV_HAVE_SSE2 #include + +/*! + \brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type. + \param[out] out Value of the accumulated result + \param[in] in_a One of the vectors to be multiplied and accumulated + \param[in] in_b One of the vectors to be multiplied and accumulated + \param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into out + */ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) { lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0); @@ -92,10 +101,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con // a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r] a = _mm_load_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg b = _mm_load_si128((__m128i*)_in_b); - c = _mm_mullo_epi16 (a, b); // a3.i*b3.i, a3.r*b3.r, .... + c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, .... - c_sr = _mm_srli_si128 (c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst. 
- real = _mm_subs_epi16 (c,c_sr); + c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst. + real = _mm_subs_epi16(c,c_sr); b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i .... a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i .... @@ -105,17 +114,17 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con imag = _mm_adds_epi16(imag1, imag2); //with saturation aritmetic! - realcacc = _mm_adds_epi16 (realcacc, real); - imagcacc = _mm_adds_epi16 (imagcacc, imag); + realcacc = _mm_adds_epi16(realcacc, real); + imagcacc = _mm_adds_epi16(imagcacc, imag); _in_a += 4; _in_b += 4; } - realcacc = _mm_and_si128 (realcacc, mask_real); - imagcacc = _mm_and_si128 (imagcacc, mask_imag); + realcacc = _mm_and_si128(realcacc, mask_real); + imagcacc = _mm_and_si128(imagcacc, mask_imag); - result = _mm_or_si128 (realcacc, imagcacc); + result = _mm_or_si128(realcacc, imagcacc); _mm_store_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector @@ -128,7 +137,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con for (unsigned int i = 0; i < (num_points % 4); ++i) { lv_16sc_t tmp = (*_in_a++) * (*_in_b++); - dotProduct = lv_cmake( sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp))); + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp))); } *_out = dotProduct; @@ -140,6 +149,13 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con #ifdef LV_HAVE_SSE2 #include +/*! + \brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type. 
+ \param[out] out Value of the accumulated result + \param[in] in_a One of the vectors to be multiplied and accumulated + \param[in] in_b One of the vectors to be multiplied and accumulated + \param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into out + */ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) { lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0); @@ -149,6 +165,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con const lv_16sc_t* _in_a = in_a; const lv_16sc_t* _in_b = in_b; lv_16sc_t* _out = out; + unsigned int i; if (sse_iters > 0) { @@ -168,10 +185,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con // a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r] a = _mm_loadu_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg b = _mm_loadu_si128((__m128i*)_in_b); - c = _mm_mullo_epi16 (a, b); // a3.i*b3.i, a3.r*b3.r, .... + c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, .... - c_sr = _mm_srli_si128 (c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst. - real = _mm_subs_epi16 (c,c_sr); + c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst. + real = _mm_subs_epi16(c, c_sr); b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i .... a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i .... @@ -181,30 +198,30 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con imag = _mm_adds_epi16(imag1, imag2); //with saturation aritmetic! 
- realcacc = _mm_adds_epi16 (realcacc, real); - imagcacc = _mm_adds_epi16 (imagcacc, imag); + realcacc = _mm_adds_epi16(realcacc, real); + imagcacc = _mm_adds_epi16(imagcacc, imag); _in_a += 4; _in_b += 4; } - realcacc = _mm_and_si128 (realcacc, mask_real); - imagcacc = _mm_and_si128 (imagcacc, mask_imag); + realcacc = _mm_and_si128(realcacc, mask_real); + imagcacc = _mm_and_si128(imagcacc, mask_imag); - result = _mm_or_si128 (realcacc, imagcacc); + result = _mm_or_si128(realcacc, imagcacc); _mm_storeu_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector - for (int i = 0; i < 4; ++i) + for (i = 0; i < 4; ++i) { dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); } } - for (unsigned int i = 0; i < (num_points % 4); ++i) + for (i = 0; i < (num_points % 4); ++i) { lv_16sc_t tmp = (*_in_a++) * (*_in_b++); - dotProduct = lv_cmake( sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp))); + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp))); } *_out = dotProduct; @@ -214,6 +231,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con #ifdef LV_HAVE_NEON #include + +/*! + \brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type. 
+ \param[out] out Value of the accumulated result + \param[in] in_a One of the vectors to be multiplied and accumulated + \param[in] in_b One of the vectors to be multiplied and accumulated + \param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into out + */ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) { unsigned int quarter_points = num_points / 4; diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h index c8d8a7182..614453f30 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h @@ -42,11 +42,12 @@ #ifdef LV_HAVE_GENERIC /*! 
- \brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector - \param cVector The vector where the accumulated result will be stored - \param aVector One of the vectors to be multiplied and accumulated - \param bVector One of the vectors to be multiplied and accumulated - \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector + \brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector + \param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored + \param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector) + \param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated + \param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated + \param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result */ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) { @@ -68,6 +69,15 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* resu #ifdef LV_HAVE_SSE2 #include + +/*! 
+ \brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector + \param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored + \param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector) + \param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated + \param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated + \param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result + */ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) { lv_16sc_t dotProduct = lv_cmake(0,0); @@ -87,8 +97,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, __m128i* realcacc; __m128i* imagcacc; - realcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0 - imagcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0 + realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 + imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 __m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result; @@ -163,6 +173,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, #ifdef LV_HAVE_SSE2 #include +/*! 
+ \brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector + \param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored + \param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector) + \param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated + \param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated + \param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result + */ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) { lv_16sc_t dotProduct = lv_cmake(0,0); @@ -182,8 +200,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out, __m128i* realcacc; __m128i* imagcacc; - realcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0 - imagcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0 + realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 + imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 __m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result; @@ -214,8 +232,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out, imag = _mm_adds_epi16(imag1, imag2); - realcacc[n_vec] = _mm_adds_epi16 (realcacc[n_vec], real); - imagcacc[n_vec] = _mm_adds_epi16 (imagcacc[n_vec], imag); + realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real); + imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag); } _in_common += 4; @@ -223,10 
+241,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out, for (int n_vec=0;n_vec +/*! + \brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector + \param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored + \param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector) + \param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated + \param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated + \param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result + */ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) { lv_16sc_t dotProduct = lv_cmake(0,0); diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h index 83dfb8a18..cde092fac 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_multiply_16ic.h @@ -42,10 +42,10 @@ #ifdef LV_HAVE_GENERIC /*! 
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector - \param cVector The vector where the result will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied - \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector + \param[out] result The vector where the result will be stored + \param[in] in_a One of the vectors to be multiplied + \param[in] in_b One of the vectors to be multiplied + \param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) { @@ -61,6 +61,14 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result, #ifdef LV_HAVE_SSE2 #include + +/*! + \brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector + \param[out] result The vector where the result will be stored + \param[in] in_a One of the vectors to be multiplied + \param[in] in_b One of the vectors to be multiplied + \param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) { const unsigned int sse_iters = num_points / 4; @@ -112,6 +120,14 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con #ifdef LV_HAVE_SSE2 #include + +/*! 
+ \brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector + \param[out] out The vector where the result will be stored + \param[in] in_a One of the vectors to be multiplied + \param[in] in_b One of the vectors to be multiplied + \param[in] num_points The number of complex values in in_a and in_b to be multiplied together and stored into out + */ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) { const unsigned int sse_iters = num_points / 4; @@ -164,6 +180,13 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, con #ifdef LV_HAVE_NEON #include +/*! + \brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector + \param[out] out The vector where the result will be stored + \param[in] in_a One of the vectors to be multiplied + \param[in] in_b One of the vectors to be multiplied + \param[in] num_points The number of complex values in in_a and in_b to be multiplied together and stored into out + */ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) { lv_16sc_t *a_ptr = (lv_16sc_t*) in_a; diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h index a53c67a9a..e5cc691ed 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h @@ -48,15 +48,18 @@ //int round_int( float r ) { // return (r > 0.0) ? (r + 0.5) : (r - 0.5); //} -/*! 
- \brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector - \param cVector The vector where the result will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied - \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector - */ -static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples) +/*! + \brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs + \param[out] result Pointer to the vector where the results will be stored + \param[in] local_code Vector containing the local code to be resampled + \param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips] + \param[in] code_phase_step_chips Phase increment per sample [chips/sample] + \param[in] code_length_chips Code length in chips + \param[in] num_out_vectors Number of output vectors + \param[in] num_output_samples Number of samples to be processed + */ +static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips, float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples) { int local_code_chip_index; //fesetround(FE_TONEAREST); @@ -65,9 +68,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** re for (unsigned int n = 0; n < num_output_samples; n++) { // resample code for current tap - local_code_chip_index = round(code_phase_step_chips * (float)(n) + rem_code_phase_chips[current_vector]-0.5f); + local_code_chip_index = round(code_phase_step_chips * (float)(n) + 
rem_code_phase_chips[current_vector] - 0.5f); if (local_code_chip_index < 0.0) local_code_chip_index += code_length_chips; - if (local_code_chip_index > (code_length_chips-1)) local_code_chip_index -= code_length_chips; + if (local_code_chip_index > (code_length_chips - 1)) local_code_chip_index -= code_length_chips; //std::cout<<"g["<(n) + rem_code_phase_chips-0.5f<<","< + +/*! + \brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs + \param[out] result Pointer to the vector where the results will be stored + \param[in] local_code Vector containing the local code to be resampled + \param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips] + \param[in] code_phase_step_chips Phase increment per sample [chips/sample] + \param[in] code_length_chips Code length in chips + \param[in] num_out_vectors Number of output vectors + \param[in] num_output_samples Number of samples to be processed + */ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples) { _MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO @@ -172,6 +186,16 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res #ifdef LV_HAVE_SSE2 #include +/*! 
+ \brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs + \param[out] result Pointer to the vector where the results will be stored + \param[in] local_code Vector containing the local code to be resampled + \param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips] + \param[in] code_phase_step_chips Phase increment per sample [chips/sample] + \param[in] code_length_chips Code length in chips + \param[in] num_out_vectors Number of output vectors + \param[in] num_output_samples Number of samples to be processed + */ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples) { _MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO @@ -265,6 +289,16 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res #ifdef LV_HAVE_NEON #include +/*! 
+ \brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs + \param[out] result Pointer to the vector where the results will be stored + \param[in] local_code Vector containing the local code to be resampled + \param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips] + \param[in] code_phase_step_chips Phase increment per sample [chips/sample] + \param[in] code_length_chips Code length in chips + \param[in] num_out_vectors Number of output vectors + \param[in] num_output_samples Number of samples to be processed + */ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples) { unsigned int number; diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h index e96bb66ce..97f7d6c9c 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h @@ -39,11 +39,12 @@ #ifdef LV_HAVE_SSE2 #include + /*! - \brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) - \param inputVector The floating point input data buffer - \param outputVector The 16 bit output data buffer - \param num_points The number of data values to be converted + \brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type. 
+ \param[out] outputVector The complex 16-bit integer output data buffer + \param[in] inputVector The complex 32-bit float data buffer + \param[in] num_points The number of data values to be converted */ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) { @@ -92,11 +93,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector #ifdef LV_HAVE_SSE #include // __m64, __m128 ?? + /*! - \brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) - \param inputVector The floating point input data buffer - \param outputVector The 16 bit output data buffer - \param num_points The number of data values to be converted + \brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type. + \param[out] outputVector The complex 16-bit integer output data buffer + \param[in] inputVector The complex 32-bit float data buffer + \param[in] num_points The number of data values to be converted */ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) { @@ -146,11 +148,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, #ifdef LV_HAVE_SSE2 #include + /*! - \brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) - \param inputVector The floating point input data buffer - \param outputVector The 16 bit output data buffer - \param num_points The number of data values to be converted + \brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type. 
+ \param[out] outputVector The complex 16-bit integer output data buffer + \param[in] inputVector The complex 32-bit float data buffer + \param[in] num_points The number of data values to be converted */ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) { @@ -199,11 +202,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector #ifdef LV_HAVE_SSE #include + /*! - \brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) - \param inputVector The floating point input data buffer - \param outputVector The 16 bit output data buffer - \param num_points The number of data values to be converted + \brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type. + \param[out] outputVector The complex 16-bit integer output data buffer + \param[in] inputVector The complex 32-bit float data buffer + \param[in] num_points The number of data values to be converted */ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) { @@ -252,11 +256,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, #ifdef LV_HAVE_NEON #include + /*! - \brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) - \param inputVector The floating point input data buffer - \param outputVector The 16 bit output data buffer - \param num_points The number of data values to be converted + \brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type. 
+ \param[out] outputVector The complex 16-bit integer output data buffer + \param[in] inputVector The complex 32-bit float data buffer + \param[in] num_points The number of data values to be converted */ static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) { @@ -314,11 +319,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector, #endif /* LV_HAVE_NEON */ #ifdef LV_HAVE_GENERIC + /*! - \brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) - \param inputVector The floating point input data buffer - \param outputVector The 16 bit output data buffer - \param num_points The number of data values to be converted + \brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type. + \param[out] outputVector The complex 16-bit integer output data buffer + \param[in] inputVector The complex 32-bit float data buffer + \param[in] num_points The number of data values to be converted */ static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) { @@ -337,4 +343,5 @@ static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVecto } } #endif /* LV_HAVE_GENERIC */ + #endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_H */ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h index 773694e62..851a97268 100755 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h @@ -41,11 
+41,12 @@ #ifdef LV_HAVE_SSE2 #include + /*! - \brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part) - \param inputVector The floating point input data buffer - \param outputVector The 16 bit output data buffer - \param num_points The number of data values to be converted + \brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type. + \param[out] outputVector The complex 8-bit integer output data buffer + \param[in] inputVector The complex 32-bit float data buffer + \param[in] num_points The number of data values to be converted */ static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) { @@ -103,11 +104,12 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector, #endif /* LV_HAVE_SSE2 */ #ifdef LV_HAVE_GENERIC + /*! - \brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part) - \param inputVector The floating point input data buffer - \param outputVector The 16 bit output data buffer - \param num_points The number of data values to be converted + \brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type. + \param[out] outputVector The complex 8-bit integer output data buffer + \param[in] inputVector The complex 32-bit float data buffer + \param[in] num_points The number of data values to be converted */ static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) { @@ -130,11 +132,12 @@ static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector, #ifdef LV_HAVE_SSE2 #include + /*! 
- \brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part) - \param inputVector The floating point input data buffer - \param outputVector The 16 bit output data buffer - \param num_points The number of data values to be converted + \brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type. + \param[out] outputVector The complex 8-bit integer output data buffer + \param[in] inputVector The complex 32-bit float data buffer + \param[in] num_points The number of data values to be converted */ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) { @@ -193,11 +196,12 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, #ifdef LV_HAVE_NEON #include + /*! - \brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) - \param inputVector The floating point input data buffer - \param outputVector The 16 bit output data buffer - \param num_points The number of data values to be converted + \brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type. 
+ \param[out] outputVector The complex 8-bit integer output data buffer + \param[in] inputVector The complex 32-bit float data buffer + \param[in] num_points The number of data values to be converted */ static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) { diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h index c43f383cc..72dbbcb21 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h @@ -41,12 +41,13 @@ #include #ifdef LV_HAVE_GENERIC + /*! - \brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector - \param cVector The vector where the accumulated result will be stored - \param aVector One of the vectors to be multiplied and accumulated - \param bVector One of the vectors to be multiplied and accumulated - \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector + \brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result. 
+ \param[out] result Value of the accumulated result + \param[in] input One of the vectors to be multiplied + \param[in] taps One of the vectors to be multiplied + \param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result */ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) { @@ -93,12 +94,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, co #ifdef LV_HAVE_SSE2 #include + /*! - \brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector - \param cVector The vector where the accumulated result will be stored - \param aVector One of the vectors to be multiplied and accumulated - \param bVector One of the vectors to be multiplied and accumulated - \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector + \brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result. + \param[out] result Value of the accumulated result + \param[in] input One of the vectors to be multiplied + \param[in] taps One of the vectors to be multiplied + \param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result */ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) { @@ -174,12 +176,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con #ifdef LV_HAVE_SSE4_1 #include + /*! 
- \brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector - \param cVector The vector where the accumulated result will be stored - \param aVector One of the vectors to be multiplied and accumulated - \param bVector One of the vectors to be multiplied and accumulated - \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector + \brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result. + \param[out] result Value of the accumulated result + \param[in] input One of the vectors to be multiplied + \param[in] taps One of the vectors to be multiplied + \param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result */ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) { @@ -254,12 +257,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c #ifdef LV_HAVE_SSE2 #include + /*! - \brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector - \param cVector The vector where the accumulated result will be stored - \param aVector One of the vectors to be multiplied and accumulated - \param bVector One of the vectors to be multiplied and accumulated - \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector + \brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result. 
+ \param[out] result Value of the accumulated result + \param[in] input One of the vectors to be multiplied + \param[in] taps One of the vectors to be multiplied + \param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result */ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) { @@ -335,12 +339,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, con #ifdef LV_HAVE_SSE4_1 #include + /*! - \brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector - \param cVector The vector where the accumulated result will be stored - \param aVector One of the vectors to be multiplied and accumulated - \param bVector One of the vectors to be multiplied and accumulated - \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector + \brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result. + \param[out] result Value of the accumulated result + \param[in] input One of the vectors to be multiplied + \param[in] taps One of the vectors to be multiplied + \param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result */ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) { @@ -413,12 +418,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c #endif /*LV_HAVE_SSE4_1*/ #ifdef LV_HAVE_ORC + /*! 
- \brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector - \param cVector The vector where the accumulated result will be stored - \param aVector One of the vectors to be multiplied and accumulated - \param bVector One of the vectors to be multiplied and accumulated - \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector + \brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result. + \param[out] result Value of the accumulated result + \param[in] input One of the vectors to be multiplied + \param[in] taps One of the vectors to be multiplied + \param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result */ extern void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(short* resRealShort, short* resImagShort, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points); static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) @@ -440,12 +446,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, cons #ifdef LV_HAVE_NEON #include + /*! - \brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector - \param cVector The vector where the accumulated result will be stored - \param aVector One of the vectors to be multiplied and accumulated - \param bVector One of the vectors to be multiplied and accumulated - \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector + \brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result. 
+ \param[out] result Value of the accumulated result + \param[in] input One of the vectors to be multiplied + \param[in] taps One of the vectors to be multiplied + \param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result */ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) { @@ -458,7 +465,7 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const // for 2-lane vectors, 1st lane holds the real part, // 2nd lane holds the imaginary part int8x8x2_t a_val, b_val, c_val, accumulator, tmp_real, tmp_imag; - lv_8sc_t accum_result[8] = { lv_cmake(0,0) }; + __VOLK_ATTR_ALIGNED(16) lv_8sc_t accum_result[8] = { lv_cmake(0,0) }; accumulator.val[0] = vdup_n_s8(0); accumulator.val[1] = vdup_n_s8(0); unsigned int number; diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h index 4376e2802..e18f3218b 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h @@ -40,12 +40,13 @@ #ifdef LV_HAVE_SSE2 #include + /*! 
- \brief Multiplies the two input complex vectors and stores their results in the third vector - \param cVector The vector where the results will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied - \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + \brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector + \param[out] cVector The vector where the results will be stored + \param[in] aVector One of the vectors to be multiplied + \param[in] bVector One of the vectors to be multiplied + \param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) { @@ -63,26 +64,26 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co x = _mm_loadu_si128((__m128i*)a); y = _mm_loadu_si128((__m128i*)b); - imagx = _mm_srli_si128 (x, 1); - imagx = _mm_and_si128 (imagx, mult1); - realx = _mm_and_si128 (x, mult1); + imagx = _mm_srli_si128(x, 1); + imagx = _mm_and_si128(imagx, mult1); + realx = _mm_and_si128(x, mult1); - imagy = _mm_srli_si128 (y, 1); - imagy = _mm_and_si128 (imagy, mult1); - realy = _mm_and_si128 (y, mult1); + imagy = _mm_srli_si128(y, 1); + imagy = _mm_and_si128(imagy, mult1); + realy = _mm_and_si128(y, mult1); - realx_mult_realy = _mm_mullo_epi16 (realx, realy); - imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); - realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); - imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); + realx_mult_realy = _mm_mullo_epi16(realx, realy); + imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy); + realx_mult_imagy = _mm_mullo_epi16(realx, imagy); + imagx_mult_realy = _mm_mullo_epi16(imagx, realy); - realc 
= _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); - realc = _mm_and_si128 (realc, mult1); - imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); - imagc = _mm_and_si128 (imagc, mult1); - imagc = _mm_slli_si128 (imagc, 1); + realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy); + realc = _mm_and_si128(realc, mult1); + imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy); + imagc = _mm_and_si128(imagc, mult1); + imagc = _mm_slli_si128(imagc, 1); - totalc = _mm_or_si128 (realc, imagc); + totalc = _mm_or_si128(realc, imagc); _mm_storeu_si128((__m128i*)c, totalc); @@ -100,12 +101,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co #ifdef LV_HAVE_SSE4_1 #include + /*! - \brief Multiplies the two input complex vectors and stores their results in the third vector - \param cVector The vector where the results will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied - \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + \brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector + \param[out] cVector The vector where the results will be stored + \param[in] aVector One of the vectors to be multiplied + \param[in] bVector One of the vectors to be multiplied + \param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) { @@ -125,24 +127,24 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, x = _mm_lddqu_si128((__m128i*)a); y = _mm_lddqu_si128((__m128i*)b); - imagx = _mm_srli_si128 (x, 1); - imagx = _mm_and_si128 (imagx, mult1); - realx = _mm_and_si128 (x, mult1); + imagx = 
_mm_srli_si128(x, 1); + imagx = _mm_and_si128(imagx, mult1); + realx = _mm_and_si128(x, mult1); - imagy = _mm_srli_si128 (y, 1); - imagy = _mm_and_si128 (imagy, mult1); - realy = _mm_and_si128 (y, mult1); + imagy = _mm_srli_si128(y, 1); + imagy = _mm_and_si128(imagy, mult1); + realy = _mm_and_si128(y, mult1); - realx_mult_realy = _mm_mullo_epi16 (realx, realy); - imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); - realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); - imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); + realx_mult_realy = _mm_mullo_epi16(realx, realy); + imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy); + realx_mult_imagy = _mm_mullo_epi16(realx, imagy); + imagx_mult_realy = _mm_mullo_epi16(imagx, realy); - realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); - imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); - imagc = _mm_slli_si128 (imagc, 1); + realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy); + imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy); + imagc = _mm_slli_si128(imagc, 1); - totalc = _mm_blendv_epi8 (imagc, realc, mult1); + totalc = _mm_blendv_epi8(imagc, realc, mult1); _mm_storeu_si128((__m128i*)c, totalc); @@ -159,12 +161,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, #endif /* LV_HAVE_SSE4_1 */ #ifdef LV_HAVE_GENERIC + /*! 
- \brief Multiplies the two input complex vectors and stores their results in the third vector - \param cVector The vector where the results will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied - \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + \brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector + \param[out] cVector The vector where the results will be stored + \param[in] aVector One of the vectors to be multiplied + \param[in] bVector One of the vectors to be multiplied + \param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) { @@ -182,12 +185,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, c #ifdef LV_HAVE_SSE2 #include + /*! 
- \brief Multiplies the two input complex vectors and stores their results in the third vector - \param cVector The vector where the results will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied - \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + \brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector + \param[out] cVector The vector where the results will be stored + \param[in] aVector One of the vectors to be multiplied + \param[in] bVector One of the vectors to be multiplied + \param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) { @@ -205,24 +209,24 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, co x = _mm_load_si128((__m128i*)a); y = _mm_load_si128((__m128i*)b); - imagx = _mm_srli_si128 (x, 1); - imagx = _mm_and_si128 (imagx, mult1); - realx = _mm_and_si128 (x, mult1); + imagx = _mm_srli_si128(x, 1); + imagx = _mm_and_si128(imagx, mult1); + realx = _mm_and_si128(x, mult1); - imagy = _mm_srli_si128 (y, 1); - imagy = _mm_and_si128 (imagy, mult1); - realy = _mm_and_si128 (y, mult1); + imagy = _mm_srli_si128(y, 1); + imagy = _mm_and_si128(imagy, mult1); + realy = _mm_and_si128(y, mult1); - realx_mult_realy = _mm_mullo_epi16 (realx, realy); - imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); - realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); - imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); + realx_mult_realy = _mm_mullo_epi16(realx, realy); + imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy); + realx_mult_imagy = _mm_mullo_epi16(realx, imagy); + imagx_mult_realy = _mm_mullo_epi16(imagx, realy); - realc 
= _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); - realc = _mm_and_si128 (realc, mult1); - imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); - imagc = _mm_and_si128 (imagc, mult1); - imagc = _mm_slli_si128 (imagc, 1); + realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy); + realc = _mm_and_si128(realc, mult1); + imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy); + imagc = _mm_and_si128(imagc, mult1); + imagc = _mm_slli_si128(imagc, 1); totalc = _mm_or_si128 (realc, imagc); @@ -242,12 +246,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, co #ifdef LV_HAVE_SSE4_1 #include + /*! - \brief Multiplies the two input complex vectors and stores their results in the third vector - \param cVector The vector where the results will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied - \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + \brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector + \param[out] cVector The vector where the results will be stored + \param[in] aVector One of the vectors to be multiplied + \param[in] bVector One of the vectors to be multiplied + \param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector */ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) { @@ -267,24 +272,24 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, x = _mm_load_si128((__m128i*)a); y = _mm_load_si128((__m128i*)b); - imagx = _mm_srli_si128 (x, 1); - imagx = _mm_and_si128 (imagx, mult1); - realx = _mm_and_si128 (x, mult1); + imagx = _mm_srli_si128(x, 1); + imagx = _mm_and_si128(imagx, mult1); + realx = _mm_and_si128(x, 
mult1); - imagy = _mm_srli_si128 (y, 1); - imagy = _mm_and_si128 (imagy, mult1); - realy = _mm_and_si128 (y, mult1); + imagy = _mm_srli_si128(y, 1); + imagy = _mm_and_si128(imagy, mult1); + realy = _mm_and_si128(y, mult1); - realx_mult_realy = _mm_mullo_epi16 (realx, realy); - imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); - realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); - imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); + realx_mult_realy = _mm_mullo_epi16(realx, realy); + imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy); + realx_mult_imagy = _mm_mullo_epi16(realx, imagy); + imagx_mult_realy = _mm_mullo_epi16(imagx, realy); - realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); - imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); - imagc = _mm_slli_si128 (imagc, 1); + realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy); + imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy); + imagc = _mm_slli_si128(imagc, 1); - totalc = _mm_blendv_epi8 (imagc, realc, mult1); + totalc = _mm_blendv_epi8(imagc, realc, mult1); _mm_store_si128((__m128i*)c, totalc); @@ -302,14 +307,16 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, #ifdef LV_HAVE_ORC -/*! - \brief Multiplies the two input complex vectors and stores their results in the third vector - \param cVector The vector where the results will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied - \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector - */ + extern void volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points); + +/*! 
+ \brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector + \param[out] cVector The vector where the results will be stored + \param[in] aVector One of the vectors to be multiplied + \param[in] bVector One of the vectors to be multiplied + \param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) { volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(cVector, aVector, bVector, num_points);