1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2025-01-16 12:12:57 +00:00

Improving documentation

This commit is contained in:
Carles Fernandez 2016-01-31 18:13:03 +01:00
parent f4875012df
commit 833fe313c7
11 changed files with 447 additions and 226 deletions

View File

@ -38,6 +38,12 @@
#ifdef LV_HAVE_GENERIC #ifdef LV_HAVE_GENERIC
/*!
\brief Converts a complex vector with 16-bit integer components into a complex vector with 32-bit float components.
\param[out] outputVector The complex 32-bit float output data buffer
\param[in] inputVector The complex 16-bit integer input data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{ {
for(unsigned int i = 0; i < num_points; i++) for(unsigned int i = 0; i < num_points; i++)
@ -50,6 +56,12 @@ static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVecto
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*!
\brief Converts a complex vector with 16-bit integer components into a complex vector with 32-bit float components.
\param[out] outputVector The complex 32-bit float output data buffer
\param[in] inputVector The complex 16-bit integer input data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{ {
const unsigned int sse_iters = num_points / 2; const unsigned int sse_iters = num_points / 2;
@ -80,6 +92,12 @@ static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*!
\brief Converts a complex vector with 16-bit integer components into a complex vector with 32-bit float components.
\param[out] outputVector The complex 32-bit float output data buffer
\param[in] inputVector The complex 16-bit integer input data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{ {
const unsigned int sse_iters = num_points / 2; const unsigned int sse_iters = num_points / 2;
@ -110,6 +128,12 @@ static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector
#ifdef LV_HAVE_NEON #ifdef LV_HAVE_NEON
#include <arm_neon.h> #include <arm_neon.h>
/*!
\brief Converts a complex vector with 16-bit integer components into a complex vector with 32-bit float components.
\param[out] outputVector The complex 32-bit float output data buffer
\param[in] inputVector The complex 16-bit integer input data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16ic_convert_32fc_neon(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) static inline void volk_gnsssdr_16ic_convert_32fc_neon(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{ {
const unsigned int sse_iters = num_points / 2; const unsigned int sse_iters = num_points / 2;

View File

@ -47,12 +47,15 @@
//int round_int( float r ) { //int round_int( float r ) {
// return (r > 0.0) ? (r + 0.5) : (r - 0.5); // return (r > 0.0) ? (r + 0.5) : (r - 0.5);
//} //}
/*! /*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector \brief Resamples a complex vector (16-bit integer each component)
\param cVector The vector where the result will be stored \param[out] result The vector where the result will be stored
\param aVector One of the vectors to be multiplied \param[in] local_code The vector to be resampled
\param bVector One of the vectors to be multiplied \param[in] rem_code_phase_chips Remnant code phase [chips]
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector \param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_output_samples Number of samples to be processed
*/ */
static inline void volk_gnsssdr_16ic_resampler_16ic_generic(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples) static inline void volk_gnsssdr_16ic_resampler_16ic_generic(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)
{ {
@ -73,6 +76,16 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_generic(lv_16sc_t* result, c
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*!
\brief Resamples a complex vector (16-bit integer each component)
\param[out] result The vector where the result will be stored
\param[in] local_code The vector to be resampled
\param[in] rem_code_phase_chips Remnant code phase [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float) static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
{ {
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO _MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
@ -155,6 +168,15 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*!
\brief Resamples a complex vector (16-bit integer each component)
\param[out] result The vector where the result will be stored
\param[in] local_code The vector to be resampled
\param[in] rem_code_phase_chips Remnant code phase [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float) static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
{ {
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO _MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
@ -235,6 +257,16 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co
#ifdef LV_HAVE_NEON #ifdef LV_HAVE_NEON
#include <arm_neon.h> #include <arm_neon.h>
/*!
\brief Resamples a complex vector (16-bit integer each component)
\param[out] result The vector where the result will be stored
\param[in] local_code The vector to be resampled
\param[in] rem_code_phase_chips Remnant code phase [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float) static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
{ {
unsigned int number; unsigned int number;
@ -281,7 +313,6 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons
{ {
_code_phase_out = vmulq_f32(_code_phase_step_chips, _4output_index); //compute the code phase point with the phase step _code_phase_out = vmulq_f32(_code_phase_step_chips, _4output_index); //compute the code phase point with the phase step
_code_phase_out_with_offset = vaddq_f32(_code_phase_out, _rem_code_phase); //add the phase offset _code_phase_out_with_offset = vaddq_f32(_code_phase_out, _rem_code_phase); //add the phase offset
//_code_phase_out_int = _mm_cvtps_epi32(_code_phase_out_with_offset); //convert to integer int32x4_t = f(float32x4_t)
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(_code_phase_out_with_offset), 31))); sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(_code_phase_out_with_offset), 31)));
PlusHalf = vaddq_f32(_code_phase_out_with_offset, half); PlusHalf = vaddq_f32(_code_phase_out_with_offset, half);
Round = vsubq_f32(PlusHalf, sign); Round = vsubq_f32(PlusHalf, sign);

View File

@ -45,6 +45,14 @@
#ifdef LV_HAVE_GENERIC #ifdef LV_HAVE_GENERIC
/*!
\brief Rotates a complex vector (16-bit integer samples each component)
\param[out] outVector Rotated vector
\param[in] inVector Vector to be rotated
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] num_points The number of complex values to be rotated
*/
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{ {
unsigned int i = 0; unsigned int i = 0;
@ -76,6 +84,14 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* ou
#ifdef LV_HAVE_SSE3 #ifdef LV_HAVE_SSE3
#include <pmmintrin.h> #include <pmmintrin.h>
/*!
\brief Rotates a complex vector (16-bit integer samples each component)
\param[out] outVector Rotated vector
\param[in] inVector Vector to be rotated
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] num_points The number of complex values to be rotated
*/
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{ {
const unsigned int sse_iters = num_points / 4; const unsigned int sse_iters = num_points / 4;
@ -164,6 +180,14 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
#ifdef LV_HAVE_SSE3 #ifdef LV_HAVE_SSE3
#include <pmmintrin.h> #include <pmmintrin.h>
/*!
\brief Rotates a complex vector (16-bit integer samples each component)
\param[out] outVector Rotated vector
\param[in] inVector Vector to be rotated
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] num_points The number of complex values to be rotated
*/
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{ {
const unsigned int sse_iters = num_points / 4; const unsigned int sse_iters = num_points / 4;
@ -209,6 +233,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
//next two samples //next two samples
_in += 2; _in += 2;
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg //complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
@ -252,6 +277,14 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
#ifdef LV_HAVE_NEON #ifdef LV_HAVE_NEON
#include <arm_neon.h> #include <arm_neon.h>
/*!
\brief Rotates a complex vector (16-bit integer samples each component)
\param[out] outVector Rotated vector
\param[in] inVector Vector to be rotated
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] num_points The number of complex values to be rotated
*/
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{ {
unsigned int i = 0; unsigned int i = 0;

View File

@ -42,12 +42,13 @@
#ifdef LV_HAVE_GENERIC #ifdef LV_HAVE_GENERIC
/*! /*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector \brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
\param cVector The vector where the accumulated result will be stored \param[out] result Value of the accumulated result
\param aVector One of the vectors to be multiplied and accumulated \param[in] in_a One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated \param[in] in_b One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector \param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into result
*/ */
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{ {
@ -64,6 +65,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result,
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated, so they never go beyond the limits of the data type.
\param[out] out Value of the accumulated result
\param[in] in_a One of the vectors to be multiplied and accumulated
\param[in] in_b One of the vectors to be multiplied and accumulated
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{ {
lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0); lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0);
@ -92,10 +101,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
// a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r] // a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
a = _mm_load_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg a = _mm_load_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
b = _mm_load_si128((__m128i*)_in_b); b = _mm_load_si128((__m128i*)_in_b);
c = _mm_mullo_epi16 (a, b); // a3.i*b3.i, a3.r*b3.r, .... c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
c_sr = _mm_srli_si128 (c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst. c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
real = _mm_subs_epi16 (c,c_sr); real = _mm_subs_epi16(c,c_sr);
b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i .... b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i ....
a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i .... a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i ....
@ -105,17 +114,17 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
imag = _mm_adds_epi16(imag1, imag2); //with saturation aritmetic! imag = _mm_adds_epi16(imag1, imag2); //with saturation aritmetic!
realcacc = _mm_adds_epi16 (realcacc, real); realcacc = _mm_adds_epi16(realcacc, real);
imagcacc = _mm_adds_epi16 (imagcacc, imag); imagcacc = _mm_adds_epi16(imagcacc, imag);
_in_a += 4; _in_a += 4;
_in_b += 4; _in_b += 4;
} }
realcacc = _mm_and_si128 (realcacc, mask_real); realcacc = _mm_and_si128(realcacc, mask_real);
imagcacc = _mm_and_si128 (imagcacc, mask_imag); imagcacc = _mm_and_si128(imagcacc, mask_imag);
result = _mm_or_si128 (realcacc, imagcacc); result = _mm_or_si128(realcacc, imagcacc);
_mm_store_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector _mm_store_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector
@ -128,7 +137,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
for (unsigned int i = 0; i < (num_points % 4); ++i) for (unsigned int i = 0; i < (num_points % 4); ++i)
{ {
lv_16sc_t tmp = (*_in_a++) * (*_in_b++); lv_16sc_t tmp = (*_in_a++) * (*_in_b++);
dotProduct = lv_cmake( sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp))); dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp)));
} }
*_out = dotProduct; *_out = dotProduct;
@ -140,6 +149,13 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated, so they never go beyond the limits of the data type.
\param[out] out Value of the accumulated result
\param[in] in_a One of the vectors to be multiplied and accumulated
\param[in] in_b One of the vectors to be multiplied and accumulated
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{ {
lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0); lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0);
@ -149,6 +165,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
const lv_16sc_t* _in_a = in_a; const lv_16sc_t* _in_a = in_a;
const lv_16sc_t* _in_b = in_b; const lv_16sc_t* _in_b = in_b;
lv_16sc_t* _out = out; lv_16sc_t* _out = out;
unsigned int i;
if (sse_iters > 0) if (sse_iters > 0)
{ {
@ -168,10 +185,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
// a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r] // a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
a = _mm_loadu_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg a = _mm_loadu_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
b = _mm_loadu_si128((__m128i*)_in_b); b = _mm_loadu_si128((__m128i*)_in_b);
c = _mm_mullo_epi16 (a, b); // a3.i*b3.i, a3.r*b3.r, .... c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
c_sr = _mm_srli_si128 (c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst. c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
real = _mm_subs_epi16 (c,c_sr); real = _mm_subs_epi16(c, c_sr);
b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i .... b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i ....
a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i .... a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i ....
@ -181,30 +198,30 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
imag = _mm_adds_epi16(imag1, imag2); //with saturation aritmetic! imag = _mm_adds_epi16(imag1, imag2); //with saturation aritmetic!
realcacc = _mm_adds_epi16 (realcacc, real); realcacc = _mm_adds_epi16(realcacc, real);
imagcacc = _mm_adds_epi16 (imagcacc, imag); imagcacc = _mm_adds_epi16(imagcacc, imag);
_in_a += 4; _in_a += 4;
_in_b += 4; _in_b += 4;
} }
realcacc = _mm_and_si128 (realcacc, mask_real); realcacc = _mm_and_si128(realcacc, mask_real);
imagcacc = _mm_and_si128 (imagcacc, mask_imag); imagcacc = _mm_and_si128(imagcacc, mask_imag);
result = _mm_or_si128 (realcacc, imagcacc); result = _mm_or_si128(realcacc, imagcacc);
_mm_storeu_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector _mm_storeu_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector
for (int i = 0; i < 4; ++i) for (i = 0; i < 4; ++i)
{ {
dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i])));
} }
} }
for (unsigned int i = 0; i < (num_points % 4); ++i) for (i = 0; i < (num_points % 4); ++i)
{ {
lv_16sc_t tmp = (*_in_a++) * (*_in_b++); lv_16sc_t tmp = (*_in_a++) * (*_in_b++);
dotProduct = lv_cmake( sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp))); dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp)));
} }
*_out = dotProduct; *_out = dotProduct;
@ -214,6 +231,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
#ifdef LV_HAVE_NEON #ifdef LV_HAVE_NEON
#include <arm_neon.h> #include <arm_neon.h>
/*!
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated, so they never go beyond the limits of the data type.
\param[out] out Value of the accumulated result
\param[in] in_a One of the vectors to be multiplied and accumulated
\param[in] in_b One of the vectors to be multiplied and accumulated
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{ {
unsigned int quarter_points = num_points / 4; unsigned int quarter_points = num_points / 4;

View File

@ -42,11 +42,12 @@
#ifdef LV_HAVE_GENERIC #ifdef LV_HAVE_GENERIC
/*! /*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector \brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param cVector The vector where the accumulated result will be stored \param[out] result Array of num_a_vectors components where the accumulated result for each version of in_a will be stored
\param aVector One of the vectors to be multiplied and accumulated \param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
\param bVector One of the vectors to be multiplied and accumulated \param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector \param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
*/ */
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{ {
@ -68,6 +69,15 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* resu
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*!
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] out Array of num_a_vectors components where the accumulated result for each version of in_a will be stored
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The number of complex values to be multiplied together, accumulated and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{ {
lv_16sc_t dotProduct = lv_cmake(0,0); lv_16sc_t dotProduct = lv_cmake(0,0);
@ -87,8 +97,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out,
__m128i* realcacc; __m128i* realcacc;
__m128i* imagcacc; __m128i* imagcacc;
realcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0 realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
imagcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0 imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
__m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result; __m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result;
@ -163,6 +173,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out,
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*!
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] out Array of num_a_vectors components where the accumulated result for each version of in_a will be stored
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The number of complex values to be multiplied together, accumulated and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{ {
lv_16sc_t dotProduct = lv_cmake(0,0); lv_16sc_t dotProduct = lv_cmake(0,0);
@ -182,8 +200,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
__m128i* realcacc; __m128i* realcacc;
__m128i* imagcacc; __m128i* imagcacc;
realcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0 realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
imagcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0 imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
__m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result; __m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result;
@ -214,8 +232,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
imag = _mm_adds_epi16(imag1, imag2); imag = _mm_adds_epi16(imag1, imag2);
realcacc[n_vec] = _mm_adds_epi16 (realcacc[n_vec], real); realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real);
imagcacc[n_vec] = _mm_adds_epi16 (imagcacc[n_vec], imag); imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag);
} }
_in_common += 4; _in_common += 4;
@ -223,10 +241,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
for (int n_vec=0;n_vec<num_a_vectors;n_vec++) for (int n_vec=0;n_vec<num_a_vectors;n_vec++)
{ {
realcacc[n_vec] = _mm_and_si128 (realcacc[n_vec], mask_real); realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real);
imagcacc[n_vec] = _mm_and_si128 (imagcacc[n_vec], mask_imag); imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag);
result = _mm_or_si128 (realcacc[n_vec], imagcacc[n_vec]); result = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]);
_mm_storeu_si128((__m128i*)dotProductVector, result); // Store the results back into the dot product vector _mm_storeu_si128((__m128i*)dotProductVector, result); // Store the results back into the dot product vector
dotProduct = lv_cmake(0,0); dotProduct = lv_cmake(0,0);
@ -258,6 +276,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
#ifdef LV_HAVE_NEON #ifdef LV_HAVE_NEON
#include <arm_neon.h> #include <arm_neon.h>
/*!
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] out Array of num_a_vectors components where the result of multiplying each version of in_a by in_common and accumulating will be stored
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The number of complex values to be multiplied together, accumulated and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{ {
lv_16sc_t dotProduct = lv_cmake(0,0); lv_16sc_t dotProduct = lv_cmake(0,0);

View File

@ -42,10 +42,10 @@
#ifdef LV_HAVE_GENERIC #ifdef LV_HAVE_GENERIC
/*! /*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector \brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param cVector The vector where the result will be stored \param[out] result The vector where the result will be stored
\param aVector One of the vectors to be multiplied \param[in] in_a One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied \param[in] in_b One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector \param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/ */
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) static inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{ {
@ -61,6 +61,14 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result,
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param[out] out The vector where the result will be stored
\param[in] in_a One of the vectors to be multiplied
\param[in] in_b One of the vectors to be multiplied
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{ {
const unsigned int sse_iters = num_points / 4; const unsigned int sse_iters = num_points / 4;
@ -112,6 +120,14 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param[out] out The vector where the result will be stored
\param[in] in_a One of the vectors to be multiplied
\param[in] in_b One of the vectors to be multiplied
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{ {
const unsigned int sse_iters = num_points / 4; const unsigned int sse_iters = num_points / 4;
@ -164,6 +180,13 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, con
#ifdef LV_HAVE_NEON #ifdef LV_HAVE_NEON
#include <arm_neon.h> #include <arm_neon.h>
/*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param[out] out The vector where the result will be stored
\param[in] in_a One of the vectors to be multiplied
\param[in] in_b One of the vectors to be multiplied
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) static inline void volk_gnsssdr_16ic_x2_multiply_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{ {
lv_16sc_t *a_ptr = (lv_16sc_t*) in_a; lv_16sc_t *a_ptr = (lv_16sc_t*) in_a;

View File

@ -48,15 +48,18 @@
//int round_int( float r ) { //int round_int( float r ) {
// return (r > 0.0) ? (r + 0.5) : (r - 0.5); // return (r > 0.0) ? (r + 0.5) : (r - 0.5);
//} //}
/*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param cVector The vector where the result will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
*/
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples) /*!
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
\param[out] result Pointer to an array of pointers to the output vectors where the resampled results will be stored
\param[in] local_code Vector containing the local code to be resampled
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_out_vectors Number of output vectors
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips, float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
{ {
int local_code_chip_index; int local_code_chip_index;
//fesetround(FE_TONEAREST); //fesetround(FE_TONEAREST);
@ -65,9 +68,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** re
for (unsigned int n = 0; n < num_output_samples; n++) for (unsigned int n = 0; n < num_output_samples; n++)
{ {
// resample code for current tap // resample code for current tap
local_code_chip_index = round(code_phase_step_chips * (float)(n) + rem_code_phase_chips[current_vector]-0.5f); local_code_chip_index = round(code_phase_step_chips * (float)(n) + rem_code_phase_chips[current_vector] - 0.5f);
if (local_code_chip_index < 0.0) local_code_chip_index += code_length_chips; if (local_code_chip_index < 0.0) local_code_chip_index += code_length_chips;
if (local_code_chip_index > (code_length_chips-1)) local_code_chip_index -= code_length_chips; if (local_code_chip_index > (code_length_chips - 1)) local_code_chip_index -= code_length_chips;
//std::cout<<"g["<<n<<"]="<<code_phase_step_chips*static_cast<float>(n) + rem_code_phase_chips-0.5f<<","<<local_code_chip_index<<" "; //std::cout<<"g["<<n<<"]="<<code_phase_step_chips*static_cast<float>(n) + rem_code_phase_chips-0.5f<<","<<local_code_chip_index<<" ";
result[current_vector][n] = local_code[local_code_chip_index]; result[current_vector][n] = local_code[local_code_chip_index];
} }
@ -80,6 +83,17 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** re
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*!
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
\param[out] result Pointer to an array of pointers to the output vectors where the resampled results will be stored
\param[in] local_code Vector containing the local code to be resampled
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_out_vectors Number of output vectors
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples) static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
{ {
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO _MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
@ -172,6 +186,16 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*!
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
\param[out] result Pointer to an array of pointers to the output vectors where the resampled results will be stored
\param[in] local_code Vector containing the local code to be resampled
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_out_vectors Number of output vectors
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples) static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
{ {
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO _MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
@ -265,6 +289,16 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res
#ifdef LV_HAVE_NEON #ifdef LV_HAVE_NEON
#include <arm_neon.h> #include <arm_neon.h>
/*!
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
\param[out] result Pointer to an array of pointers to the output vectors where the resampled results will be stored
\param[in] local_code Vector containing the local code to be resampled
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_out_vectors Number of output vectors
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples) static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
{ {
unsigned int number; unsigned int number;

View File

@ -39,11 +39,12 @@
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*! /*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) \brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param inputVector The floating point input data buffer \param[out] outputVector The complex 16-bit integer output data buffer
\param outputVector The 16 bit output data buffer \param[in] inputVector The complex 32-bit float data buffer
\param num_points The number of data values to be converted \param[in] num_points The number of data values to be converted
*/ */
static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{ {
@ -92,11 +93,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
#include <xmmintrin.h> // __m64, __m128 ?? #include <xmmintrin.h> // __m64, __m128 ??
/*! /*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) \brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param inputVector The floating point input data buffer \param[out] outputVector The complex 16-bit integer output data buffer
\param outputVector The 16 bit output data buffer \param[in] inputVector The complex 32-bit float data buffer
\param num_points The number of data values to be converted \param[in] num_points The number of data values to be converted
*/ */
static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{ {
@ -146,11 +148,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector,
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*! /*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) \brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param inputVector The floating point input data buffer \param[out] outputVector The complex 16-bit integer output data buffer
\param outputVector The 16 bit output data buffer \param[in] inputVector The complex 32-bit float data buffer
\param num_points The number of data values to be converted \param[in] num_points The number of data values to be converted
*/ */
static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{ {
@ -199,11 +202,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
#include <xmmintrin.h> #include <xmmintrin.h>
/*! /*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) \brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param inputVector The floating point input data buffer \param[out] outputVector The complex 16-bit integer output data buffer
\param outputVector The 16 bit output data buffer \param[in] inputVector The complex 32-bit float data buffer
\param num_points The number of data values to be converted \param[in] num_points The number of data values to be converted
*/ */
static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{ {
@ -252,11 +256,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector,
#ifdef LV_HAVE_NEON #ifdef LV_HAVE_NEON
#include <arm_neon.h> #include <arm_neon.h>
/*! /*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) \brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param inputVector The floating point input data buffer \param[out] outputVector The complex 16-bit integer output data buffer
\param outputVector The 16 bit output data buffer \param[in] inputVector The complex 32-bit float data buffer
\param num_points The number of data values to be converted \param[in] num_points The number of data values to be converted
*/ */
static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{ {
@ -314,11 +319,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector,
#endif /* LV_HAVE_NEON */ #endif /* LV_HAVE_NEON */
#ifdef LV_HAVE_GENERIC #ifdef LV_HAVE_GENERIC
/*! /*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) \brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param inputVector The floating point input data buffer \param[out] outputVector The complex 16-bit integer output data buffer
\param outputVector The 16 bit output data buffer \param[in] inputVector The complex 32-bit float data buffer
\param num_points The number of data values to be converted \param[in] num_points The number of data values to be converted
*/ */
static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{ {
@ -337,4 +343,5 @@ static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVecto
} }
} }
#endif /* LV_HAVE_GENERIC */ #endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_H */ #endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_H */

View File

@ -41,11 +41,12 @@
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*! /*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part) \brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
\param inputVector The floating point input data buffer \param[out] outputVector The complex 8-bit integer output data buffer
\param outputVector The 16 bit output data buffer \param[in] inputVector The complex 32-bit float data buffer
\param num_points The number of data values to be converted \param[in] num_points The number of data values to be converted
*/ */
static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{ {
@ -103,11 +104,12 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector,
#endif /* LV_HAVE_SSE2 */ #endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC #ifdef LV_HAVE_GENERIC
/*! /*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part) \brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
\param inputVector The floating point input data buffer \param[out] outputVector The complex 8-bit integer output data buffer
\param outputVector The 16 bit output data buffer \param[in] inputVector The complex 32-bit float data buffer
\param num_points The number of data values to be converted \param[in] num_points The number of data values to be converted
*/ */
static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{ {
@ -130,11 +132,12 @@ static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector,
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*! /*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part) \brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
\param inputVector The floating point input data buffer \param[out] outputVector The complex 8-bit integer output data buffer
\param outputVector The 16 bit output data buffer \param[in] inputVector The complex 32-bit float data buffer
\param num_points The number of data values to be converted \param[in] num_points The number of data values to be converted
*/ */
static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{ {
@ -193,11 +196,12 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector,
#ifdef LV_HAVE_NEON #ifdef LV_HAVE_NEON
#include <arm_neon.h> #include <arm_neon.h>
/*! /*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) \brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
\param inputVector The floating point input data buffer \param[out] outputVector The complex 8-bit integer output data buffer
\param outputVector The 16 bit output data buffer \param[in] inputVector The complex 32-bit float data buffer
\param num_points The number of data values to be converted \param[in] num_points The number of data values to be converted
*/ */
static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{ {

View File

@ -41,12 +41,13 @@
#include <volk_gnsssdr/volk_gnsssdr_complex.h> #include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_GENERIC #ifdef LV_HAVE_GENERIC
/*! /*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector \brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param cVector The vector where the accumulated result will be stored \param[out] result Value of the accumulated result
\param aVector One of the vectors to be multiplied and accumulated \param[in] input One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied and accumulated \param[in] taps One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector \param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/ */
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
{ {
@ -93,12 +94,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, co
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*! /*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector \brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param cVector The vector where the accumulated result will be stored \param[out] result Value of the accumulated result
\param aVector One of the vectors to be multiplied and accumulated \param[in] input One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied and accumulated \param[in] taps One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector \param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/ */
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
{ {
@ -174,12 +176,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con
#ifdef LV_HAVE_SSE4_1 #ifdef LV_HAVE_SSE4_1
#include <smmintrin.h> #include <smmintrin.h>
/*! /*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector \brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param cVector The vector where the accumulated result will be stored \param[out] result Value of the accumulated result
\param aVector One of the vectors to be multiplied and accumulated \param[in] input One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied and accumulated \param[in] taps One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector \param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/ */
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
{ {
@ -254,12 +257,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*! /*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector \brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param cVector The vector where the accumulated result will be stored \param[out] result Value of the accumulated result
\param aVector One of the vectors to be multiplied and accumulated \param[in] input One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied and accumulated \param[in] taps One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector \param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/ */
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
{ {
@ -335,12 +339,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, con
#ifdef LV_HAVE_SSE4_1 #ifdef LV_HAVE_SSE4_1
#include <smmintrin.h> #include <smmintrin.h>
/*! /*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector \brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param cVector The vector where the accumulated result will be stored \param[out] result Value of the accumulated result
\param aVector One of the vectors to be multiplied and accumulated \param[in] input One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied and accumulated \param[in] taps One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector \param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/ */
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
{ {
@ -413,12 +418,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c
#endif /*LV_HAVE_SSE4_1*/ #endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_ORC #ifdef LV_HAVE_ORC
/*! /*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector \brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param cVector The vector where the accumulated result will be stored \param[out] result Value of the accumulated result
\param aVector One of the vectors to be multiplied and accumulated \param[in] input One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied and accumulated \param[in] taps One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector \param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/ */
extern void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(short* resRealShort, short* resImagShort, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points); extern void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(short* resRealShort, short* resImagShort, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points);
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
@ -440,12 +446,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, cons
#ifdef LV_HAVE_NEON #ifdef LV_HAVE_NEON
#include <arm_neon.h> #include <arm_neon.h>
/*! /*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector \brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param cVector The vector where the accumulated result will be stored \param[out] result Value of the accumulated result
\param aVector One of the vectors to be multiplied and accumulated \param[in] input One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied and accumulated \param[in] taps One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector \param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/ */
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
{ {
@ -458,7 +465,7 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const
// for 2-lane vectors, 1st lane holds the real part, // for 2-lane vectors, 1st lane holds the real part,
// 2nd lane holds the imaginary part // 2nd lane holds the imaginary part
int8x8x2_t a_val, b_val, c_val, accumulator, tmp_real, tmp_imag; int8x8x2_t a_val, b_val, c_val, accumulator, tmp_real, tmp_imag;
lv_8sc_t accum_result[8] = { lv_cmake(0,0) }; __VOLK_ATTR_ALIGNED(16) lv_8sc_t accum_result[8] = { lv_cmake(0,0) };
accumulator.val[0] = vdup_n_s8(0); accumulator.val[0] = vdup_n_s8(0);
accumulator.val[1] = vdup_n_s8(0); accumulator.val[1] = vdup_n_s8(0);
unsigned int number; unsigned int number;

View File

@ -40,12 +40,13 @@
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*! /*!
\brief Multiplies the two input complex vectors and stores their results in the third vector \brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
\param cVector The vector where the results will be stored \param[out] cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied \param[in] aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied \param[in] bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector \param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/ */
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{ {
@ -63,26 +64,26 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co
x = _mm_loadu_si128((__m128i*)a); x = _mm_loadu_si128((__m128i*)a);
y = _mm_loadu_si128((__m128i*)b); y = _mm_loadu_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1); imagx = _mm_srli_si128(x, 1);
imagx = _mm_and_si128 (imagx, mult1); imagx = _mm_and_si128(imagx, mult1);
realx = _mm_and_si128 (x, mult1); realx = _mm_and_si128(x, mult1);
imagy = _mm_srli_si128 (y, 1); imagy = _mm_srli_si128(y, 1);
imagy = _mm_and_si128 (imagy, mult1); imagy = _mm_and_si128(imagy, mult1);
realy = _mm_and_si128 (y, mult1); realy = _mm_and_si128(y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy); realx_mult_realy = _mm_mullo_epi16(realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
realc = _mm_and_si128 (realc, mult1); realc = _mm_and_si128(realc, mult1);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
imagc = _mm_and_si128 (imagc, mult1); imagc = _mm_and_si128(imagc, mult1);
imagc = _mm_slli_si128 (imagc, 1); imagc = _mm_slli_si128(imagc, 1);
totalc = _mm_or_si128 (realc, imagc); totalc = _mm_or_si128(realc, imagc);
_mm_storeu_si128((__m128i*)c, totalc); _mm_storeu_si128((__m128i*)c, totalc);
@ -100,12 +101,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co
#ifdef LV_HAVE_SSE4_1 #ifdef LV_HAVE_SSE4_1
#include <smmintrin.h> #include <smmintrin.h>
/*! /*!
\brief Multiplies the two input complex vectors and stores their results in the third vector \brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
\param cVector The vector where the results will be stored \param[out] cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied \param[in] aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied \param[in] bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector \param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/ */
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{ {
@ -125,24 +127,24 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector,
x = _mm_lddqu_si128((__m128i*)a); x = _mm_lddqu_si128((__m128i*)a);
y = _mm_lddqu_si128((__m128i*)b); y = _mm_lddqu_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1); imagx = _mm_srli_si128(x, 1);
imagx = _mm_and_si128 (imagx, mult1); imagx = _mm_and_si128(imagx, mult1);
realx = _mm_and_si128 (x, mult1); realx = _mm_and_si128(x, mult1);
imagy = _mm_srli_si128 (y, 1); imagy = _mm_srli_si128(y, 1);
imagy = _mm_and_si128 (imagy, mult1); imagy = _mm_and_si128(imagy, mult1);
realy = _mm_and_si128 (y, mult1); realy = _mm_and_si128(y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy); realx_mult_realy = _mm_mullo_epi16(realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
imagc = _mm_slli_si128 (imagc, 1); imagc = _mm_slli_si128(imagc, 1);
totalc = _mm_blendv_epi8 (imagc, realc, mult1); totalc = _mm_blendv_epi8(imagc, realc, mult1);
_mm_storeu_si128((__m128i*)c, totalc); _mm_storeu_si128((__m128i*)c, totalc);
@ -159,12 +161,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector,
#endif /* LV_HAVE_SSE4_1 */ #endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC #ifdef LV_HAVE_GENERIC
/*! /*!
\brief Multiplies the two input complex vectors and stores their results in the third vector \brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
\param cVector The vector where the results will be stored \param[out] cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied \param[in] aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied \param[in] bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector \param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/ */
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{ {
@ -182,12 +185,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, c
#ifdef LV_HAVE_SSE2 #ifdef LV_HAVE_SSE2
#include <emmintrin.h> #include <emmintrin.h>
/*! /*!
\brief Multiplies the two input complex vectors and stores their results in the third vector \brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
\param cVector The vector where the results will be stored \param[out] cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied \param[in] aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied \param[in] bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector \param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/ */
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{ {
@ -205,24 +209,24 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, co
x = _mm_load_si128((__m128i*)a); x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b); y = _mm_load_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1); imagx = _mm_srli_si128(x, 1);
imagx = _mm_and_si128 (imagx, mult1); imagx = _mm_and_si128(imagx, mult1);
realx = _mm_and_si128 (x, mult1); realx = _mm_and_si128(x, mult1);
imagy = _mm_srli_si128 (y, 1); imagy = _mm_srli_si128(y, 1);
imagy = _mm_and_si128 (imagy, mult1); imagy = _mm_and_si128(imagy, mult1);
realy = _mm_and_si128 (y, mult1); realy = _mm_and_si128(y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy); realx_mult_realy = _mm_mullo_epi16(realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
realc = _mm_and_si128 (realc, mult1); realc = _mm_and_si128(realc, mult1);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
imagc = _mm_and_si128 (imagc, mult1); imagc = _mm_and_si128(imagc, mult1);
imagc = _mm_slli_si128 (imagc, 1); imagc = _mm_slli_si128(imagc, 1);
totalc = _mm_or_si128 (realc, imagc); totalc = _mm_or_si128 (realc, imagc);
@ -242,12 +246,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, co
#ifdef LV_HAVE_SSE4_1 #ifdef LV_HAVE_SSE4_1
#include <smmintrin.h> #include <smmintrin.h>
/*! /*!
\brief Multiplies the two input complex vectors and stores their results in the third vector \brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
\param cVector The vector where the results will be stored \param[out] cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied \param[in] aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied \param[in] bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector \param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/ */
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{ {
@ -267,24 +272,24 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector,
x = _mm_load_si128((__m128i*)a); x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b); y = _mm_load_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1); imagx = _mm_srli_si128(x, 1);
imagx = _mm_and_si128 (imagx, mult1); imagx = _mm_and_si128(imagx, mult1);
realx = _mm_and_si128 (x, mult1); realx = _mm_and_si128(x, mult1);
imagy = _mm_srli_si128 (y, 1); imagy = _mm_srli_si128(y, 1);
imagy = _mm_and_si128 (imagy, mult1); imagy = _mm_and_si128(imagy, mult1);
realy = _mm_and_si128 (y, mult1); realy = _mm_and_si128(y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy); realx_mult_realy = _mm_mullo_epi16(realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
imagc = _mm_slli_si128 (imagc, 1); imagc = _mm_slli_si128(imagc, 1);
totalc = _mm_blendv_epi8 (imagc, realc, mult1); totalc = _mm_blendv_epi8(imagc, realc, mult1);
_mm_store_si128((__m128i*)c, totalc); _mm_store_si128((__m128i*)c, totalc);
@ -302,14 +307,16 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector,
#ifdef LV_HAVE_ORC #ifdef LV_HAVE_ORC
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
extern void volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points); extern void volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points);
/*!
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
\param[out] cVector The vector where the results will be stored
\param[in] aVector One of the vectors to be multiplied
\param[in] bVector One of the vectors to be multiplied
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{ {
volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(cVector, aVector, bVector, num_points); volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(cVector, aVector, bVector, num_points);