mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2024-10-03 09:20:52 +00:00
first attempt
This commit is contained in:
parent
bef9638729
commit
27afafce0f
@ -42,12 +42,14 @@
|
|||||||
|
|
||||||
#ifdef LV_HAVE_GENERIC
|
#ifdef LV_HAVE_GENERIC
|
||||||
/*!
|
/*!
|
||||||
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
\brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||||
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
|
\param[out] result Array of num_a_vectors components where the multiplied and accumulated results for the multiple versions of in_a will be stored
|
||||||
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
|
\param[in] in_common Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector)
|
||||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
\param[in,out] phase Initial / final phase
|
||||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||||
|
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||||
|
\param[in] num_points The number of complex values to be multiplied together, accumulated and stored into result
|
||||||
*/
|
*/
|
||||||
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||||
{
|
{
|
||||||
@ -78,12 +80,14 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc
|
|||||||
#include <pmmintrin.h>
|
#include <pmmintrin.h>
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
\brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||||
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
|
\param[out] out Array of num_a_vectors components where the multiplied and accumulated results for the multiple versions of in_a will be stored
|
||||||
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
|
\param[in] in_common Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector)
|
||||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
\param[in,out] phase Initial / final phase
|
||||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||||
|
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||||
|
\param[in] num_points The number of complex values to be multiplied together, accumulated and stored into out
|
||||||
*/
|
*/
|
||||||
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||||
{
|
{
|
||||||
@ -238,12 +242,14 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
|
|||||||
#include <pmmintrin.h>
|
#include <pmmintrin.h>
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
\brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||||
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
|
\param[out] out Array of num_a_vectors components where the multiplied and accumulated results for the multiple versions of in_a will be stored
|
||||||
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
|
\param[in] in_common Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector)
|
||||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
\param[in,out] phase Initial / final phase
|
||||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||||
|
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||||
|
\param[in] num_points The number of complex values to be multiplied together, accumulated and stored into out
|
||||||
*/
|
*/
|
||||||
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||||
{
|
{
|
||||||
@ -352,7 +358,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
|
|||||||
|
|
||||||
realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real);
|
realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real);
|
||||||
imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag);
|
imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -400,12 +405,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
|
|||||||
|
|
||||||
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||||
{
|
{
|
||||||
// for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
|
||||||
// {
|
|
||||||
// result[n_vec] = lv_cmake(0,0);
|
|
||||||
// }
|
|
||||||
lv_16sc_t dotProduct;
|
|
||||||
|
|
||||||
const unsigned int neon_iters = num_points / 4;
|
const unsigned int neon_iters = num_points / 4;
|
||||||
|
|
||||||
const lv_16sc_t** _in_a = in_a;
|
const lv_16sc_t** _in_a = in_a;
|
||||||
@ -414,6 +413,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
|
|||||||
lv_16sc_t tmp16_;
|
lv_16sc_t tmp16_;
|
||||||
lv_32fc_t tmp32_;
|
lv_32fc_t tmp32_;
|
||||||
|
|
||||||
|
lv_16sc_t dotProduct;
|
||||||
|
|
||||||
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
|
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
|
||||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
|
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
|
||||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
|
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
|
||||||
@ -431,27 +432,26 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
|
|||||||
float32x4_t _phase_real = vld1q_f32(__phase_real);
|
float32x4_t _phase_real = vld1q_f32(__phase_real);
|
||||||
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
|
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
|
||||||
|
|
||||||
float32x4_t half = vdupq_n_f32(0.5f);
|
|
||||||
int16x4x2_t tmp16;
|
|
||||||
int32x4x2_t tmp32i;
|
|
||||||
float32x4x2_t tmp32f, tmp_real, tmp_imag;
|
|
||||||
float32x4_t sign, PlusHalf, Round;
|
|
||||||
|
|
||||||
int16x4x2_t* accumulator;
|
|
||||||
accumulator = (int16x4x2_t*)calloc(num_a_vectors, sizeof(int16x4x2_t));
|
|
||||||
|
|
||||||
int16x4x2_t tmp_real16, tmp_imag16;
|
|
||||||
|
|
||||||
for(int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
|
||||||
{
|
|
||||||
accumulator[n_vec].val[0] = vdup_n_s16(0);
|
|
||||||
accumulator[n_vec].val[1] = vdup_n_s16(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (neon_iters > 0)
|
if (neon_iters > 0)
|
||||||
{
|
{
|
||||||
int16x4x2_t a_val, b_val, c_val;
|
int16x4x2_t a_val, b_val, c_val;
|
||||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||||
|
float32x4_t half = vdupq_n_f32(0.5f);
|
||||||
|
int16x4x2_t tmp16;
|
||||||
|
int32x4x2_t tmp32i;
|
||||||
|
float32x4x2_t tmp32f, tmp_real, tmp_imag;
|
||||||
|
float32x4_t sign, PlusHalf, Round;
|
||||||
|
|
||||||
|
int16x4x2_t* accumulator;
|
||||||
|
accumulator = (int16x4x2_t*)calloc(num_a_vectors, sizeof(int16x4x2_t));
|
||||||
|
|
||||||
|
int16x4x2_t tmp_real16, tmp_imag16;
|
||||||
|
|
||||||
|
for(int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||||
|
{
|
||||||
|
accumulator[n_vec].val[0] = vdup_n_s16(0);
|
||||||
|
accumulator[n_vec].val[1] = vdup_n_s16(0);
|
||||||
|
}
|
||||||
|
|
||||||
for(unsigned int number = 0; number < neon_iters; number++)
|
for(unsigned int number = 0; number < neon_iters; number++)
|
||||||
{
|
{
|
||||||
@ -524,8 +524,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
|
|||||||
accumulator[n_vec].val[0] = vadd_s16(accumulator[n_vec].val[0], c_val.val[0]);
|
accumulator[n_vec].val[0] = vadd_s16(accumulator[n_vec].val[0], c_val.val[0]);
|
||||||
accumulator[n_vec].val[1] = vadd_s16(accumulator[n_vec].val[1], c_val.val[1]);
|
accumulator[n_vec].val[1] = vadd_s16(accumulator[n_vec].val[1], c_val.val[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||||
@ -540,6 +538,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
|
|||||||
_out[n_vec] = dotProduct;
|
_out[n_vec] = dotProduct;
|
||||||
}
|
}
|
||||||
free(accumulator);
|
free(accumulator);
|
||||||
|
vst1q_f32((float32_t*)__phase_real, _phase_real);
|
||||||
|
vst1q_f32((float32_t*)__phase_imag, _phase_imag);
|
||||||
|
|
||||||
|
(*phase) = lv_cmake((float32_t)__phase_real[0], (float32_t)__phase_imag[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -558,7 +560,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif /* LV_HAVE_NEON */
|
#endif /* LV_HAVE_NEON */
|
||||||
|
|
||||||
#endif /*INCLUDED_volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_H*/
|
#endif /*INCLUDED_volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_H*/
|
||||||
|
Loading…
Reference in New Issue
Block a user