mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2025-01-18 21:23:02 +00:00
ask for aligned memory in a more portable way
This commit is contained in:
parent
3d733a5140
commit
817139ba50
@ -100,7 +100,7 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co
|
|||||||
|
|
||||||
lv_16sc_t* _result = result;
|
lv_16sc_t* _result = result;
|
||||||
|
|
||||||
__attribute__((aligned(16))) int local_code_chip_index[4];
|
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||||
__m128 _rem_code_phase, _code_phase_step_chips;
|
__m128 _rem_code_phase, _code_phase_step_chips;
|
||||||
__m128i _code_length_chips, _code_length_chips_minus1;
|
__m128i _code_length_chips, _code_length_chips_minus1;
|
||||||
__m128 _code_phase_out, _code_phase_out_with_offset;
|
__m128 _code_phase_out, _code_phase_out_with_offset;
|
||||||
@ -108,13 +108,13 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co
|
|||||||
|
|
||||||
_rem_code_phase = _mm_load1_ps(&rem_code_phase_chips); //load float to all four float values in m128 register
|
_rem_code_phase = _mm_load1_ps(&rem_code_phase_chips); //load float to all four float values in m128 register
|
||||||
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
|
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
|
||||||
__attribute__((aligned(16))) int four_times_code_length_chips_minus1[4];
|
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
|
||||||
four_times_code_length_chips_minus1[0] = code_length_chips-1;
|
four_times_code_length_chips_minus1[0] = code_length_chips-1;
|
||||||
four_times_code_length_chips_minus1[1] = code_length_chips-1;
|
four_times_code_length_chips_minus1[1] = code_length_chips-1;
|
||||||
four_times_code_length_chips_minus1[2] = code_length_chips-1;
|
four_times_code_length_chips_minus1[2] = code_length_chips-1;
|
||||||
four_times_code_length_chips_minus1[3] = code_length_chips-1;
|
four_times_code_length_chips_minus1[3] = code_length_chips-1;
|
||||||
|
|
||||||
__attribute__((aligned(16))) int four_times_code_length_chips[4];
|
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
|
||||||
four_times_code_length_chips[0] = code_length_chips;
|
four_times_code_length_chips[0] = code_length_chips;
|
||||||
four_times_code_length_chips[1] = code_length_chips;
|
four_times_code_length_chips[1] = code_length_chips;
|
||||||
four_times_code_length_chips[2] = code_length_chips;
|
four_times_code_length_chips[2] = code_length_chips;
|
||||||
@ -127,9 +127,9 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co
|
|||||||
|
|
||||||
__m128i zero = _mm_setzero_si128();
|
__m128i zero = _mm_setzero_si128();
|
||||||
|
|
||||||
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||||
__m128 _4output_index = _mm_load_ps(init_idx_float);
|
__m128 _4output_index = _mm_load_ps(init_idx_float);
|
||||||
__attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
||||||
__m128 _4constant_float = _mm_load_ps(init_4constant_float);
|
__m128 _4constant_float = _mm_load_ps(init_4constant_float);
|
||||||
|
|
||||||
|
|
||||||
@ -183,7 +183,7 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co
|
|||||||
|
|
||||||
lv_16sc_t* _result = result;
|
lv_16sc_t* _result = result;
|
||||||
|
|
||||||
__attribute__((aligned(16))) int local_code_chip_index[4];
|
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||||
__m128 _rem_code_phase, _code_phase_step_chips;
|
__m128 _rem_code_phase, _code_phase_step_chips;
|
||||||
__m128i _code_length_chips, _code_length_chips_minus1;
|
__m128i _code_length_chips, _code_length_chips_minus1;
|
||||||
__m128 _code_phase_out, _code_phase_out_with_offset;
|
__m128 _code_phase_out, _code_phase_out_with_offset;
|
||||||
@ -191,13 +191,13 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co
|
|||||||
|
|
||||||
_rem_code_phase = _mm_load1_ps(&rem_code_phase_chips); //load float to all four float values in m128 register
|
_rem_code_phase = _mm_load1_ps(&rem_code_phase_chips); //load float to all four float values in m128 register
|
||||||
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
|
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
|
||||||
__attribute__((aligned(16))) int four_times_code_length_chips_minus1[4];
|
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
|
||||||
four_times_code_length_chips_minus1[0] = code_length_chips-1;
|
four_times_code_length_chips_minus1[0] = code_length_chips-1;
|
||||||
four_times_code_length_chips_minus1[1] = code_length_chips-1;
|
four_times_code_length_chips_minus1[1] = code_length_chips-1;
|
||||||
four_times_code_length_chips_minus1[2] = code_length_chips-1;
|
four_times_code_length_chips_minus1[2] = code_length_chips-1;
|
||||||
four_times_code_length_chips_minus1[3] = code_length_chips-1;
|
four_times_code_length_chips_minus1[3] = code_length_chips-1;
|
||||||
|
|
||||||
__attribute__((aligned(16))) int four_times_code_length_chips[4];
|
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
|
||||||
four_times_code_length_chips[0] = code_length_chips;
|
four_times_code_length_chips[0] = code_length_chips;
|
||||||
four_times_code_length_chips[1] = code_length_chips;
|
four_times_code_length_chips[1] = code_length_chips;
|
||||||
four_times_code_length_chips[2] = code_length_chips;
|
four_times_code_length_chips[2] = code_length_chips;
|
||||||
@ -210,9 +210,9 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co
|
|||||||
|
|
||||||
__m128i zero = _mm_setzero_si128();
|
__m128i zero = _mm_setzero_si128();
|
||||||
|
|
||||||
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||||
__m128 _4output_index = _mm_loadu_ps(init_idx_float);
|
__m128 _4output_index = _mm_loadu_ps(init_idx_float);
|
||||||
__attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
||||||
__m128 _4constant_float = _mm_loadu_ps(init_4constant_float);
|
__m128 _4constant_float = _mm_loadu_ps(init_4constant_float);
|
||||||
|
|
||||||
for(number = 0; number < quarterPoints; number++)
|
for(number = 0; number < quarterPoints; number++)
|
||||||
@ -265,7 +265,7 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons
|
|||||||
|
|
||||||
lv_16sc_t* _result = result;
|
lv_16sc_t* _result = result;
|
||||||
|
|
||||||
__attribute__((aligned(16))) int local_code_chip_index[4];
|
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||||
float32x4_t _rem_code_phase, _code_phase_step_chips;
|
float32x4_t _rem_code_phase, _code_phase_step_chips;
|
||||||
int32x4_t _code_length_chips, _code_length_chips_minus1;
|
int32x4_t _code_length_chips, _code_length_chips_minus1;
|
||||||
float32x4_t _code_phase_out, _code_phase_out_with_offset;
|
float32x4_t _code_phase_out, _code_phase_out_with_offset;
|
||||||
@ -274,13 +274,13 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons
|
|||||||
|
|
||||||
_rem_code_phase = vld1q_dup_f32(&rem_code_phase_chips); //load float to all four float values in float32x4_t register
|
_rem_code_phase = vld1q_dup_f32(&rem_code_phase_chips); //load float to all four float values in float32x4_t register
|
||||||
_code_phase_step_chips = vld1q_dup_f32(&code_phase_step_chips); //load float to all four float values in float32x4_t register
|
_code_phase_step_chips = vld1q_dup_f32(&code_phase_step_chips); //load float to all four float values in float32x4_t register
|
||||||
__attribute__((aligned(16))) int four_times_code_length_chips_minus1[4];
|
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
|
||||||
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
|
||||||
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
|
||||||
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
|
||||||
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
|
||||||
|
|
||||||
__attribute__((aligned(16))) int four_times_code_length_chips[4];
|
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
|
||||||
four_times_code_length_chips[0] = code_length_chips;
|
four_times_code_length_chips[0] = code_length_chips;
|
||||||
four_times_code_length_chips[1] = code_length_chips;
|
four_times_code_length_chips[1] = code_length_chips;
|
||||||
four_times_code_length_chips[2] = code_length_chips;
|
four_times_code_length_chips[2] = code_length_chips;
|
||||||
@ -293,9 +293,9 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons
|
|||||||
uint32x4_t negative_indexes, overflow_indexes;
|
uint32x4_t negative_indexes, overflow_indexes;
|
||||||
int32x4_t zero = vmovq_n_s32(0);
|
int32x4_t zero = vmovq_n_s32(0);
|
||||||
|
|
||||||
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||||
float32x4_t _4output_index = vld1q_f32(init_idx_float);
|
float32x4_t _4output_index = vld1q_f32(init_idx_float);
|
||||||
__attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
||||||
float32x4_t _4constant_float = vld1q_f32(init_4constant_float);
|
float32x4_t _4constant_float = vld1q_f32(init_4constant_float);
|
||||||
|
|
||||||
for(number = 0; number < quarterPoints; number++)
|
for(number = 0; number < quarterPoints; number++)
|
||||||
|
@ -141,11 +141,11 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
|
|||||||
const unsigned int sse_iters = num_points / 4;
|
const unsigned int sse_iters = num_points / 4;
|
||||||
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
||||||
__m128i c1, c2, result;
|
__m128i c1, c2, result;
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||||
two_phase_inc[0] = phase_inc * phase_inc;
|
two_phase_inc[0] = phase_inc * phase_inc;
|
||||||
two_phase_inc[1] = phase_inc * phase_inc;
|
two_phase_inc[1] = phase_inc * phase_inc;
|
||||||
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||||
two_phase_acc[0] = (*phase);
|
two_phase_acc[0] = (*phase);
|
||||||
two_phase_acc[1] = (*phase) * phase_inc;
|
two_phase_acc[1] = (*phase) * phase_inc;
|
||||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||||
@ -243,11 +243,11 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc
|
|||||||
const unsigned int ROTATOR_RELOAD = 512;
|
const unsigned int ROTATOR_RELOAD = 512;
|
||||||
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
||||||
__m128i c1, c2, result;
|
__m128i c1, c2, result;
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||||
two_phase_inc[0] = phase_inc * phase_inc;
|
two_phase_inc[0] = phase_inc * phase_inc;
|
||||||
two_phase_inc[1] = phase_inc * phase_inc;
|
two_phase_inc[1] = phase_inc * phase_inc;
|
||||||
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||||
two_phase_acc[0] = (*phase);
|
two_phase_acc[0] = (*phase);
|
||||||
two_phase_acc[1] = (*phase) * phase_inc;
|
two_phase_acc[1] = (*phase) * phase_inc;
|
||||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||||
@ -393,11 +393,11 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
|
|||||||
const unsigned int sse_iters = num_points / 4;
|
const unsigned int sse_iters = num_points / 4;
|
||||||
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
||||||
__m128i c1, c2, result;
|
__m128i c1, c2, result;
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||||
two_phase_inc[0] = phase_inc * phase_inc;
|
two_phase_inc[0] = phase_inc * phase_inc;
|
||||||
two_phase_inc[1] = phase_inc * phase_inc;
|
two_phase_inc[1] = phase_inc * phase_inc;
|
||||||
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||||
two_phase_acc[0] = (*phase);
|
two_phase_acc[0] = (*phase);
|
||||||
two_phase_acc[1] = (*phase) * phase_inc;
|
two_phase_acc[1] = (*phase) * phase_inc;
|
||||||
two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc);
|
two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc);
|
||||||
@ -494,11 +494,11 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(lv_16sc
|
|||||||
unsigned int ROTATOR_RELOAD = 512;
|
unsigned int ROTATOR_RELOAD = 512;
|
||||||
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
||||||
__m128i c1, c2, result;
|
__m128i c1, c2, result;
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||||
two_phase_inc[0] = phase_inc * phase_inc;
|
two_phase_inc[0] = phase_inc * phase_inc;
|
||||||
two_phase_inc[1] = phase_inc * phase_inc;
|
two_phase_inc[1] = phase_inc * phase_inc;
|
||||||
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||||
two_phase_acc[0] = (*phase);
|
two_phase_acc[0] = (*phase);
|
||||||
two_phase_acc[1] = (*phase) * phase_inc;
|
two_phase_acc[1] = (*phase) * phase_inc;
|
||||||
two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc);
|
two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc);
|
||||||
|
@ -203,11 +203,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
|
|||||||
// phase rotation registers
|
// phase rotation registers
|
||||||
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
|
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
|
||||||
__m128i pc1, pc2;
|
__m128i pc1, pc2;
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||||
two_phase_inc[0] = phase_inc * phase_inc;
|
two_phase_inc[0] = phase_inc * phase_inc;
|
||||||
two_phase_inc[1] = phase_inc * phase_inc;
|
two_phase_inc[1] = phase_inc * phase_inc;
|
||||||
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||||
two_phase_acc[0] = (*phase);
|
two_phase_acc[0] = (*phase);
|
||||||
two_phase_acc[1] = (*phase) * phase_inc;
|
two_phase_acc[1] = (*phase) * phase_inc;
|
||||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||||
@ -376,11 +376,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l
|
|||||||
// phase rotation registers
|
// phase rotation registers
|
||||||
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
|
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
|
||||||
__m128i pc1, pc2;
|
__m128i pc1, pc2;
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||||
two_phase_inc[0] = phase_inc * phase_inc;
|
two_phase_inc[0] = phase_inc * phase_inc;
|
||||||
two_phase_inc[1] = phase_inc * phase_inc;
|
two_phase_inc[1] = phase_inc * phase_inc;
|
||||||
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||||
two_phase_acc[0] = (*phase);
|
two_phase_acc[0] = (*phase);
|
||||||
two_phase_acc[1] = (*phase) * phase_inc;
|
two_phase_acc[1] = (*phase) * phase_inc;
|
||||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||||
@ -619,11 +619,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
|
|||||||
// phase rotation registers
|
// phase rotation registers
|
||||||
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
|
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
|
||||||
__m128i pc1, pc2;
|
__m128i pc1, pc2;
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||||
two_phase_inc[0] = phase_inc * phase_inc;
|
two_phase_inc[0] = phase_inc * phase_inc;
|
||||||
two_phase_inc[1] = phase_inc * phase_inc;
|
two_phase_inc[1] = phase_inc * phase_inc;
|
||||||
two_phase_inc_reg = _mm_loadu_ps((float*) two_phase_inc);
|
two_phase_inc_reg = _mm_loadu_ps((float*) two_phase_inc);
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||||
two_phase_acc[0] = (*phase);
|
two_phase_acc[0] = (*phase);
|
||||||
two_phase_acc[1] = (*phase) * phase_inc;
|
two_phase_acc[1] = (*phase) * phase_inc;
|
||||||
two_phase_acc_reg = _mm_loadu_ps((float*)two_phase_acc);
|
two_phase_acc_reg = _mm_loadu_ps((float*)two_phase_acc);
|
||||||
@ -780,11 +780,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_
|
|||||||
|
|
||||||
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
||||||
__m128i c1, c2, result1, result2;
|
__m128i c1, c2, result1, result2;
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||||
two_phase_inc[0] = phase_inc * phase_inc;
|
two_phase_inc[0] = phase_inc * phase_inc;
|
||||||
two_phase_inc[1] = phase_inc * phase_inc;
|
two_phase_inc[1] = phase_inc * phase_inc;
|
||||||
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||||
two_phase_acc[0] = (*phase);
|
two_phase_acc[0] = (*phase);
|
||||||
two_phase_acc[1] = (*phase) * phase_inc;
|
two_phase_acc[1] = (*phase) * phase_inc;
|
||||||
two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc);
|
two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc);
|
||||||
@ -985,11 +985,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l
|
|||||||
|
|
||||||
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
||||||
__m128i c1, c2, result1, result2;
|
__m128i c1, c2, result1, result2;
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||||
two_phase_inc[0] = phase_inc * phase_inc;
|
two_phase_inc[0] = phase_inc * phase_inc;
|
||||||
two_phase_inc[1] = phase_inc * phase_inc;
|
two_phase_inc[1] = phase_inc * phase_inc;
|
||||||
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||||
two_phase_acc[0] = (*phase);
|
two_phase_acc[0] = (*phase);
|
||||||
two_phase_acc[1] = (*phase) * phase_inc;
|
two_phase_acc[1] = (*phase) * phase_inc;
|
||||||
two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc);
|
two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc);
|
||||||
|
@ -107,20 +107,20 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res
|
|||||||
const unsigned int quarterPoints = num_output_samples / 4;
|
const unsigned int quarterPoints = num_output_samples / 4;
|
||||||
|
|
||||||
lv_16sc_t** _result = result;
|
lv_16sc_t** _result = result;
|
||||||
__attribute__((aligned(16))) int local_code_chip_index[4];
|
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||||
float tmp_rem_code_phase_chips;
|
float tmp_rem_code_phase_chips;
|
||||||
__m128 _rem_code_phase,_code_phase_step_chips;
|
__m128 _rem_code_phase,_code_phase_step_chips;
|
||||||
__m128i _code_length_chips,_code_length_chips_minus1;
|
__m128i _code_length_chips,_code_length_chips_minus1;
|
||||||
__m128 _code_phase_out,_code_phase_out_with_offset;
|
__m128 _code_phase_out,_code_phase_out_with_offset;
|
||||||
|
|
||||||
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
|
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
|
||||||
__attribute__((aligned(16))) int four_times_code_length_chips_minus1[4];
|
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
|
||||||
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
|
||||||
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
|
||||||
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
|
||||||
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
|
||||||
|
|
||||||
__attribute__((aligned(16))) int four_times_code_length_chips[4];
|
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
|
||||||
four_times_code_length_chips[0] = code_length_chips;
|
four_times_code_length_chips[0] = code_length_chips;
|
||||||
four_times_code_length_chips[1] = code_length_chips;
|
four_times_code_length_chips[1] = code_length_chips;
|
||||||
four_times_code_length_chips[2] = code_length_chips;
|
four_times_code_length_chips[2] = code_length_chips;
|
||||||
@ -133,9 +133,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res
|
|||||||
|
|
||||||
__m128i zero = _mm_setzero_si128();
|
__m128i zero = _mm_setzero_si128();
|
||||||
|
|
||||||
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||||
__m128 _4output_index = _mm_load_ps(init_idx_float);
|
__m128 _4output_index = _mm_load_ps(init_idx_float);
|
||||||
__attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
||||||
__m128 _4constant_float = _mm_load_ps(init_4constant_float);
|
__m128 _4constant_float = _mm_load_ps(init_4constant_float);
|
||||||
|
|
||||||
int current_vector = 0;
|
int current_vector = 0;
|
||||||
@ -200,20 +200,20 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res
|
|||||||
const unsigned int quarterPoints = num_output_samples / 4;
|
const unsigned int quarterPoints = num_output_samples / 4;
|
||||||
|
|
||||||
lv_16sc_t** _result = result;
|
lv_16sc_t** _result = result;
|
||||||
__attribute__((aligned(16))) int local_code_chip_index[4];
|
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||||
float tmp_rem_code_phase_chips;
|
float tmp_rem_code_phase_chips;
|
||||||
__m128 _rem_code_phase,_code_phase_step_chips;
|
__m128 _rem_code_phase,_code_phase_step_chips;
|
||||||
__m128i _code_length_chips,_code_length_chips_minus1;
|
__m128i _code_length_chips,_code_length_chips_minus1;
|
||||||
__m128 _code_phase_out,_code_phase_out_with_offset;
|
__m128 _code_phase_out,_code_phase_out_with_offset;
|
||||||
|
|
||||||
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
|
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
|
||||||
__attribute__((aligned(16))) int four_times_code_length_chips_minus1[4];
|
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
|
||||||
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
|
||||||
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
|
||||||
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
|
||||||
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
|
||||||
|
|
||||||
__attribute__((aligned(16))) int four_times_code_length_chips[4];
|
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
|
||||||
four_times_code_length_chips[0] = code_length_chips;
|
four_times_code_length_chips[0] = code_length_chips;
|
||||||
four_times_code_length_chips[1] = code_length_chips;
|
four_times_code_length_chips[1] = code_length_chips;
|
||||||
four_times_code_length_chips[2] = code_length_chips;
|
four_times_code_length_chips[2] = code_length_chips;
|
||||||
@ -226,9 +226,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res
|
|||||||
|
|
||||||
__m128i zero = _mm_setzero_si128();
|
__m128i zero = _mm_setzero_si128();
|
||||||
|
|
||||||
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||||
__m128 _4output_index = _mm_loadu_ps(init_idx_float);
|
__m128 _4output_index = _mm_loadu_ps(init_idx_float);
|
||||||
__attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
||||||
__m128 _4constant_float = _mm_loadu_ps(init_4constant_float);
|
__m128 _4constant_float = _mm_loadu_ps(init_4constant_float);
|
||||||
|
|
||||||
int current_vector = 0;
|
int current_vector = 0;
|
||||||
@ -294,7 +294,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul
|
|||||||
float32x4_t half = vdupq_n_f32(0.5f);
|
float32x4_t half = vdupq_n_f32(0.5f);
|
||||||
|
|
||||||
lv_16sc_t** _result = result;
|
lv_16sc_t** _result = result;
|
||||||
__attribute__((aligned(16))) int local_code_chip_index[4];
|
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||||
float tmp_rem_code_phase_chips;
|
float tmp_rem_code_phase_chips;
|
||||||
float32x4_t _rem_code_phase, _code_phase_step_chips;
|
float32x4_t _rem_code_phase, _code_phase_step_chips;
|
||||||
int32x4_t _code_length_chips, _code_length_chips_minus1;
|
int32x4_t _code_length_chips, _code_length_chips_minus1;
|
||||||
@ -302,13 +302,13 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul
|
|||||||
float32x4_t sign, PlusHalf, Round;
|
float32x4_t sign, PlusHalf, Round;
|
||||||
|
|
||||||
_code_phase_step_chips = vld1q_dup_f32(&code_phase_step_chips); //load float to all four float values in float32x4_t register
|
_code_phase_step_chips = vld1q_dup_f32(&code_phase_step_chips); //load float to all four float values in float32x4_t register
|
||||||
__attribute__((aligned(16))) int four_times_code_length_chips_minus1[4];
|
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
|
||||||
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
|
||||||
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
|
||||||
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
|
||||||
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
|
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
|
||||||
|
|
||||||
__attribute__((aligned(16))) int four_times_code_length_chips[4];
|
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
|
||||||
four_times_code_length_chips[0] = code_length_chips;
|
four_times_code_length_chips[0] = code_length_chips;
|
||||||
four_times_code_length_chips[1] = code_length_chips;
|
four_times_code_length_chips[1] = code_length_chips;
|
||||||
four_times_code_length_chips[2] = code_length_chips;
|
four_times_code_length_chips[2] = code_length_chips;
|
||||||
@ -321,9 +321,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul
|
|||||||
uint32x4_t negative_indexes, overflow_indexes;
|
uint32x4_t negative_indexes, overflow_indexes;
|
||||||
int32x4_t zero = vmovq_n_s32(0);
|
int32x4_t zero = vmovq_n_s32(0);
|
||||||
|
|
||||||
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||||
float32x4_t _4output_index = vld1q_f32(init_idx_float);
|
float32x4_t _4output_index = vld1q_f32(init_idx_float);
|
||||||
__attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
||||||
float32x4_t _4constant_float = vld1q_f32(init_4constant_float);
|
float32x4_t _4constant_float = vld1q_f32(init_4constant_float);
|
||||||
|
|
||||||
int current_vector = 0;
|
int current_vector = 0;
|
||||||
|
@ -268,26 +268,26 @@ static inline void volk_gnsssdr_32f_sincos_32fc_a_sse2(lv_32fc_t* out, const flo
|
|||||||
__m128i emm0, emm2, emm4;
|
__m128i emm0, emm2, emm4;
|
||||||
|
|
||||||
/* declare some SSE constants */
|
/* declare some SSE constants */
|
||||||
static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
__VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
||||||
static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
__VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
||||||
|
|
||||||
static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
||||||
static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 };
|
__VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 };
|
||||||
static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 };
|
__VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 };
|
||||||
static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2};
|
__VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2};
|
||||||
static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4};
|
__VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4};
|
||||||
|
|
||||||
static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
||||||
static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
||||||
static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
||||||
static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
||||||
static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
||||||
static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
||||||
static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
||||||
static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
||||||
static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
||||||
static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
|
||||||
static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||||
|
|
||||||
for(;number < sse_iters; number++)
|
for(;number < sse_iters; number++)
|
||||||
{
|
{
|
||||||
@ -421,26 +421,26 @@ static inline void volk_gnsssdr_32f_sincos_32fc_u_sse2(lv_32fc_t* out, const flo
|
|||||||
__m128i emm0, emm2, emm4;
|
__m128i emm0, emm2, emm4;
|
||||||
|
|
||||||
/* declare some SSE constants */
|
/* declare some SSE constants */
|
||||||
static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
__VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
||||||
static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
__VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
||||||
|
|
||||||
static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
||||||
static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 };
|
__VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 };
|
||||||
static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 };
|
__VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 };
|
||||||
static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2};
|
__VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2};
|
||||||
static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4};
|
__VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4};
|
||||||
|
|
||||||
static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
||||||
static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
||||||
static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
||||||
static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
||||||
static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
||||||
static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
||||||
static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
||||||
static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
||||||
static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
||||||
static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
|
||||||
static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||||
|
|
||||||
for(;number < sse_iters; number++)
|
for(;number < sse_iters; number++)
|
||||||
{
|
{
|
||||||
|
@ -183,11 +183,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_
|
|||||||
// phase rotation registers
|
// phase rotation registers
|
||||||
__m128 a, two_phase_acc_reg, two_phase_inc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z1;
|
__m128 a, two_phase_acc_reg, two_phase_inc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z1;
|
||||||
|
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||||
two_phase_inc[0] = phase_inc * phase_inc;
|
two_phase_inc[0] = phase_inc * phase_inc;
|
||||||
two_phase_inc[1] = phase_inc * phase_inc;
|
two_phase_inc[1] = phase_inc * phase_inc;
|
||||||
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||||
two_phase_acc[0] = (*phase);
|
two_phase_acc[0] = (*phase);
|
||||||
two_phase_acc[1] = (*phase) * phase_inc;
|
two_phase_acc[1] = (*phase) * phase_inc;
|
||||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||||
@ -289,11 +289,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_
|
|||||||
// phase rotation registers
|
// phase rotation registers
|
||||||
__m128 a, two_phase_acc_reg, two_phase_inc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z1;
|
__m128 a, two_phase_acc_reg, two_phase_inc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z1;
|
||||||
|
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||||
two_phase_inc[0] = phase_inc * phase_inc;
|
two_phase_inc[0] = phase_inc * phase_inc;
|
||||||
two_phase_inc[1] = phase_inc * phase_inc;
|
two_phase_inc[1] = phase_inc * phase_inc;
|
||||||
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
||||||
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
|
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||||
two_phase_acc[0] = (*phase);
|
two_phase_acc[0] = (*phase);
|
||||||
two_phase_acc[1] = (*phase) * phase_inc;
|
two_phase_acc[1] = (*phase) * phase_inc;
|
||||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||||
|
@ -82,29 +82,29 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_a_sse2(lv_32fc_t* out, const fl
|
|||||||
__m128i emm0, emm2, emm4;
|
__m128i emm0, emm2, emm4;
|
||||||
|
|
||||||
/* declare some SSE constants */
|
/* declare some SSE constants */
|
||||||
static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
||||||
static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
||||||
|
|
||||||
static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
||||||
static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 };
|
static const int _pi32_1[4] = { 1, 1, 1, 1 };
|
||||||
static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 };
|
static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 };
|
||||||
static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2};
|
static const int _pi32_2[4] = { 2, 2, 2, 2};
|
||||||
static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4};
|
static const int _pi32_4[4] = { 4, 4, 4, 4};
|
||||||
|
|
||||||
static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
||||||
static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
||||||
static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
||||||
static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
||||||
static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
||||||
static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
||||||
static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
||||||
static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
||||||
static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
||||||
static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f };
|
static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
|
||||||
static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f };
|
static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||||
|
|
||||||
float four_phases[4] __attribute__((aligned(16))) = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc };
|
float four_phases[4] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc };
|
||||||
float four_phases_inc[4] __attribute__((aligned(16))) = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc };
|
float four_phases_inc[4] = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc };
|
||||||
four_phases_reg = _mm_load_ps(four_phases);
|
four_phases_reg = _mm_load_ps(four_phases);
|
||||||
const __m128 four_phases_inc_reg = _mm_load_ps(four_phases_inc);
|
const __m128 four_phases_inc_reg = _mm_load_ps(four_phases_inc);
|
||||||
|
|
||||||
@ -239,29 +239,29 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_u_sse2(lv_32fc_t* out, const fl
|
|||||||
__m128i emm0, emm2, emm4;
|
__m128i emm0, emm2, emm4;
|
||||||
|
|
||||||
/* declare some SSE constants */
|
/* declare some SSE constants */
|
||||||
static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
__VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
||||||
static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
__VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
||||||
|
|
||||||
static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
||||||
static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 };
|
__VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 };
|
||||||
static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 };
|
__VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 };
|
||||||
static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2};
|
__VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2};
|
||||||
static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4};
|
__VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4};
|
||||||
|
|
||||||
static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
||||||
static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
||||||
static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
||||||
static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
||||||
static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
||||||
static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
||||||
static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
||||||
static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
||||||
static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
||||||
static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
|
||||||
static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f };
|
__VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||||
|
|
||||||
float four_phases[4] __attribute__((aligned(16))) = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc };
|
__VOLK_ATTR_ALIGNED(16) float four_phases[4] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc };
|
||||||
float four_phases_inc[4] __attribute__((aligned(16))) = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc };
|
__VOLK_ATTR_ALIGNED(16) float four_phases_inc[4] = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc };
|
||||||
four_phases_reg = _mm_load_ps(four_phases);
|
four_phases_reg = _mm_load_ps(four_phases);
|
||||||
const __m128 four_phases_inc_reg = _mm_load_ps(four_phases_inc);
|
const __m128 four_phases_inc_reg = _mm_load_ps(four_phases_inc);
|
||||||
|
|
||||||
@ -452,29 +452,29 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_a_avx2(lv_32fc_t* out, const fl
|
|||||||
__m128 aux, c1, s1;
|
__m128 aux, c1, s1;
|
||||||
|
|
||||||
/* declare some AVX2 constants */
|
/* declare some AVX2 constants */
|
||||||
static const int _ps_inv_sign_mask[8] __attribute__((aligned(32))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
__VOLK_ATTR_ALIGNED(32) static const int _ps_inv_sign_mask[8] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
||||||
static const int _ps_sign_mask[8] __attribute__((aligned(32))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
__VOLK_ATTR_ALIGNED(32) static const int _ps_sign_mask[8] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
||||||
|
|
||||||
static const float _ps_cephes_FOPI[8] __attribute__((aligned(32))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_cephes_FOPI[8] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
||||||
static const int _pi32_1[8] __attribute__((aligned(32))) = { 1, 1, 1, 1, 1, 1, 1, 1 };
|
__VOLK_ATTR_ALIGNED(32) static const int _pi32_1[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
|
||||||
static const int _pi32_inv1[8] __attribute__((aligned(32))) = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 };
|
__VOLK_ATTR_ALIGNED(32) static const int _pi32_inv1[8] = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 };
|
||||||
static const int _pi32_2[8] __attribute__((aligned(32))) = { 2, 2, 2, 2, 2, 2, 2, 2 };
|
__VOLK_ATTR_ALIGNED(32) static const int _pi32_2[8] = { 2, 2, 2, 2, 2, 2, 2, 2 };
|
||||||
static const int _pi32_4[8] __attribute__((aligned(32))) = { 4, 4, 4, 4, 4, 4, 4, 4 };
|
__VOLK_ATTR_ALIGNED(32) static const int _pi32_4[8] = { 4, 4, 4, 4, 4, 4, 4, 4 };
|
||||||
|
|
||||||
static const float _ps_minus_cephes_DP1[8] __attribute__((aligned(32))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP1[8] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
||||||
static const float _ps_minus_cephes_DP2[8] __attribute__((aligned(32))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP2[8] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
||||||
static const float _ps_minus_cephes_DP3[8] __attribute__((aligned(32))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP3[8] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
||||||
static const float _ps_coscof_p0[8] __attribute__((aligned(32))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p0[8] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
||||||
static const float _ps_coscof_p1[8] __attribute__((aligned(32))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p1[8] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
||||||
static const float _ps_coscof_p2[8] __attribute__((aligned(32))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p2[8] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
||||||
static const float _ps_sincof_p0[8] __attribute__((aligned(32))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p0[8] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
||||||
static const float _ps_sincof_p1[8] __attribute__((aligned(32))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p1[8] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
||||||
static const float _ps_sincof_p2[8] __attribute__((aligned(32))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p2[8] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
||||||
static const float _ps_0p5[8] __attribute__((aligned(32))) = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_0p5[8] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
|
||||||
static const float _ps_1[8] __attribute__((aligned(32))) = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_1[8] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f };
|
||||||
|
|
||||||
float eight_phases[8] __attribute__((aligned(32))) = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc };
|
__VOLK_ATTR_ALIGNED(32) float eight_phases[8] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc };
|
||||||
float eight_phases_inc[8] __attribute__((aligned(32))) = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc };
|
__VOLK_ATTR_ALIGNED(32) float eight_phases_inc[8] = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc };
|
||||||
eight_phases_reg = _mm256_load_ps(eight_phases);
|
eight_phases_reg = _mm256_load_ps(eight_phases);
|
||||||
const __m256 eight_phases_inc_reg = _mm256_load_ps(eight_phases_inc);
|
const __m256 eight_phases_inc_reg = _mm256_load_ps(eight_phases_inc);
|
||||||
|
|
||||||
@ -620,29 +620,29 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_u_avx2(lv_32fc_t* out, const fl
|
|||||||
__m128 aux, c1, s1;
|
__m128 aux, c1, s1;
|
||||||
|
|
||||||
/* declare some AVX2 constants */
|
/* declare some AVX2 constants */
|
||||||
static const int _ps_inv_sign_mask[8] __attribute__((aligned(32))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
__VOLK_ATTR_ALIGNED(32) static const int _ps_inv_sign_mask[8] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
||||||
static const int _ps_sign_mask[8] __attribute__((aligned(32))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
__VOLK_ATTR_ALIGNED(32) static const int _ps_sign_mask[8] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
||||||
|
|
||||||
static const float _ps_cephes_FOPI[8] __attribute__((aligned(32))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_cephes_FOPI[8] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
||||||
static const int _pi32_1[8] __attribute__((aligned(32))) = { 1, 1, 1, 1, 1, 1, 1, 1 };
|
__VOLK_ATTR_ALIGNED(32) static const int _pi32_1[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
|
||||||
static const int _pi32_inv1[8] __attribute__((aligned(32))) = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 };
|
__VOLK_ATTR_ALIGNED(32) static const int _pi32_inv1[8] = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 };
|
||||||
static const int _pi32_2[8] __attribute__((aligned(32))) = { 2, 2, 2, 2, 2, 2, 2, 2 };
|
__VOLK_ATTR_ALIGNED(32) static const int _pi32_2[8] = { 2, 2, 2, 2, 2, 2, 2, 2 };
|
||||||
static const int _pi32_4[8] __attribute__((aligned(32))) = { 4, 4, 4, 4, 4, 4, 4, 4 };
|
__VOLK_ATTR_ALIGNED(32) static const int _pi32_4[8] = { 4, 4, 4, 4, 4, 4, 4, 4 };
|
||||||
|
|
||||||
static const float _ps_minus_cephes_DP1[8] __attribute__((aligned(32))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP1[8] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
||||||
static const float _ps_minus_cephes_DP2[8] __attribute__((aligned(32))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP2[8] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
||||||
static const float _ps_minus_cephes_DP3[8] __attribute__((aligned(32))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP3[8] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
||||||
static const float _ps_coscof_p0[8] __attribute__((aligned(32))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p0[8] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
||||||
static const float _ps_coscof_p1[8] __attribute__((aligned(32))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p1[8] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
||||||
static const float _ps_coscof_p2[8] __attribute__((aligned(32))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p2[8] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
||||||
static const float _ps_sincof_p0[8] __attribute__((aligned(32))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p0[8] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
||||||
static const float _ps_sincof_p1[8] __attribute__((aligned(32))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p1[8] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
||||||
static const float _ps_sincof_p2[8] __attribute__((aligned(32))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p2[8] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
||||||
static const float _ps_0p5[8] __attribute__((aligned(32))) = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_0p5[8] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
|
||||||
static const float _ps_1[8] __attribute__((aligned(32))) = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f };
|
__VOLK_ATTR_ALIGNED(32) static const float _ps_1[8] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f };
|
||||||
|
|
||||||
float eight_phases[8] __attribute__((aligned(32))) = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc };
|
__VOLK_ATTR_ALIGNED(32) float eight_phases[8] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc };
|
||||||
float eight_phases_inc[8] __attribute__((aligned(32))) = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc };
|
__VOLK_ATTR_ALIGNED(32) float eight_phases_inc[8] = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc };
|
||||||
eight_phases_reg = _mm256_load_ps(eight_phases);
|
eight_phases_reg = _mm256_load_ps(eight_phases);
|
||||||
const __m256 eight_phases_inc_reg = _mm256_load_ps(eight_phases_inc);
|
const __m256 eight_phases_inc_reg = _mm256_load_ps(eight_phases_inc);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user