1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2025-01-18 21:23:02 +00:00

Ask for aligned memory in a more portable way: replace the GCC-specific `__attribute__((aligned(16)))` with the `__VOLK_ATTR_ALIGNED(16)` macro

This commit is contained in:
Carles Fernandez 2016-03-31 19:39:37 +02:00
parent 3d733a5140
commit 817139ba50
7 changed files with 168 additions and 168 deletions

View File

@ -100,7 +100,7 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co
lv_16sc_t* _result = result; lv_16sc_t* _result = result;
__attribute__((aligned(16))) int local_code_chip_index[4]; __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__m128 _rem_code_phase, _code_phase_step_chips; __m128 _rem_code_phase, _code_phase_step_chips;
__m128i _code_length_chips, _code_length_chips_minus1; __m128i _code_length_chips, _code_length_chips_minus1;
__m128 _code_phase_out, _code_phase_out_with_offset; __m128 _code_phase_out, _code_phase_out_with_offset;
@ -108,13 +108,13 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co
_rem_code_phase = _mm_load1_ps(&rem_code_phase_chips); //load float to all four float values in m128 register _rem_code_phase = _mm_load1_ps(&rem_code_phase_chips); //load float to all four float values in m128 register
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register _code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
__attribute__((aligned(16))) int four_times_code_length_chips_minus1[4]; __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
four_times_code_length_chips_minus1[0] = code_length_chips-1; four_times_code_length_chips_minus1[0] = code_length_chips-1;
four_times_code_length_chips_minus1[1] = code_length_chips-1; four_times_code_length_chips_minus1[1] = code_length_chips-1;
four_times_code_length_chips_minus1[2] = code_length_chips-1; four_times_code_length_chips_minus1[2] = code_length_chips-1;
four_times_code_length_chips_minus1[3] = code_length_chips-1; four_times_code_length_chips_minus1[3] = code_length_chips-1;
__attribute__((aligned(16))) int four_times_code_length_chips[4]; __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
four_times_code_length_chips[0] = code_length_chips; four_times_code_length_chips[0] = code_length_chips;
four_times_code_length_chips[1] = code_length_chips; four_times_code_length_chips[1] = code_length_chips;
four_times_code_length_chips[2] = code_length_chips; four_times_code_length_chips[2] = code_length_chips;
@ -127,9 +127,9 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co
__m128i zero = _mm_setzero_si128(); __m128i zero = _mm_setzero_si128();
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; __VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__m128 _4output_index = _mm_load_ps(init_idx_float); __m128 _4output_index = _mm_load_ps(init_idx_float);
__attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; __VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
__m128 _4constant_float = _mm_load_ps(init_4constant_float); __m128 _4constant_float = _mm_load_ps(init_4constant_float);
@ -183,7 +183,7 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co
lv_16sc_t* _result = result; lv_16sc_t* _result = result;
__attribute__((aligned(16))) int local_code_chip_index[4]; __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__m128 _rem_code_phase, _code_phase_step_chips; __m128 _rem_code_phase, _code_phase_step_chips;
__m128i _code_length_chips, _code_length_chips_minus1; __m128i _code_length_chips, _code_length_chips_minus1;
__m128 _code_phase_out, _code_phase_out_with_offset; __m128 _code_phase_out, _code_phase_out_with_offset;
@ -191,13 +191,13 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co
_rem_code_phase = _mm_load1_ps(&rem_code_phase_chips); //load float to all four float values in m128 register _rem_code_phase = _mm_load1_ps(&rem_code_phase_chips); //load float to all four float values in m128 register
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register _code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
__attribute__((aligned(16))) int four_times_code_length_chips_minus1[4]; __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
four_times_code_length_chips_minus1[0] = code_length_chips-1; four_times_code_length_chips_minus1[0] = code_length_chips-1;
four_times_code_length_chips_minus1[1] = code_length_chips-1; four_times_code_length_chips_minus1[1] = code_length_chips-1;
four_times_code_length_chips_minus1[2] = code_length_chips-1; four_times_code_length_chips_minus1[2] = code_length_chips-1;
four_times_code_length_chips_minus1[3] = code_length_chips-1; four_times_code_length_chips_minus1[3] = code_length_chips-1;
__attribute__((aligned(16))) int four_times_code_length_chips[4]; __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
four_times_code_length_chips[0] = code_length_chips; four_times_code_length_chips[0] = code_length_chips;
four_times_code_length_chips[1] = code_length_chips; four_times_code_length_chips[1] = code_length_chips;
four_times_code_length_chips[2] = code_length_chips; four_times_code_length_chips[2] = code_length_chips;
@ -210,9 +210,9 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co
__m128i zero = _mm_setzero_si128(); __m128i zero = _mm_setzero_si128();
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; __VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__m128 _4output_index = _mm_loadu_ps(init_idx_float); __m128 _4output_index = _mm_loadu_ps(init_idx_float);
__attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; __VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
__m128 _4constant_float = _mm_loadu_ps(init_4constant_float); __m128 _4constant_float = _mm_loadu_ps(init_4constant_float);
for(number = 0; number < quarterPoints; number++) for(number = 0; number < quarterPoints; number++)
@ -265,7 +265,7 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons
lv_16sc_t* _result = result; lv_16sc_t* _result = result;
__attribute__((aligned(16))) int local_code_chip_index[4]; __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
float32x4_t _rem_code_phase, _code_phase_step_chips; float32x4_t _rem_code_phase, _code_phase_step_chips;
int32x4_t _code_length_chips, _code_length_chips_minus1; int32x4_t _code_length_chips, _code_length_chips_minus1;
float32x4_t _code_phase_out, _code_phase_out_with_offset; float32x4_t _code_phase_out, _code_phase_out_with_offset;
@ -274,13 +274,13 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons
_rem_code_phase = vld1q_dup_f32(&rem_code_phase_chips); //load float to all four float values in m128 register _rem_code_phase = vld1q_dup_f32(&rem_code_phase_chips); //load float to all four float values in m128 register
_code_phase_step_chips = vld1q_dup_f32(&code_phase_step_chips); //load float to all four float values in m128 register _code_phase_step_chips = vld1q_dup_f32(&code_phase_step_chips); //load float to all four float values in m128 register
__attribute__((aligned(16))) int four_times_code_length_chips_minus1[4]; __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
four_times_code_length_chips_minus1[0] = code_length_chips - 1; four_times_code_length_chips_minus1[0] = code_length_chips - 1;
four_times_code_length_chips_minus1[1] = code_length_chips - 1; four_times_code_length_chips_minus1[1] = code_length_chips - 1;
four_times_code_length_chips_minus1[2] = code_length_chips - 1; four_times_code_length_chips_minus1[2] = code_length_chips - 1;
four_times_code_length_chips_minus1[3] = code_length_chips - 1; four_times_code_length_chips_minus1[3] = code_length_chips - 1;
__attribute__((aligned(16))) int four_times_code_length_chips[4]; __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
four_times_code_length_chips[0] = code_length_chips; four_times_code_length_chips[0] = code_length_chips;
four_times_code_length_chips[1] = code_length_chips; four_times_code_length_chips[1] = code_length_chips;
four_times_code_length_chips[2] = code_length_chips; four_times_code_length_chips[2] = code_length_chips;
@ -293,9 +293,9 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons
uint32x4_t negative_indexes, overflow_indexes; uint32x4_t negative_indexes, overflow_indexes;
int32x4_t zero = vmovq_n_s32(0); int32x4_t zero = vmovq_n_s32(0);
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; __VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
float32x4_t _4output_index = vld1q_f32(init_idx_float); float32x4_t _4output_index = vld1q_f32(init_idx_float);
__attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; __VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
float32x4_t _4constant_float = vld1q_f32(init_4constant_float); float32x4_t _4constant_float = vld1q_f32(init_4constant_float);
for(number = 0; number < quarterPoints; number++) for(number = 0; number < quarterPoints; number++)

View File

@ -141,11 +141,11 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
const unsigned int sse_iters = num_points / 4; const unsigned int sse_iters = num_points / 4;
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
__m128i c1, c2, result; __m128i c1, c2, result;
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase); two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -243,11 +243,11 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc
const unsigned int ROTATOR_RELOAD = 512; const unsigned int ROTATOR_RELOAD = 512;
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
__m128i c1, c2, result; __m128i c1, c2, result;
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase); two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -393,11 +393,11 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
const unsigned int sse_iters = num_points / 4; const unsigned int sse_iters = num_points / 4;
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
__m128i c1, c2, result; __m128i c1, c2, result;
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase); two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc); two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc);
@ -494,11 +494,11 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(lv_16sc
unsigned int ROTATOR_RELOAD = 512; unsigned int ROTATOR_RELOAD = 512;
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
__m128i c1, c2, result; __m128i c1, c2, result;
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase); two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc); two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc);

View File

@ -203,11 +203,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
// phase rotation registers // phase rotation registers
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg; __m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
__m128i pc1, pc2; __m128i pc1, pc2;
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase); two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -376,11 +376,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l
// phase rotation registers // phase rotation registers
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg; __m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
__m128i pc1, pc2; __m128i pc1, pc2;
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase); two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -619,11 +619,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
// phase rotation registers // phase rotation registers
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg; __m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
__m128i pc1, pc2; __m128i pc1, pc2;
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_loadu_ps((float*) two_phase_inc); two_phase_inc_reg = _mm_loadu_ps((float*) two_phase_inc);
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase); two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_loadu_ps((float*)two_phase_acc); two_phase_acc_reg = _mm_loadu_ps((float*)two_phase_acc);
@ -780,11 +780,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
__m128i c1, c2, result1, result2; __m128i c1, c2, result1, result2;
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase); two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc); two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc);
@ -985,11 +985,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
__m128i c1, c2, result1, result2; __m128i c1, c2, result1, result2;
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase); two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc); two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc);

View File

@ -107,20 +107,20 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res
const unsigned int quarterPoints = num_output_samples / 4; const unsigned int quarterPoints = num_output_samples / 4;
lv_16sc_t** _result = result; lv_16sc_t** _result = result;
__attribute__((aligned(16))) int local_code_chip_index[4]; __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
float tmp_rem_code_phase_chips; float tmp_rem_code_phase_chips;
__m128 _rem_code_phase,_code_phase_step_chips; __m128 _rem_code_phase,_code_phase_step_chips;
__m128i _code_length_chips,_code_length_chips_minus1; __m128i _code_length_chips,_code_length_chips_minus1;
__m128 _code_phase_out,_code_phase_out_with_offset; __m128 _code_phase_out,_code_phase_out_with_offset;
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register _code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
__attribute__((aligned(16))) int four_times_code_length_chips_minus1[4]; __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
four_times_code_length_chips_minus1[0] = code_length_chips - 1; four_times_code_length_chips_minus1[0] = code_length_chips - 1;
four_times_code_length_chips_minus1[1] = code_length_chips - 1; four_times_code_length_chips_minus1[1] = code_length_chips - 1;
four_times_code_length_chips_minus1[2] = code_length_chips - 1; four_times_code_length_chips_minus1[2] = code_length_chips - 1;
four_times_code_length_chips_minus1[3] = code_length_chips - 1; four_times_code_length_chips_minus1[3] = code_length_chips - 1;
__attribute__((aligned(16))) int four_times_code_length_chips[4]; __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
four_times_code_length_chips[0] = code_length_chips; four_times_code_length_chips[0] = code_length_chips;
four_times_code_length_chips[1] = code_length_chips; four_times_code_length_chips[1] = code_length_chips;
four_times_code_length_chips[2] = code_length_chips; four_times_code_length_chips[2] = code_length_chips;
@ -133,9 +133,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res
__m128i zero = _mm_setzero_si128(); __m128i zero = _mm_setzero_si128();
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; __VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__m128 _4output_index = _mm_load_ps(init_idx_float); __m128 _4output_index = _mm_load_ps(init_idx_float);
__attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; __VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
__m128 _4constant_float = _mm_load_ps(init_4constant_float); __m128 _4constant_float = _mm_load_ps(init_4constant_float);
int current_vector = 0; int current_vector = 0;
@ -200,20 +200,20 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res
const unsigned int quarterPoints = num_output_samples / 4; const unsigned int quarterPoints = num_output_samples / 4;
lv_16sc_t** _result = result; lv_16sc_t** _result = result;
__attribute__((aligned(16))) int local_code_chip_index[4]; __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
float tmp_rem_code_phase_chips; float tmp_rem_code_phase_chips;
__m128 _rem_code_phase,_code_phase_step_chips; __m128 _rem_code_phase,_code_phase_step_chips;
__m128i _code_length_chips,_code_length_chips_minus1; __m128i _code_length_chips,_code_length_chips_minus1;
__m128 _code_phase_out,_code_phase_out_with_offset; __m128 _code_phase_out,_code_phase_out_with_offset;
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register _code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
__attribute__((aligned(16))) int four_times_code_length_chips_minus1[4]; __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
four_times_code_length_chips_minus1[0] = code_length_chips - 1; four_times_code_length_chips_minus1[0] = code_length_chips - 1;
four_times_code_length_chips_minus1[1] = code_length_chips - 1; four_times_code_length_chips_minus1[1] = code_length_chips - 1;
four_times_code_length_chips_minus1[2] = code_length_chips - 1; four_times_code_length_chips_minus1[2] = code_length_chips - 1;
four_times_code_length_chips_minus1[3] = code_length_chips - 1; four_times_code_length_chips_minus1[3] = code_length_chips - 1;
__attribute__((aligned(16))) int four_times_code_length_chips[4]; __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
four_times_code_length_chips[0] = code_length_chips; four_times_code_length_chips[0] = code_length_chips;
four_times_code_length_chips[1] = code_length_chips; four_times_code_length_chips[1] = code_length_chips;
four_times_code_length_chips[2] = code_length_chips; four_times_code_length_chips[2] = code_length_chips;
@ -226,9 +226,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res
__m128i zero = _mm_setzero_si128(); __m128i zero = _mm_setzero_si128();
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; __VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__m128 _4output_index = _mm_loadu_ps(init_idx_float); __m128 _4output_index = _mm_loadu_ps(init_idx_float);
__attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; __VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
__m128 _4constant_float = _mm_loadu_ps(init_4constant_float); __m128 _4constant_float = _mm_loadu_ps(init_4constant_float);
int current_vector = 0; int current_vector = 0;
@ -294,7 +294,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul
float32x4_t half = vdupq_n_f32(0.5f); float32x4_t half = vdupq_n_f32(0.5f);
lv_16sc_t** _result = result; lv_16sc_t** _result = result;
__attribute__((aligned(16))) int local_code_chip_index[4]; __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
float tmp_rem_code_phase_chips; float tmp_rem_code_phase_chips;
float32x4_t _rem_code_phase, _code_phase_step_chips; float32x4_t _rem_code_phase, _code_phase_step_chips;
int32x4_t _code_length_chips, _code_length_chips_minus1; int32x4_t _code_length_chips, _code_length_chips_minus1;
@ -302,13 +302,13 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul
float32x4_t sign, PlusHalf, Round; float32x4_t sign, PlusHalf, Round;
_code_phase_step_chips = vld1q_dup_f32(&code_phase_step_chips); //load float to all four float values in float32x4_t register _code_phase_step_chips = vld1q_dup_f32(&code_phase_step_chips); //load float to all four float values in float32x4_t register
__attribute__((aligned(16))) int four_times_code_length_chips_minus1[4]; __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
four_times_code_length_chips_minus1[0] = code_length_chips - 1; four_times_code_length_chips_minus1[0] = code_length_chips - 1;
four_times_code_length_chips_minus1[1] = code_length_chips - 1; four_times_code_length_chips_minus1[1] = code_length_chips - 1;
four_times_code_length_chips_minus1[2] = code_length_chips - 1; four_times_code_length_chips_minus1[2] = code_length_chips - 1;
four_times_code_length_chips_minus1[3] = code_length_chips - 1; four_times_code_length_chips_minus1[3] = code_length_chips - 1;
__attribute__((aligned(16))) int four_times_code_length_chips[4]; __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
four_times_code_length_chips[0] = code_length_chips; four_times_code_length_chips[0] = code_length_chips;
four_times_code_length_chips[1] = code_length_chips; four_times_code_length_chips[1] = code_length_chips;
four_times_code_length_chips[2] = code_length_chips; four_times_code_length_chips[2] = code_length_chips;
@ -321,9 +321,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul
uint32x4_t negative_indexes, overflow_indexes; uint32x4_t negative_indexes, overflow_indexes;
int32x4_t zero = vmovq_n_s32(0); int32x4_t zero = vmovq_n_s32(0);
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; __VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
float32x4_t _4output_index = vld1q_f32(init_idx_float); float32x4_t _4output_index = vld1q_f32(init_idx_float);
__attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; __VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
float32x4_t _4constant_float = vld1q_f32(init_4constant_float); float32x4_t _4constant_float = vld1q_f32(init_4constant_float);
int current_vector = 0; int current_vector = 0;

View File

@ -268,26 +268,26 @@ static inline void volk_gnsssdr_32f_sincos_32fc_a_sse2(lv_32fc_t* out, const flo
__m128i emm0, emm2, emm4; __m128i emm0, emm2, emm4;
/* declare some SSE constants */ /* declare some SSE constants */
static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; __VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; __VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 }; __VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 };
static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 }; __VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 };
static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2}; __VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2};
static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4}; __VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4};
static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f }; __VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f }; __VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
for(;number < sse_iters; number++) for(;number < sse_iters; number++)
{ {
@ -421,26 +421,26 @@ static inline void volk_gnsssdr_32f_sincos_32fc_u_sse2(lv_32fc_t* out, const flo
__m128i emm0, emm2, emm4; __m128i emm0, emm2, emm4;
/* declare some SSE constants */ /* declare some SSE constants */
static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; __VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; __VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 }; __VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 };
static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 }; __VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 };
static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2}; __VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2};
static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4}; __VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4};
static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f }; __VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f }; __VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
for(;number < sse_iters; number++) for(;number < sse_iters; number++)
{ {

View File

@ -183,11 +183,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_
// phase rotation registers // phase rotation registers
__m128 a, two_phase_acc_reg, two_phase_inc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z1; __m128 a, two_phase_acc_reg, two_phase_inc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z1;
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase); two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -289,11 +289,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_
// phase rotation registers // phase rotation registers
__m128 a, two_phase_acc_reg, two_phase_inc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z1; __m128 a, two_phase_acc_reg, two_phase_inc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z1;
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase); two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);

View File

@ -82,29 +82,29 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_a_sse2(lv_32fc_t* out, const fl
__m128i emm0, emm2, emm4; __m128i emm0, emm2, emm4;
/* declare some SSE constants */ /* declare some SSE constants */
static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 }; static const int _pi32_1[4] = { 1, 1, 1, 1 };
static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 }; static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 };
static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2}; static const int _pi32_2[4] = { 2, 2, 2, 2};
static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4}; static const int _pi32_4[4] = { 4, 4, 4, 4};
static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f }; static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f }; static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
float four_phases[4] __attribute__((aligned(16))) = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc }; float four_phases[4] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc };
float four_phases_inc[4] __attribute__((aligned(16))) = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc }; float four_phases_inc[4] = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc };
four_phases_reg = _mm_load_ps(four_phases); four_phases_reg = _mm_load_ps(four_phases);
const __m128 four_phases_inc_reg = _mm_load_ps(four_phases_inc); const __m128 four_phases_inc_reg = _mm_load_ps(four_phases_inc);
@ -239,29 +239,29 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_u_sse2(lv_32fc_t* out, const fl
__m128i emm0, emm2, emm4; __m128i emm0, emm2, emm4;
/* declare some SSE constants */ /* declare some SSE constants */
static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; __VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; __VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 }; __VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 };
static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 }; __VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 };
static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2}; __VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2};
static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4}; __VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4};
static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f }; __VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f }; __VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
float four_phases[4] __attribute__((aligned(16))) = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc }; __VOLK_ATTR_ALIGNED(16) float four_phases[4] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc };
float four_phases_inc[4] __attribute__((aligned(16))) = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc }; __VOLK_ATTR_ALIGNED(16) float four_phases_inc[4] = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc };
four_phases_reg = _mm_load_ps(four_phases); four_phases_reg = _mm_load_ps(four_phases);
const __m128 four_phases_inc_reg = _mm_load_ps(four_phases_inc); const __m128 four_phases_inc_reg = _mm_load_ps(four_phases_inc);
@ -452,29 +452,29 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_a_avx2(lv_32fc_t* out, const fl
__m128 aux, c1, s1; __m128 aux, c1, s1;
/* declare some AVX2 constants */ /* declare some AVX2 constants */
static const int _ps_inv_sign_mask[8] __attribute__((aligned(32))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; __VOLK_ATTR_ALIGNED(32) static const int _ps_inv_sign_mask[8] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
static const int _ps_sign_mask[8] __attribute__((aligned(32))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; __VOLK_ATTR_ALIGNED(32) static const int _ps_sign_mask[8] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
static const float _ps_cephes_FOPI[8] __attribute__((aligned(32))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_cephes_FOPI[8] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
static const int _pi32_1[8] __attribute__((aligned(32))) = { 1, 1, 1, 1, 1, 1, 1, 1 }; __VOLK_ATTR_ALIGNED(32) static const int _pi32_1[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
static const int _pi32_inv1[8] __attribute__((aligned(32))) = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 }; __VOLK_ATTR_ALIGNED(32) static const int _pi32_inv1[8] = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 };
static const int _pi32_2[8] __attribute__((aligned(32))) = { 2, 2, 2, 2, 2, 2, 2, 2 }; __VOLK_ATTR_ALIGNED(32) static const int _pi32_2[8] = { 2, 2, 2, 2, 2, 2, 2, 2 };
static const int _pi32_4[8] __attribute__((aligned(32))) = { 4, 4, 4, 4, 4, 4, 4, 4 }; __VOLK_ATTR_ALIGNED(32) static const int _pi32_4[8] = { 4, 4, 4, 4, 4, 4, 4, 4 };
static const float _ps_minus_cephes_DP1[8] __attribute__((aligned(32))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP1[8] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
static const float _ps_minus_cephes_DP2[8] __attribute__((aligned(32))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP2[8] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
static const float _ps_minus_cephes_DP3[8] __attribute__((aligned(32))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP3[8] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
static const float _ps_coscof_p0[8] __attribute__((aligned(32))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p0[8] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
static const float _ps_coscof_p1[8] __attribute__((aligned(32))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p1[8] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
static const float _ps_coscof_p2[8] __attribute__((aligned(32))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p2[8] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
static const float _ps_sincof_p0[8] __attribute__((aligned(32))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p0[8] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
static const float _ps_sincof_p1[8] __attribute__((aligned(32))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p1[8] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
static const float _ps_sincof_p2[8] __attribute__((aligned(32))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p2[8] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
static const float _ps_0p5[8] __attribute__((aligned(32))) = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f }; __VOLK_ATTR_ALIGNED(32) static const float _ps_0p5[8] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
static const float _ps_1[8] __attribute__((aligned(32))) = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }; __VOLK_ATTR_ALIGNED(32) static const float _ps_1[8] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f };
float eight_phases[8] __attribute__((aligned(32))) = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc }; __VOLK_ATTR_ALIGNED(32) float eight_phases[8] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc };
float eight_phases_inc[8] __attribute__((aligned(32))) = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc }; __VOLK_ATTR_ALIGNED(32) float eight_phases_inc[8] = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc };
eight_phases_reg = _mm256_load_ps(eight_phases); eight_phases_reg = _mm256_load_ps(eight_phases);
const __m256 eight_phases_inc_reg = _mm256_load_ps(eight_phases_inc); const __m256 eight_phases_inc_reg = _mm256_load_ps(eight_phases_inc);
@ -620,29 +620,29 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_u_avx2(lv_32fc_t* out, const fl
__m128 aux, c1, s1; __m128 aux, c1, s1;
/* declare some AVX2 constants */ /* declare some AVX2 constants */
static const int _ps_inv_sign_mask[8] __attribute__((aligned(32))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; __VOLK_ATTR_ALIGNED(32) static const int _ps_inv_sign_mask[8] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
static const int _ps_sign_mask[8] __attribute__((aligned(32))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; __VOLK_ATTR_ALIGNED(32) static const int _ps_sign_mask[8] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
static const float _ps_cephes_FOPI[8] __attribute__((aligned(32))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_cephes_FOPI[8] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
static const int _pi32_1[8] __attribute__((aligned(32))) = { 1, 1, 1, 1, 1, 1, 1, 1 }; __VOLK_ATTR_ALIGNED(32) static const int _pi32_1[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
static const int _pi32_inv1[8] __attribute__((aligned(32))) = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 }; __VOLK_ATTR_ALIGNED(32) static const int _pi32_inv1[8] = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 };
static const int _pi32_2[8] __attribute__((aligned(32))) = { 2, 2, 2, 2, 2, 2, 2, 2 }; __VOLK_ATTR_ALIGNED(32) static const int _pi32_2[8] = { 2, 2, 2, 2, 2, 2, 2, 2 };
static const int _pi32_4[8] __attribute__((aligned(32))) = { 4, 4, 4, 4, 4, 4, 4, 4 }; __VOLK_ATTR_ALIGNED(32) static const int _pi32_4[8] = { 4, 4, 4, 4, 4, 4, 4, 4 };
static const float _ps_minus_cephes_DP1[8] __attribute__((aligned(32))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP1[8] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
static const float _ps_minus_cephes_DP2[8] __attribute__((aligned(32))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP2[8] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
static const float _ps_minus_cephes_DP3[8] __attribute__((aligned(32))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP3[8] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
static const float _ps_coscof_p0[8] __attribute__((aligned(32))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p0[8] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
static const float _ps_coscof_p1[8] __attribute__((aligned(32))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p1[8] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
static const float _ps_coscof_p2[8] __attribute__((aligned(32))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p2[8] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
static const float _ps_sincof_p0[8] __attribute__((aligned(32))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p0[8] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
static const float _ps_sincof_p1[8] __attribute__((aligned(32))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p1[8] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
static const float _ps_sincof_p2[8] __attribute__((aligned(32))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; __VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p2[8] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
static const float _ps_0p5[8] __attribute__((aligned(32))) = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f }; __VOLK_ATTR_ALIGNED(32) static const float _ps_0p5[8] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
static const float _ps_1[8] __attribute__((aligned(32))) = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }; __VOLK_ATTR_ALIGNED(32) static const float _ps_1[8] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f };
float eight_phases[8] __attribute__((aligned(32))) = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc }; __VOLK_ATTR_ALIGNED(32) float eight_phases[8] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc };
float eight_phases_inc[8] __attribute__((aligned(32))) = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc }; __VOLK_ATTR_ALIGNED(32) float eight_phases_inc[8] = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc };
eight_phases_reg = _mm256_load_ps(eight_phases); eight_phases_reg = _mm256_load_ps(eight_phases);
const __m256 eight_phases_inc_reg = _mm256_load_ps(eight_phases_inc); const __m256 eight_phases_inc_reg = _mm256_load_ps(eight_phases_inc);