From 817139ba503c9e9c79eacd8a633ad4070e759623 Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Thu, 31 Mar 2016 19:39:37 +0200 Subject: [PATCH] ask for aligned memory in a more portable way --- .../volk_gnsssdr_16ic_resampler_16ic.h | 30 ++-- .../volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h | 16 +- ...gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h | 20 +-- .../volk_gnsssdr_16ic_xn_resampler_16ic_xn.h | 30 ++-- .../volk_gnsssdr_32f_sincos_32fc.h | 72 ++++---- ...gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h | 8 +- .../volk_gnsssdr_s32f_sincos_32fc.h | 160 +++++++++--------- 7 files changed, 168 insertions(+), 168 deletions(-) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resampler_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resampler_16ic.h index a43a63528..063d02bad 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resampler_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resampler_16ic.h @@ -100,7 +100,7 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co lv_16sc_t* _result = result; - __attribute__((aligned(16))) int local_code_chip_index[4]; + __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4]; __m128 _rem_code_phase, _code_phase_step_chips; __m128i _code_length_chips, _code_length_chips_minus1; __m128 _code_phase_out, _code_phase_out_with_offset; @@ -108,13 +108,13 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co _rem_code_phase = _mm_load1_ps(&rem_code_phase_chips); //load float to all four float values in m128 register _code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register - __attribute__((aligned(16))) int four_times_code_length_chips_minus1[4]; + __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4]; four_times_code_length_chips_minus1[0] = code_length_chips-1; four_times_code_length_chips_minus1[1] = code_length_chips-1; four_times_code_length_chips_minus1[2] = code_length_chips-1; four_times_code_length_chips_minus1[3] = code_length_chips-1; - __attribute__((aligned(16))) int four_times_code_length_chips[4]; + __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4]; four_times_code_length_chips[0] = code_length_chips; four_times_code_length_chips[1] = code_length_chips; four_times_code_length_chips[2] = code_length_chips; @@ -127,9 +127,9 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co __m128i zero = _mm_setzero_si128(); - __attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; + __VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; __m128 _4output_index = _mm_load_ps(init_idx_float); - __attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; + __VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; __m128 _4constant_float = _mm_load_ps(init_4constant_float); @@ -183,7 +183,7 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co lv_16sc_t* _result = result; - __attribute__((aligned(16))) int local_code_chip_index[4]; + __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4]; __m128 _rem_code_phase, _code_phase_step_chips; __m128i _code_length_chips, _code_length_chips_minus1; __m128 _code_phase_out, _code_phase_out_with_offset; @@ -191,13 +191,13 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co _rem_code_phase = _mm_load1_ps(&rem_code_phase_chips); //load float to all four float values in m128 register _code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register - __attribute__((aligned(16))) int four_times_code_length_chips_minus1[4]; + __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4]; four_times_code_length_chips_minus1[0] = code_length_chips-1; four_times_code_length_chips_minus1[1] = code_length_chips-1; four_times_code_length_chips_minus1[2] = code_length_chips-1; four_times_code_length_chips_minus1[3] = code_length_chips-1; - __attribute__((aligned(16))) int four_times_code_length_chips[4]; + __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4]; four_times_code_length_chips[0] = code_length_chips; four_times_code_length_chips[1] = code_length_chips; four_times_code_length_chips[2] = code_length_chips; @@ -210,9 +210,9 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co __m128i zero = _mm_setzero_si128(); - __attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; + __VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; __m128 _4output_index = _mm_loadu_ps(init_idx_float); - __attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; + __VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; __m128 _4constant_float = _mm_loadu_ps(init_4constant_float); for(number = 0; number < quarterPoints; number++) @@ -265,7 +265,7 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons lv_16sc_t* _result = result; - __attribute__((aligned(16))) int local_code_chip_index[4]; + __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4]; float32x4_t _rem_code_phase, _code_phase_step_chips; int32x4_t _code_length_chips, _code_length_chips_minus1; float32x4_t _code_phase_out, _code_phase_out_with_offset; @@ -274,13 +274,13 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons _rem_code_phase = vld1q_dup_f32(&rem_code_phase_chips); //load float to all four float values in m128 register _code_phase_step_chips = vld1q_dup_f32(&code_phase_step_chips); //load float to all four float values in m128 register - __attribute__((aligned(16))) int four_times_code_length_chips_minus1[4]; + __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4]; four_times_code_length_chips_minus1[0] = code_length_chips - 1; four_times_code_length_chips_minus1[1] = code_length_chips - 1; four_times_code_length_chips_minus1[2] = code_length_chips - 1; four_times_code_length_chips_minus1[3] = code_length_chips - 1; - __attribute__((aligned(16))) int four_times_code_length_chips[4]; + __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4]; four_times_code_length_chips[0] = code_length_chips; four_times_code_length_chips[1] = code_length_chips; four_times_code_length_chips[2] = code_length_chips; @@ -293,9 +293,9 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons uint32x4_t negative_indexes, overflow_indexes; int32x4_t zero = vmovq_n_s32(0); - __attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; + __VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; float32x4_t _4output_index = vld1q_f32(init_idx_float); - __attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; + __VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; float32x4_t _4constant_float = vld1q_f32(init_4constant_float); for(number = 0; number < quarterPoints; number++) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h index 2bc8d1aba..6b0bb7679 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h @@ -141,11 +141,11 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out const unsigned int sse_iters = num_points / 4; __m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128i c1, c2, result; - __attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); - __attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2]; two_phase_acc[0] = (*phase); two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); @@ -243,11 +243,11 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc const unsigned int ROTATOR_RELOAD = 512; __m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128i c1, c2, result; - __attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); - __attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2]; two_phase_acc[0] = (*phase); two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); @@ -393,11 +393,11 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out const unsigned int sse_iters = num_points / 4; __m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128i c1, c2, result; - __attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); - __attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2]; two_phase_acc[0] = (*phase); two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc); @@ -494,11 +494,11 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(lv_16sc unsigned int ROTATOR_RELOAD = 512; __m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128i c1, c2, result; - __attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); - __attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2]; two_phase_acc[0] = (*phase); two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc); diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h index 91b3da8d8..20d1b1bec 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h @@ -203,11 +203,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ // phase rotation registers __m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg; __m128i pc1, pc2; - __attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); - __attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2]; two_phase_acc[0] = (*phase); two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); @@ -376,11 +376,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l // phase rotation registers __m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg; __m128i pc1, pc2; - __attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); - __attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2]; two_phase_acc[0] = (*phase); two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); @@ -619,11 +619,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ // phase rotation registers __m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg; __m128i pc1, pc2; - __attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc_reg = _mm_loadu_ps((float*) two_phase_inc); - __attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2]; two_phase_acc[0] = (*phase); two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc_reg = _mm_loadu_ps((float*)two_phase_acc); @@ -780,11 +780,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_ __m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128i c1, c2, result1, result2; - __attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); - __attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2]; two_phase_acc[0] = (*phase); two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc); @@ -985,11 +985,11 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l __m128 a, b, two_phase_acc_reg, two_phase_inc_reg; __m128i c1, c2, result1, result2; - __attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); - __attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2]; two_phase_acc[0] = (*phase); two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc); diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h index 51c5ae963..e84f44574 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_xn_resampler_16ic_xn.h @@ -107,20 +107,20 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res const unsigned int quarterPoints = num_output_samples / 4; lv_16sc_t** _result = result; - __attribute__((aligned(16))) int local_code_chip_index[4]; + __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4]; float tmp_rem_code_phase_chips; __m128 _rem_code_phase,_code_phase_step_chips; __m128i _code_length_chips,_code_length_chips_minus1; __m128 _code_phase_out,_code_phase_out_with_offset; _code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register - __attribute__((aligned(16))) int four_times_code_length_chips_minus1[4]; + __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4]; four_times_code_length_chips_minus1[0] = code_length_chips - 1; four_times_code_length_chips_minus1[1] = code_length_chips - 1; four_times_code_length_chips_minus1[2] = code_length_chips - 1; four_times_code_length_chips_minus1[3] = code_length_chips - 1; - __attribute__((aligned(16))) int four_times_code_length_chips[4]; + __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4]; four_times_code_length_chips[0] = code_length_chips; four_times_code_length_chips[1] = code_length_chips; four_times_code_length_chips[2] = code_length_chips; @@ -133,9 +133,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res __m128i zero = _mm_setzero_si128(); - __attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; + __VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; __m128 _4output_index = _mm_load_ps(init_idx_float); - __attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; + __VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; __m128 _4constant_float = _mm_load_ps(init_4constant_float); int current_vector = 0; @@ -200,20 +200,20 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res const unsigned int quarterPoints = num_output_samples / 4; lv_16sc_t** _result = result; - __attribute__((aligned(16))) int local_code_chip_index[4]; + __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4]; float tmp_rem_code_phase_chips; __m128 _rem_code_phase,_code_phase_step_chips; __m128i _code_length_chips,_code_length_chips_minus1; __m128 _code_phase_out,_code_phase_out_with_offset; _code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register - __attribute__((aligned(16))) int four_times_code_length_chips_minus1[4]; + __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4]; four_times_code_length_chips_minus1[0] = code_length_chips - 1; four_times_code_length_chips_minus1[1] = code_length_chips - 1; four_times_code_length_chips_minus1[2] = code_length_chips - 1; four_times_code_length_chips_minus1[3] = code_length_chips - 1; - __attribute__((aligned(16))) int four_times_code_length_chips[4]; + __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4]; four_times_code_length_chips[0] = code_length_chips; four_times_code_length_chips[1] = code_length_chips; four_times_code_length_chips[2] = code_length_chips; @@ -226,9 +226,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res __m128i zero = _mm_setzero_si128(); - __attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; + __VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; __m128 _4output_index = _mm_loadu_ps(init_idx_float); - __attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; + __VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; __m128 _4constant_float = _mm_loadu_ps(init_4constant_float); int current_vector = 0; @@ -294,7 +294,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul float32x4_t half = vdupq_n_f32(0.5f); lv_16sc_t** _result = result; - __attribute__((aligned(16))) int local_code_chip_index[4]; + __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4]; float tmp_rem_code_phase_chips; float32x4_t _rem_code_phase, _code_phase_step_chips; int32x4_t _code_length_chips, _code_length_chips_minus1; @@ -302,13 +302,13 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul float32x4_t sign, PlusHalf, Round; _code_phase_step_chips = vld1q_dup_f32(&code_phase_step_chips); //load float to all four float values in float32x4_t register - __attribute__((aligned(16))) int four_times_code_length_chips_minus1[4]; + __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4]; four_times_code_length_chips_minus1[0] = code_length_chips - 1; four_times_code_length_chips_minus1[1] = code_length_chips - 1; four_times_code_length_chips_minus1[2] = code_length_chips - 1; four_times_code_length_chips_minus1[3] = code_length_chips - 1; - __attribute__((aligned(16))) int four_times_code_length_chips[4]; + __VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4]; four_times_code_length_chips[0] = code_length_chips; four_times_code_length_chips[1] = code_length_chips; four_times_code_length_chips[2] = code_length_chips; @@ -321,9 +321,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul uint32x4_t negative_indexes, overflow_indexes; int32x4_t zero = vmovq_n_s32(0); - __attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; + __VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; float32x4_t _4output_index = vld1q_f32(init_idx_float); - __attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; + __VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; float32x4_t _4constant_float = vld1q_f32(init_4constant_float); int current_vector = 0; diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_sincos_32fc.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_sincos_32fc.h index 5a007d455..f43e99faf 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_sincos_32fc.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_sincos_32fc.h @@ -268,26 +268,26 @@ static inline void volk_gnsssdr_32f_sincos_32fc_a_sse2(lv_32fc_t* out, const flo __m128i emm0, emm2, emm4; /* declare some SSE constants */ - static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; - static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; + __VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; + __VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; - static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; - static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 }; - static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 }; - static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2}; - static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4}; + __VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; + __VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 }; + __VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 }; + __VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2}; + __VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4}; - static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; - static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; - static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; - static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; - static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; - static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; - static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; - static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; - static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; - static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f }; - static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; for(;number < sse_iters; number++) { @@ -421,26 +421,26 @@ static inline void volk_gnsssdr_32f_sincos_32fc_u_sse2(lv_32fc_t* out, const flo __m128i emm0, emm2, emm4; /* declare some SSE constants */ - static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; - static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; + __VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; + __VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; - static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; - static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 }; - static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 }; - static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2}; - static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4}; + __VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; + __VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 }; + __VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 }; + __VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2}; + __VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4}; - static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; - static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; - static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; - static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; - static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; - static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; - static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; - static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; - static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; - static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f }; - static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; for(;number < sse_iters; number++) { diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h index ef00e5db2..46d22dfe4 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn.h @@ -183,11 +183,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_ // phase rotation registers __m128 a, two_phase_acc_reg, two_phase_inc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z1; - __attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); - __attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2]; two_phase_acc[0] = (*phase); two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); @@ -289,11 +289,11 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_ // phase rotation registers __m128 a, two_phase_acc_reg, two_phase_inc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z1; - __attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2]; two_phase_inc[0] = phase_inc * phase_inc; two_phase_inc[1] = phase_inc * phase_inc; two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); - __attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2]; two_phase_acc[0] = (*phase); two_phase_acc[1] = (*phase) * phase_inc; two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h index 8fbadeaf5..887989679 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h @@ -82,29 +82,29 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_a_sse2(lv_32fc_t* out, const fl __m128i emm0, emm2, emm4; /* declare some SSE constants */ - static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; - static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; + static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; + static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; - static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; - static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 }; - static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 }; - static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2}; - static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4}; + static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; + static const int _pi32_1[4] = { 1, 1, 1, 1 }; + static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 }; + static const int _pi32_2[4] = { 2, 2, 2, 2}; + static const int _pi32_4[4] = { 4, 4, 4, 4}; - static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; - static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; - static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; - static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; - static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; - static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; - static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; - static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; - static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; - static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f }; - static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f }; + static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; + static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; + static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; + static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; + static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; + static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; + static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; + static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; + static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; + static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f }; + static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; - float four_phases[4] __attribute__((aligned(16))) = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc }; - float four_phases_inc[4] __attribute__((aligned(16))) = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc }; + float four_phases[4] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc }; + float four_phases_inc[4] = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc }; four_phases_reg = _mm_load_ps(four_phases); const __m128 four_phases_inc_reg = _mm_load_ps(four_phases_inc); @@ -239,29 +239,29 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_u_sse2(lv_32fc_t* out, const fl __m128i emm0, emm2, emm4; /* declare some SSE constants */ - static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; - static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; + __VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; + __VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; - static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; - static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 }; - static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 }; - static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2}; - static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4}; + __VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; + __VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 }; + __VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 }; + __VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2}; + __VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4}; - static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; - static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; - static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; - static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; - static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; - static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; - static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; - static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; - static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; - static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f }; - static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f }; + __VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; - float four_phases[4] __attribute__((aligned(16))) = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc }; - float four_phases_inc[4] __attribute__((aligned(16))) = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc }; + __VOLK_ATTR_ALIGNED(16) float four_phases[4] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc }; + __VOLK_ATTR_ALIGNED(16) float four_phases_inc[4] = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc }; four_phases_reg = _mm_load_ps(four_phases); const __m128 four_phases_inc_reg = _mm_load_ps(four_phases_inc); @@ -452,29 +452,29 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_a_avx2(lv_32fc_t* out, const fl __m128 aux, c1, s1; /* declare some AXX2 constants */ - static const int _ps_inv_sign_mask[8] __attribute__((aligned(32))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; - static const int _ps_sign_mask[8] __attribute__((aligned(32))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; + __VOLK_ATTR_ALIGNED(32) static const int _ps_inv_sign_mask[8] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; + __VOLK_ATTR_ALIGNED(32) static const int _ps_sign_mask[8] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; - static const float _ps_cephes_FOPI[8] __attribute__((aligned(32))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; - static const int _pi32_1[8] __attribute__((aligned(32))) = { 1, 1, 1, 1, 1, 1, 1, 1 }; - static const int _pi32_inv1[8] __attribute__((aligned(32))) = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 }; - static const int _pi32_2[8] __attribute__((aligned(32))) = { 2, 2, 2, 2, 2, 2, 2, 2 }; - static const int _pi32_4[8] __attribute__((aligned(32))) = { 4, 4, 4, 4, 4, 4, 4, 4 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_cephes_FOPI[8] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; + __VOLK_ATTR_ALIGNED(32) static const int _pi32_1[8] = { 1, 1, 1, 1, 1, 1, 1, 1 }; + __VOLK_ATTR_ALIGNED(32) static const int _pi32_inv1[8] = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 }; + __VOLK_ATTR_ALIGNED(32) static const int _pi32_2[8] = { 2, 2, 2, 2, 2, 2, 2, 2 }; + __VOLK_ATTR_ALIGNED(32) static const int _pi32_4[8] = { 4, 4, 4, 4, 4, 4, 4, 4 }; - static const float _ps_minus_cephes_DP1[8] __attribute__((aligned(32))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; - static const float _ps_minus_cephes_DP2[8] __attribute__((aligned(32))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; - static const float _ps_minus_cephes_DP3[8] __attribute__((aligned(32))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; - static const float _ps_coscof_p0[8] __attribute__((aligned(32))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; - static const float _ps_coscof_p1[8] __attribute__((aligned(32))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; - static const float _ps_coscof_p2[8] __attribute__((aligned(32))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; - static const float _ps_sincof_p0[8] __attribute__((aligned(32))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; - static const float _ps_sincof_p1[8] __attribute__((aligned(32))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; - static const float _ps_sincof_p2[8] __attribute__((aligned(32))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; - static const float _ps_0p5[8] __attribute__((aligned(32))) = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f }; - static const float _ps_1[8] __attribute__((aligned(32))) = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP1[8] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP2[8] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP3[8] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p0[8] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p1[8] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p2[8] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p0[8] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p1[8] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p2[8] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_0p5[8] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_1[8] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }; - float eight_phases[8] __attribute__((aligned(32))) = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc }; - float eight_phases_inc[8] __attribute__((aligned(32))) = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc }; + __VOLK_ATTR_ALIGNED(32) float eight_phases[8] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc }; + __VOLK_ATTR_ALIGNED(32) float eight_phases_inc[8] = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc }; eight_phases_reg = _mm256_load_ps(eight_phases); const __m256 eight_phases_inc_reg = _mm256_load_ps(eight_phases_inc); @@ -620,29 +620,29 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_u_avx2(lv_32fc_t* out, const fl __m128 aux, c1, s1; /* declare some AXX2 constants */ - static const int _ps_inv_sign_mask[8] __attribute__((aligned(32))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; - static const int _ps_sign_mask[8] __attribute__((aligned(32))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; + __VOLK_ATTR_ALIGNED(32) static const int _ps_inv_sign_mask[8] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; + __VOLK_ATTR_ALIGNED(32) static const int _ps_sign_mask[8] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; - static const float _ps_cephes_FOPI[8] __attribute__((aligned(32))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; - static const int _pi32_1[8] __attribute__((aligned(32))) = { 1, 1, 1, 1, 1, 1, 1, 1 }; - static const int _pi32_inv1[8] __attribute__((aligned(32))) = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 }; - static const int _pi32_2[8] __attribute__((aligned(32))) = { 2, 2, 2, 2, 2, 2, 2, 2 }; - static const int _pi32_4[8] __attribute__((aligned(32))) = { 4, 4, 4, 4, 4, 4, 4, 4 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_cephes_FOPI[8] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; + __VOLK_ATTR_ALIGNED(32) static const int _pi32_1[8] = { 1, 1, 1, 1, 1, 1, 1, 1 }; + __VOLK_ATTR_ALIGNED(32) static const int _pi32_inv1[8] = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 }; + __VOLK_ATTR_ALIGNED(32) static const int _pi32_2[8] = { 2, 2, 2, 2, 2, 2, 2, 2 }; + __VOLK_ATTR_ALIGNED(32) static const int _pi32_4[8] = { 4, 4, 4, 4, 4, 4, 4, 4 }; - static const float _ps_minus_cephes_DP1[8] __attribute__((aligned(32))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; - static const float _ps_minus_cephes_DP2[8] __attribute__((aligned(32))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; - static const float _ps_minus_cephes_DP3[8] __attribute__((aligned(32))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; - static const float _ps_coscof_p0[8] __attribute__((aligned(32))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; - static const float _ps_coscof_p1[8] __attribute__((aligned(32))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; - static const float _ps_coscof_p2[8] __attribute__((aligned(32))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; - static const float _ps_sincof_p0[8] __attribute__((aligned(32))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; - static const float _ps_sincof_p1[8] __attribute__((aligned(32))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; - static const float _ps_sincof_p2[8] __attribute__((aligned(32))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; - static const float _ps_0p5[8] __attribute__((aligned(32))) = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f }; - static const float _ps_1[8] __attribute__((aligned(32))) = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP1[8] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP2[8] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP3[8] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p0[8] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p1[8] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p2[8] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p0[8] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p1[8] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p2[8] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_0p5[8] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f }; + __VOLK_ATTR_ALIGNED(32) static const float _ps_1[8] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }; - float eight_phases[8] __attribute__((aligned(32))) = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc }; - float eight_phases_inc[8] __attribute__((aligned(32))) = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc }; + __VOLK_ATTR_ALIGNED(32) float eight_phases[8] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc }; + __VOLK_ATTR_ALIGNED(32) float eight_phases_inc[8] = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc }; eight_phases_reg = _mm256_load_ps(eight_phases); const __m256 eight_phases_inc_reg = _mm256_load_ps(eight_phases_inc);