mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2024-12-15 04:30:33 +00:00
Fixing problems with _mm_loadu_si128
This commit is contained in:
parent
d3aade34b2
commit
4a2e03f9a8
@ -59,8 +59,8 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* a
|
|||||||
|
|
||||||
for(int number = 0; number < sse_iters; number++){
|
for(int number = 0; number < sse_iters; number++){
|
||||||
|
|
||||||
aVal = _mm_load_si128((__m128i*)aPtr);
|
aVal = _mm_loadu_si128((__m128i*)aPtr);
|
||||||
bVal = _mm_load_si128((__m128i*)bPtr);
|
bVal = _mm_loadu_si128((__m128i*)bPtr);
|
||||||
|
|
||||||
cVal = _mm_add_epi8(aVal, bVal);
|
cVal = _mm_add_epi8(aVal, bVal);
|
||||||
|
|
||||||
|
@ -119,8 +119,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con
|
|||||||
|
|
||||||
for(int number = 0; number < sse_iters; number++){
|
for(int number = 0; number < sse_iters; number++){
|
||||||
|
|
||||||
x = _mm_load_si128((__m128i*)a);
|
x = _mm_loadu_si128((__m128i*)a);
|
||||||
y = _mm_load_si128((__m128i*)b);
|
y = _mm_loadu_si128((__m128i*)b);
|
||||||
|
|
||||||
imagx = _mm_srli_si128 (x, 1);
|
imagx = _mm_srli_si128 (x, 1);
|
||||||
imagx = _mm_and_si128 (imagx, mult1);
|
imagx = _mm_and_si128 (imagx, mult1);
|
||||||
|
@ -62,8 +62,8 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co
|
|||||||
|
|
||||||
for(int number = 0;number < sse_iters; number++){
|
for(int number = 0;number < sse_iters; number++){
|
||||||
|
|
||||||
x = _mm_load_si128((__m128i*)a);
|
x = _mm_loadu_si128((__m128i*)a);
|
||||||
y = _mm_load_si128((__m128i*)b);
|
y = _mm_loadu_si128((__m128i*)b);
|
||||||
|
|
||||||
imagx = _mm_srli_si128 (x, 1);
|
imagx = _mm_srli_si128 (x, 1);
|
||||||
imagx = _mm_and_si128 (imagx, mult1);
|
imagx = _mm_and_si128 (imagx, mult1);
|
||||||
|
@ -222,8 +222,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o
|
|||||||
for(int number = 0;number < sse_iters; number++){
|
for(int number = 0;number < sse_iters; number++){
|
||||||
|
|
||||||
//Perform the carrier wipe-off
|
//Perform the carrier wipe-off
|
||||||
x = _mm_load_si128((__m128i*)input_ptr);
|
x = _mm_loadu_si128((__m128i*)input_ptr);
|
||||||
y = _mm_load_si128((__m128i*)carrier_ptr);
|
y = _mm_loadu_si128((__m128i*)carrier_ptr);
|
||||||
|
|
||||||
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
|
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
|
||||||
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
|
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
|
||||||
@ -231,7 +231,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o
|
|||||||
CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
|
CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
|
||||||
|
|
||||||
//Get early values
|
//Get early values
|
||||||
y = _mm_load_si128((__m128i*)E_code_ptr);
|
y = _mm_loadu_si128((__m128i*)E_code_ptr);
|
||||||
|
|
||||||
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
||||||
|
|
||||||
@ -239,7 +239,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o
|
|||||||
E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
|
E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
|
||||||
|
|
||||||
//Get prompt values
|
//Get prompt values
|
||||||
y = _mm_load_si128((__m128i*)P_code_ptr);
|
y = _mm_loadu_si128((__m128i*)P_code_ptr);
|
||||||
|
|
||||||
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
||||||
|
|
||||||
@ -247,7 +247,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o
|
|||||||
P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
|
P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
|
||||||
|
|
||||||
//Get late values
|
//Get late values
|
||||||
y = _mm_load_si128((__m128i*)L_code_ptr);
|
y = _mm_loadu_si128((__m128i*)L_code_ptr);
|
||||||
|
|
||||||
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
||||||
|
|
||||||
|
@ -268,8 +268,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out
|
|||||||
for(int number = 0;number < sse_iters; number++){
|
for(int number = 0;number < sse_iters; number++){
|
||||||
|
|
||||||
//Perform the carrier wipe-off
|
//Perform the carrier wipe-off
|
||||||
x = _mm_load_si128((__m128i*)input_ptr);
|
x = _mm_loadu_si128((__m128i*)input_ptr);
|
||||||
y = _mm_load_si128((__m128i*)carrier_ptr);
|
y = _mm_loadu_si128((__m128i*)carrier_ptr);
|
||||||
|
|
||||||
imagx = _mm_srli_si128 (x, 1);
|
imagx = _mm_srli_si128 (x, 1);
|
||||||
imagx = _mm_and_si128 (imagx, mult1);
|
imagx = _mm_and_si128 (imagx, mult1);
|
||||||
@ -288,7 +288,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out
|
|||||||
imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||||
|
|
||||||
//Get early values
|
//Get early values
|
||||||
y = _mm_load_si128((__m128i*)E_code_ptr);
|
y = _mm_loadu_si128((__m128i*)E_code_ptr);
|
||||||
|
|
||||||
imagy = _mm_srli_si128 (y, 1);
|
imagy = _mm_srli_si128 (y, 1);
|
||||||
imagy = _mm_and_si128 (imagy, mult1);
|
imagy = _mm_and_si128 (imagy, mult1);
|
||||||
@ -306,7 +306,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out
|
|||||||
imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
|
imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
|
||||||
|
|
||||||
//Get late values
|
//Get late values
|
||||||
y = _mm_load_si128((__m128i*)L_code_ptr);
|
y = _mm_loadu_si128((__m128i*)L_code_ptr);
|
||||||
|
|
||||||
imagy = _mm_srli_si128 (y, 1);
|
imagy = _mm_srli_si128 (y, 1);
|
||||||
imagy = _mm_and_si128 (imagy, mult1);
|
imagy = _mm_and_si128 (imagy, mult1);
|
||||||
@ -324,7 +324,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out
|
|||||||
imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
|
imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
|
||||||
|
|
||||||
//Get prompt values
|
//Get prompt values
|
||||||
y = _mm_load_si128((__m128i*)P_code_ptr);
|
y = _mm_loadu_si128((__m128i*)P_code_ptr);
|
||||||
|
|
||||||
imagy = _mm_srli_si128 (y, 1);
|
imagy = _mm_srli_si128 (y, 1);
|
||||||
imagy = _mm_and_si128 (imagy, mult1);
|
imagy = _mm_and_si128 (imagy, mult1);
|
||||||
|
@ -289,8 +289,8 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
|
|||||||
for(int number = 0;number < sse_iters; number++){
|
for(int number = 0;number < sse_iters; number++){
|
||||||
|
|
||||||
//Perform the carrier wipe-off
|
//Perform the carrier wipe-off
|
||||||
x = _mm_load_si128((__m128i*)input_ptr);
|
x = _mm_loadu_si128((__m128i*)input_ptr);
|
||||||
y = _mm_load_si128((__m128i*)carrier_ptr);
|
y = _mm_loadu_si128((__m128i*)carrier_ptr);
|
||||||
|
|
||||||
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
|
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
|
||||||
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
|
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
|
||||||
@ -298,7 +298,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
|
|||||||
CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
|
CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
|
||||||
|
|
||||||
//Get very early values
|
//Get very early values
|
||||||
y = _mm_load_si128((__m128i*)VE_code_ptr);
|
y = _mm_loadu_si128((__m128i*)VE_code_ptr);
|
||||||
|
|
||||||
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
||||||
|
|
||||||
@ -306,7 +306,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
|
|||||||
VE_code_acc = _mm_add_ps (VE_code_acc, output_ps_2);
|
VE_code_acc = _mm_add_ps (VE_code_acc, output_ps_2);
|
||||||
|
|
||||||
//Get early values
|
//Get early values
|
||||||
y = _mm_load_si128((__m128i*)E_code_ptr);
|
y = _mm_loadu_si128((__m128i*)E_code_ptr);
|
||||||
|
|
||||||
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
||||||
|
|
||||||
@ -314,7 +314,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
|
|||||||
E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
|
E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
|
||||||
|
|
||||||
//Get prompt values
|
//Get prompt values
|
||||||
y = _mm_load_si128((__m128i*)P_code_ptr);
|
y = _mm_loadu_si128((__m128i*)P_code_ptr);
|
||||||
|
|
||||||
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
||||||
|
|
||||||
@ -322,7 +322,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
|
|||||||
P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
|
P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
|
||||||
|
|
||||||
//Get late values
|
//Get late values
|
||||||
y = _mm_load_si128((__m128i*)L_code_ptr);
|
y = _mm_loadu_si128((__m128i*)L_code_ptr);
|
||||||
|
|
||||||
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
||||||
|
|
||||||
@ -330,7 +330,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
|
|||||||
L_code_acc = _mm_add_ps (L_code_acc, output_ps_2);
|
L_code_acc = _mm_add_ps (L_code_acc, output_ps_2);
|
||||||
|
|
||||||
//Get very late values
|
//Get very late values
|
||||||
y = _mm_load_si128((__m128i*)VL_code_ptr);
|
y = _mm_loadu_si128((__m128i*)VL_code_ptr);
|
||||||
|
|
||||||
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user