1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2024-12-14 12:10:34 +00:00

Fixing problems with _mm_loadu_si128

This commit is contained in:
andres 2014-10-20 16:55:55 +02:00
parent d3aade34b2
commit 4a2e03f9a8
6 changed files with 23 additions and 23 deletions

View File

@ -59,8 +59,8 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* a
for(int number = 0; number < sse_iters; number++){
aVal = _mm_load_si128((__m128i*)aPtr);
bVal = _mm_load_si128((__m128i*)bPtr);
aVal = _mm_loadu_si128((__m128i*)aPtr);
bVal = _mm_loadu_si128((__m128i*)bPtr);
cVal = _mm_add_epi8(aVal, bVal);

View File

@ -119,8 +119,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con
for(int number = 0; number < sse_iters; number++){
x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b);
x = _mm_loadu_si128((__m128i*)a);
y = _mm_loadu_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);

View File

@ -62,8 +62,8 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co
for(int number = 0;number < sse_iters; number++){
x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b);
x = _mm_loadu_si128((__m128i*)a);
y = _mm_loadu_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);

View File

@ -222,8 +222,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o
for(int number = 0;number < sse_iters; number++){
//Perform the carrier wipe-off
x = _mm_load_si128((__m128i*)input_ptr);
y = _mm_load_si128((__m128i*)carrier_ptr);
x = _mm_loadu_si128((__m128i*)input_ptr);
y = _mm_loadu_si128((__m128i*)carrier_ptr);
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
@ -231,7 +231,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o
CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
//Get early values
y = _mm_load_si128((__m128i*)E_code_ptr);
y = _mm_loadu_si128((__m128i*)E_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
@ -239,7 +239,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o
E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
//Get prompt values
y = _mm_load_si128((__m128i*)P_code_ptr);
y = _mm_loadu_si128((__m128i*)P_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
@ -247,7 +247,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o
P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
//Get late values
y = _mm_load_si128((__m128i*)L_code_ptr);
y = _mm_loadu_si128((__m128i*)L_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)

View File

@ -268,8 +268,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out
for(int number = 0;number < sse_iters; number++){
//Perform the carrier wipe-off
x = _mm_load_si128((__m128i*)input_ptr);
y = _mm_load_si128((__m128i*)carrier_ptr);
x = _mm_loadu_si128((__m128i*)input_ptr);
y = _mm_loadu_si128((__m128i*)carrier_ptr);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
@ -288,7 +288,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out
imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//Get early values
y = _mm_load_si128((__m128i*)E_code_ptr);
y = _mm_loadu_si128((__m128i*)E_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
@ -306,7 +306,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out
imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
//Get late values
y = _mm_load_si128((__m128i*)L_code_ptr);
y = _mm_loadu_si128((__m128i*)L_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
@ -324,7 +324,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out
imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
//Get prompt values
y = _mm_load_si128((__m128i*)P_code_ptr);
y = _mm_loadu_si128((__m128i*)P_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);

View File

@ -289,8 +289,8 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
for(int number = 0;number < sse_iters; number++){
//Perform the carrier wipe-off
x = _mm_load_si128((__m128i*)input_ptr);
y = _mm_load_si128((__m128i*)carrier_ptr);
x = _mm_loadu_si128((__m128i*)input_ptr);
y = _mm_loadu_si128((__m128i*)carrier_ptr);
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
@ -298,7 +298,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
//Get very early values
y = _mm_load_si128((__m128i*)VE_code_ptr);
y = _mm_loadu_si128((__m128i*)VE_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
@ -306,7 +306,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
VE_code_acc = _mm_add_ps (VE_code_acc, output_ps_2);
//Get early values
y = _mm_load_si128((__m128i*)E_code_ptr);
y = _mm_loadu_si128((__m128i*)E_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
@ -314,7 +314,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
//Get prompt values
y = _mm_load_si128((__m128i*)P_code_ptr);
y = _mm_loadu_si128((__m128i*)P_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
@ -322,7 +322,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
//Get late values
y = _mm_load_si128((__m128i*)L_code_ptr);
y = _mm_loadu_si128((__m128i*)L_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
@ -330,7 +330,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
L_code_acc = _mm_add_ps (L_code_acc, output_ps_2);
//Get very late values
y = _mm_load_si128((__m128i*)VL_code_ptr);
y = _mm_loadu_si128((__m128i*)VL_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)