mirror of
				https://github.com/gnss-sdr/gnss-sdr
				synced 2025-10-31 07:13:03 +00:00 
			
		
		
		
	Fixing alignment problems in the unaligned (_u_sse2) kernels by replacing _mm_load_si128 with _mm_loadu_si128
This commit is contained in:
		| @@ -59,8 +59,8 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* a | ||||
|      | ||||
|     for(int number = 0; number < sse_iters; number++){ | ||||
|          | ||||
|         aVal = _mm_load_si128((__m128i*)aPtr); | ||||
|         bVal = _mm_load_si128((__m128i*)bPtr); | ||||
|         aVal = _mm_loadu_si128((__m128i*)aPtr); | ||||
|         bVal = _mm_loadu_si128((__m128i*)bPtr); | ||||
|          | ||||
|         cVal = _mm_add_epi8(aVal, bVal); | ||||
|          | ||||
|   | ||||
| @@ -119,8 +119,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con | ||||
|          | ||||
|         for(int number = 0; number < sse_iters; number++){ | ||||
|              | ||||
|             x = _mm_load_si128((__m128i*)a); | ||||
|             y = _mm_load_si128((__m128i*)b); | ||||
|             x = _mm_loadu_si128((__m128i*)a); | ||||
|             y = _mm_loadu_si128((__m128i*)b); | ||||
|              | ||||
|             imagx = _mm_srli_si128 (x, 1); | ||||
|             imagx = _mm_and_si128 (imagx, mult1); | ||||
|   | ||||
| @@ -62,8 +62,8 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co | ||||
|      | ||||
|     for(int number = 0;number < sse_iters; number++){ | ||||
|          | ||||
|         x = _mm_load_si128((__m128i*)a); | ||||
|         y = _mm_load_si128((__m128i*)b); | ||||
|         x = _mm_loadu_si128((__m128i*)a); | ||||
|         y = _mm_loadu_si128((__m128i*)b); | ||||
|          | ||||
|         imagx = _mm_srli_si128 (x, 1); | ||||
|         imagx = _mm_and_si128 (imagx, mult1); | ||||
|   | ||||
| @@ -222,8 +222,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o | ||||
|         for(int number = 0;number < sse_iters; number++){ | ||||
|              | ||||
|             //Perform the carrier wipe-off | ||||
|             x = _mm_load_si128((__m128i*)input_ptr); | ||||
|             y = _mm_load_si128((__m128i*)carrier_ptr); | ||||
|             x = _mm_loadu_si128((__m128i*)input_ptr); | ||||
|             y = _mm_loadu_si128((__m128i*)carrier_ptr); | ||||
|              | ||||
|             CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx) | ||||
|             CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy) | ||||
| @@ -231,7 +231,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o | ||||
|             CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample) | ||||
|              | ||||
|             //Get early values | ||||
|             y = _mm_load_si128((__m128i*)E_code_ptr); | ||||
|             y = _mm_loadu_si128((__m128i*)E_code_ptr); | ||||
|              | ||||
|             CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2) | ||||
|              | ||||
| @@ -239,7 +239,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o | ||||
|             E_code_acc = _mm_add_ps (E_code_acc, output_ps_2); | ||||
|              | ||||
|             //Get prompt values | ||||
|             y = _mm_load_si128((__m128i*)P_code_ptr); | ||||
|             y = _mm_loadu_si128((__m128i*)P_code_ptr); | ||||
|              | ||||
|             CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2) | ||||
|              | ||||
| @@ -247,7 +247,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o | ||||
|             P_code_acc = _mm_add_ps (P_code_acc, output_ps_2); | ||||
|              | ||||
|             //Get late values | ||||
|             y = _mm_load_si128((__m128i*)L_code_ptr); | ||||
|             y = _mm_loadu_si128((__m128i*)L_code_ptr); | ||||
|              | ||||
|             CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2) | ||||
|              | ||||
|   | ||||
| @@ -268,8 +268,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out | ||||
|         for(int number = 0;number < sse_iters; number++){ | ||||
|              | ||||
|             //Perform the carrier wipe-off | ||||
|             x = _mm_load_si128((__m128i*)input_ptr); | ||||
|             y = _mm_load_si128((__m128i*)carrier_ptr); | ||||
|             x = _mm_loadu_si128((__m128i*)input_ptr); | ||||
|             y = _mm_loadu_si128((__m128i*)carrier_ptr); | ||||
|              | ||||
|             imagx = _mm_srli_si128 (x, 1); | ||||
|             imagx = _mm_and_si128 (imagx, mult1); | ||||
| @@ -288,7 +288,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out | ||||
|             imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); | ||||
|              | ||||
|             //Get early values | ||||
|             y = _mm_load_si128((__m128i*)E_code_ptr); | ||||
|             y = _mm_loadu_si128((__m128i*)E_code_ptr); | ||||
|              | ||||
|             imagy = _mm_srli_si128 (y, 1); | ||||
|             imagy = _mm_and_si128 (imagy, mult1); | ||||
| @@ -306,7 +306,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out | ||||
|             imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output); | ||||
|              | ||||
|             //Get late values | ||||
|             y = _mm_load_si128((__m128i*)L_code_ptr); | ||||
|             y = _mm_loadu_si128((__m128i*)L_code_ptr); | ||||
|              | ||||
|             imagy = _mm_srli_si128 (y, 1); | ||||
|             imagy = _mm_and_si128 (imagy, mult1); | ||||
| @@ -324,7 +324,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out | ||||
|             imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output); | ||||
|              | ||||
|             //Get prompt values | ||||
|             y = _mm_load_si128((__m128i*)P_code_ptr); | ||||
|             y = _mm_loadu_si128((__m128i*)P_code_ptr); | ||||
|              | ||||
|             imagy = _mm_srli_si128 (y, 1); | ||||
|             imagy = _mm_and_si128 (imagy, mult1); | ||||
|   | ||||
| @@ -289,8 +289,8 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE | ||||
|         for(int number = 0;number < sse_iters; number++){ | ||||
|              | ||||
|             //Perform the carrier wipe-off | ||||
|             x = _mm_load_si128((__m128i*)input_ptr); | ||||
|             y = _mm_load_si128((__m128i*)carrier_ptr); | ||||
|             x = _mm_loadu_si128((__m128i*)input_ptr); | ||||
|             y = _mm_loadu_si128((__m128i*)carrier_ptr); | ||||
|              | ||||
|             CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx) | ||||
|             CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy) | ||||
| @@ -298,7 +298,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE | ||||
|             CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample) | ||||
|              | ||||
|             //Get very early values | ||||
|             y = _mm_load_si128((__m128i*)VE_code_ptr); | ||||
|             y = _mm_loadu_si128((__m128i*)VE_code_ptr); | ||||
|              | ||||
|             CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2) | ||||
|              | ||||
| @@ -306,7 +306,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE | ||||
|             VE_code_acc = _mm_add_ps (VE_code_acc, output_ps_2); | ||||
|              | ||||
|             //Get early values | ||||
|             y = _mm_load_si128((__m128i*)E_code_ptr); | ||||
|             y = _mm_loadu_si128((__m128i*)E_code_ptr); | ||||
|              | ||||
|             CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2) | ||||
|              | ||||
| @@ -314,7 +314,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE | ||||
|             E_code_acc = _mm_add_ps (E_code_acc, output_ps_2); | ||||
|              | ||||
|             //Get prompt values | ||||
|             y = _mm_load_si128((__m128i*)P_code_ptr); | ||||
|             y = _mm_loadu_si128((__m128i*)P_code_ptr); | ||||
|              | ||||
|             CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2) | ||||
|              | ||||
| @@ -322,7 +322,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE | ||||
|             P_code_acc = _mm_add_ps (P_code_acc, output_ps_2); | ||||
|              | ||||
|             //Get late values | ||||
|             y = _mm_load_si128((__m128i*)L_code_ptr); | ||||
|             y = _mm_loadu_si128((__m128i*)L_code_ptr); | ||||
|              | ||||
|             CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2) | ||||
|              | ||||
| @@ -330,7 +330,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE | ||||
|             L_code_acc = _mm_add_ps (L_code_acc, output_ps_2); | ||||
|              | ||||
|             //Get very late values | ||||
|             y = _mm_load_si128((__m128i*)VL_code_ptr); | ||||
|             y = _mm_loadu_si128((__m128i*)VL_code_ptr); | ||||
|              | ||||
|             CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2) | ||||
|              | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 andres
					andres