Mirror of https://github.com/gnss-sdr/gnss-sdr (synced 2025-11-04 09:13:05 +00:00)
			
		
		
		
	Fixing problems with _mm_loadu_si128
This commit is contained in:
		@@ -59,8 +59,8 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* a
 | 
			
		||||
    
 | 
			
		||||
    for(int number = 0; number < sse_iters; number++){
 | 
			
		||||
        
 | 
			
		||||
-        aVal = _mm_load_si128((__m128i*)aPtr);
-        bVal = _mm_load_si128((__m128i*)bPtr);
+        aVal = _mm_loadu_si128((__m128i*)aPtr);
+        bVal = _mm_loadu_si128((__m128i*)bPtr);
 | 
			
		||||
        
 | 
			
		||||
        cVal = _mm_add_epi8(aVal, bVal);
 | 
			
		||||
        
 | 
			
		||||
 
 | 
			
		||||
@@ -119,8 +119,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con
 | 
			
		||||
        
 | 
			
		||||
        for(int number = 0; number < sse_iters; number++){
 | 
			
		||||
            
 | 
			
		||||
-            x = _mm_load_si128((__m128i*)a);
-            y = _mm_load_si128((__m128i*)b);
+            x = _mm_loadu_si128((__m128i*)a);
+            y = _mm_loadu_si128((__m128i*)b);
 | 
			
		||||
            
 | 
			
		||||
            imagx = _mm_srli_si128 (x, 1);
 | 
			
		||||
            imagx = _mm_and_si128 (imagx, mult1);
 | 
			
		||||
 
 | 
			
		||||
@@ -62,8 +62,8 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co
 | 
			
		||||
    
 | 
			
		||||
    for(int number = 0;number < sse_iters; number++){
 | 
			
		||||
        
 | 
			
		||||
-        x = _mm_load_si128((__m128i*)a);
-        y = _mm_load_si128((__m128i*)b);
+        x = _mm_loadu_si128((__m128i*)a);
+        y = _mm_loadu_si128((__m128i*)b);
 | 
			
		||||
        
 | 
			
		||||
        imagx = _mm_srli_si128 (x, 1);
 | 
			
		||||
        imagx = _mm_and_si128 (imagx, mult1);
 | 
			
		||||
 
 | 
			
		||||
@@ -222,8 +222,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o
 | 
			
		||||
        for(int number = 0;number < sse_iters; number++){
 | 
			
		||||
            
 | 
			
		||||
            //Perform the carrier wipe-off
 | 
			
		||||
-            x = _mm_load_si128((__m128i*)input_ptr);
-            y = _mm_load_si128((__m128i*)carrier_ptr);
+            x = _mm_loadu_si128((__m128i*)input_ptr);
+            y = _mm_loadu_si128((__m128i*)carrier_ptr);
 | 
			
		||||
            
 | 
			
		||||
            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
 | 
			
		||||
            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
 | 
			
		||||
@@ -231,7 +231,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o
 | 
			
		||||
            CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
 | 
			
		||||
            
 | 
			
		||||
            //Get early values
 | 
			
		||||
-            y = _mm_load_si128((__m128i*)E_code_ptr);
+            y = _mm_loadu_si128((__m128i*)E_code_ptr);
 | 
			
		||||
            
 | 
			
		||||
            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
 | 
			
		||||
            
 | 
			
		||||
@@ -239,7 +239,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o
 | 
			
		||||
            E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
 | 
			
		||||
            
 | 
			
		||||
            //Get prompt values
 | 
			
		||||
-            y = _mm_load_si128((__m128i*)P_code_ptr);
+            y = _mm_loadu_si128((__m128i*)P_code_ptr);
 | 
			
		||||
            
 | 
			
		||||
            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
 | 
			
		||||
            
 | 
			
		||||
@@ -247,7 +247,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o
 | 
			
		||||
            P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
 | 
			
		||||
            
 | 
			
		||||
            //Get late values
 | 
			
		||||
-            y = _mm_load_si128((__m128i*)L_code_ptr);
+            y = _mm_loadu_si128((__m128i*)L_code_ptr);
 | 
			
		||||
            
 | 
			
		||||
            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
 | 
			
		||||
            
 | 
			
		||||
 
 | 
			
		||||
@@ -268,8 +268,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out
 | 
			
		||||
        for(int number = 0;number < sse_iters; number++){
 | 
			
		||||
            
 | 
			
		||||
            //Perform the carrier wipe-off
 | 
			
		||||
-            x = _mm_load_si128((__m128i*)input_ptr);
-            y = _mm_load_si128((__m128i*)carrier_ptr);
+            x = _mm_loadu_si128((__m128i*)input_ptr);
+            y = _mm_loadu_si128((__m128i*)carrier_ptr);
 | 
			
		||||
            
 | 
			
		||||
            imagx = _mm_srli_si128 (x, 1);
 | 
			
		||||
            imagx = _mm_and_si128 (imagx, mult1);
 | 
			
		||||
@@ -288,7 +288,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out
 | 
			
		||||
            imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
 | 
			
		||||
            
 | 
			
		||||
            //Get early values
 | 
			
		||||
-            y = _mm_load_si128((__m128i*)E_code_ptr);
+            y = _mm_loadu_si128((__m128i*)E_code_ptr);
 | 
			
		||||
            
 | 
			
		||||
            imagy = _mm_srli_si128 (y, 1);
 | 
			
		||||
            imagy = _mm_and_si128 (imagy, mult1);
 | 
			
		||||
@@ -306,7 +306,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out
 | 
			
		||||
            imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
 | 
			
		||||
            
 | 
			
		||||
            //Get late values
 | 
			
		||||
-            y = _mm_load_si128((__m128i*)L_code_ptr);
+            y = _mm_loadu_si128((__m128i*)L_code_ptr);
 | 
			
		||||
            
 | 
			
		||||
            imagy = _mm_srli_si128 (y, 1);
 | 
			
		||||
            imagy = _mm_and_si128 (imagy, mult1);
 | 
			
		||||
@@ -324,7 +324,7 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out
 | 
			
		||||
            imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
 | 
			
		||||
            
 | 
			
		||||
            //Get prompt values
 | 
			
		||||
-            y = _mm_load_si128((__m128i*)P_code_ptr);
+            y = _mm_loadu_si128((__m128i*)P_code_ptr);
 | 
			
		||||
            
 | 
			
		||||
            imagy = _mm_srli_si128 (y, 1);
 | 
			
		||||
            imagy = _mm_and_si128 (imagy, mult1);
 | 
			
		||||
 
 | 
			
		||||
@@ -289,8 +289,8 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
 | 
			
		||||
        for(int number = 0;number < sse_iters; number++){
 | 
			
		||||
            
 | 
			
		||||
            //Perform the carrier wipe-off
 | 
			
		||||
-            x = _mm_load_si128((__m128i*)input_ptr);
-            y = _mm_load_si128((__m128i*)carrier_ptr);
+            x = _mm_loadu_si128((__m128i*)input_ptr);
+            y = _mm_loadu_si128((__m128i*)carrier_ptr);
 | 
			
		||||
            
 | 
			
		||||
            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
 | 
			
		||||
            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
 | 
			
		||||
@@ -298,7 +298,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
 | 
			
		||||
            CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
 | 
			
		||||
            
 | 
			
		||||
            //Get very early values
 | 
			
		||||
-            y = _mm_load_si128((__m128i*)VE_code_ptr);
+            y = _mm_loadu_si128((__m128i*)VE_code_ptr);
 | 
			
		||||
            
 | 
			
		||||
            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
 | 
			
		||||
            
 | 
			
		||||
@@ -306,7 +306,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
 | 
			
		||||
            VE_code_acc = _mm_add_ps (VE_code_acc, output_ps_2);
 | 
			
		||||
            
 | 
			
		||||
            //Get early values
 | 
			
		||||
-            y = _mm_load_si128((__m128i*)E_code_ptr);
+            y = _mm_loadu_si128((__m128i*)E_code_ptr);
 | 
			
		||||
            
 | 
			
		||||
            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
 | 
			
		||||
            
 | 
			
		||||
@@ -314,7 +314,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
 | 
			
		||||
            E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
 | 
			
		||||
            
 | 
			
		||||
            //Get prompt values
 | 
			
		||||
-            y = _mm_load_si128((__m128i*)P_code_ptr);
+            y = _mm_loadu_si128((__m128i*)P_code_ptr);
 | 
			
		||||
            
 | 
			
		||||
            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
 | 
			
		||||
            
 | 
			
		||||
@@ -322,7 +322,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
 | 
			
		||||
            P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
 | 
			
		||||
            
 | 
			
		||||
            //Get late values
 | 
			
		||||
-            y = _mm_load_si128((__m128i*)L_code_ptr);
+            y = _mm_loadu_si128((__m128i*)L_code_ptr);
 | 
			
		||||
            
 | 
			
		||||
            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
 | 
			
		||||
            
 | 
			
		||||
@@ -330,7 +330,7 @@ static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE
 | 
			
		||||
            L_code_acc = _mm_add_ps (L_code_acc, output_ps_2);
 | 
			
		||||
            
 | 
			
		||||
            //Get very late values
 | 
			
		||||
-            y = _mm_load_si128((__m128i*)VL_code_ptr);
+            y = _mm_loadu_si128((__m128i*)VL_code_ptr);
 | 
			
		||||
            
 | 
			
		||||
            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
 | 
			
		||||
            
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user