mirror of
				https://github.com/gnss-sdr/gnss-sdr
				synced 2025-10-30 23:03:05 +00:00 
			
		
		
		
	fix sse implementation
This commit is contained in:
		| @@ -58,7 +58,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result, | ||||
|         { | ||||
|             //r*a.r - i*a.i, i*a.r + r*a.i | ||||
|             lv_16sc_t tmp = in_a[n] * in_b[n]; | ||||
|             result[0] = lv_cmake(sat_adds16i(lv_creal(result[0]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[0]), lv_cimag(tmp) )); | ||||
|             result[0] += lv_cmake(sat_adds16i(lv_creal(tmp), lv_creal(tmp)), sat_adds16i(lv_cimag(tmp), lv_cimag(tmp) )); | ||||
|         } | ||||
|     	//printf("generic result = %i,%i", lv_creal(result[0]),lv_cimag(result[0])); | ||||
| } | ||||
| @@ -127,14 +127,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con | ||||
|  | ||||
|             for (int i = 0; i < 4; ++i) | ||||
|                 { | ||||
|                     dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); | ||||
|                     dotProduct += lv_cmake(sat_adds16i(lv_creal(dotProductVector[i]), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProductVector[i]), lv_cimag(dotProductVector[i]))); | ||||
|                 } | ||||
|         } | ||||
|  | ||||
|     for (unsigned int i = 0; i < (num_points % 4); ++i) | ||||
|     for (unsigned int i = sse_iters * 4; i < num_points; ++i) | ||||
|         { | ||||
|             lv_16sc_t tmp = (*_in_a++) * (*_in_b++); | ||||
|             dotProduct = lv_cmake( sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp))); | ||||
|             dotProduct += lv_cmake( sat_adds16i(lv_creal(tmp), lv_creal(tmp)), sat_adds16i(lv_cimag(tmp), lv_cimag(tmp))); | ||||
|         } | ||||
|  | ||||
|     *_out = dotProduct; | ||||
|   | ||||
| @@ -74,7 +74,6 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con | ||||
|     lv_16sc_t* _out = out; | ||||
|     for(unsigned int number = 0; number < sse_iters; number++) | ||||
|         { | ||||
|  | ||||
|             //std::complex<T> memory structure: real part -> reinterpret_cast<cv T*>(a)[2*i] | ||||
|             //imaginary part -> reinterpret_cast<cv T*>(a)[2*i + 1] | ||||
|             // a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r] | ||||
| @@ -104,7 +103,7 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con | ||||
|             _out += 4; | ||||
|         } | ||||
|  | ||||
|     for (unsigned int i = 0; i < (num_points % 4); ++i) | ||||
|     for (unsigned int i = sse_iters * 4; i < num_points; ++i) | ||||
|         { | ||||
|             *_out++ = (*_in_a++) * (*_in_b++); | ||||
|         } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Carles Fernandez
					Carles Fernandez