mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2025-01-29 02:14:51 +00:00
fix sse implementation
This commit is contained in:
parent
cd80beb16c
commit
a2429a851c
@ -58,7 +58,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result,
|
||||
{
|
||||
//r*a.r - i*a.i, i*a.r + r*a.i
|
||||
lv_16sc_t tmp = in_a[n] * in_b[n];
|
||||
result[0] = lv_cmake(sat_adds16i(lv_creal(result[0]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[0]), lv_cimag(tmp) ));
|
||||
result[0] += lv_cmake(sat_adds16i(lv_creal(tmp), lv_creal(tmp)), sat_adds16i(lv_cimag(tmp), lv_cimag(tmp) ));
|
||||
}
|
||||
//printf("generic result = %i,%i", lv_creal(result[0]),lv_cimag(result[0]));
|
||||
}
|
||||
@ -127,14 +127,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i])));
|
||||
dotProduct += lv_cmake(sat_adds16i(lv_creal(dotProductVector[i]), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProductVector[i]), lv_cimag(dotProductVector[i])));
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < (num_points % 4); ++i)
|
||||
for (unsigned int i = sse_iters * 4; i < num_points; ++i)
|
||||
{
|
||||
lv_16sc_t tmp = (*_in_a++) * (*_in_b++);
|
||||
dotProduct = lv_cmake( sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp)));
|
||||
dotProduct += lv_cmake( sat_adds16i(lv_creal(tmp), lv_creal(tmp)), sat_adds16i(lv_cimag(tmp), lv_cimag(tmp)));
|
||||
}
|
||||
|
||||
*_out = dotProduct;
|
||||
|
@ -74,7 +74,6 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con
|
||||
lv_16sc_t* _out = out;
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
|
||||
//std::complex<T> memory structure: real part -> reinterpret_cast<cv T*>(a)[2*i]
|
||||
//imaginary part -> reinterpret_cast<cv T*>(a)[2*i + 1]
|
||||
// a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
|
||||
@ -104,7 +103,7 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con
|
||||
_out += 4;
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < (num_points % 4); ++i)
|
||||
for (unsigned int i = sse_iters * 4; i < num_points; ++i)
|
||||
{
|
||||
*_out++ = (*_in_a++) * (*_in_b++);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user