mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2025-01-28 18:04:51 +00:00
Add NEON intrinsics
This commit is contained in:
parent
11afe63ef3
commit
91ccc8589e
@ -53,4 +53,47 @@ static inline float32x4_t vsqrtq_f32(float32x4_t q_x)
|
|||||||
return vmulq_f32(q_x, q_step_2);
|
return vmulq_f32(q_x, q_step_2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Magnitude squared for float32x4x2_t */
|
||||||
|
static inline float32x4_t _vmagnitudesquaredq_f32(float32x4x2_t cmplxValue)
|
||||||
|
{
|
||||||
|
float32x4_t iValue, qValue, result;
|
||||||
|
iValue = vmulq_f32(cmplxValue.val[0], cmplxValue.val[0]); // Square the values
|
||||||
|
qValue = vmulq_f32(cmplxValue.val[1], cmplxValue.val[1]); // Square the values
|
||||||
|
result = vaddq_f32(iValue, qValue); // Add the I2 and Q2 values
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Inverse square root for float32x4_t */
|
||||||
|
static inline float32x4_t _vinvsqrtq_f32(float32x4_t x)
|
||||||
|
{
|
||||||
|
float32x4_t sqrt_reciprocal = vrsqrteq_f32(x);
|
||||||
|
sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
|
||||||
|
sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
|
||||||
|
|
||||||
|
return sqrt_reciprocal;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Complex multiplication for float32x4x2_t */
|
||||||
|
static inline float32x4x2_t _vmultiply_complexq_f32(float32x4x2_t a_val, float32x4x2_t b_val)
|
||||||
|
{
|
||||||
|
float32x4x2_t tmp_real;
|
||||||
|
float32x4x2_t tmp_imag;
|
||||||
|
float32x4x2_t c_val;
|
||||||
|
|
||||||
|
// multiply the real*real and imag*imag to get real result
|
||||||
|
// a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r
|
||||||
|
tmp_real.val[0] = vmulq_f32(a_val.val[0], b_val.val[0]);
|
||||||
|
// a0i*b0i|a1i*b1i|a2i*b2i|a3i*b3i
|
||||||
|
tmp_real.val[1] = vmulq_f32(a_val.val[1], b_val.val[1]);
|
||||||
|
// Multiply cross terms to get the imaginary result
|
||||||
|
// a0r*b0i|a1r*b1i|a2r*b2i|a3r*b3i
|
||||||
|
tmp_imag.val[0] = vmulq_f32(a_val.val[0], b_val.val[1]);
|
||||||
|
// a0i*b0r|a1i*b1r|a2i*b2r|a3i*b3r
|
||||||
|
tmp_imag.val[1] = vmulq_f32(a_val.val[1], b_val.val[0]);
|
||||||
|
// combine the products
|
||||||
|
c_val.val[0] = vsubq_f32(tmp_real.val[0], tmp_real.val[1]);
|
||||||
|
c_val.val[1] = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]);
|
||||||
|
return c_val;
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* INCLUDED_VOLK_GNSSSDR_NEON_INTRINSICS_H_ */
|
#endif /* INCLUDED_VOLK_GNSSSDR_NEON_INTRINSICS_H_ */
|
||||||
|
Loading…
Reference in New Issue
Block a user