mirror of
				https://github.com/gnss-sdr/gnss-sdr
				synced 2025-10-31 15:23:04 +00:00 
			
		
		
		
	Add NEON intrinsics
This commit is contained in:
		| @@ -53,4 +53,47 @@ static inline float32x4_t vsqrtq_f32(float32x4_t q_x) | |||||||
|     return vmulq_f32(q_x, q_step_2); |     return vmulq_f32(q_x, q_step_2); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /* Magnitude squared for float32x4x2_t */ | ||||||
|  | static inline float32x4_t _vmagnitudesquaredq_f32(float32x4x2_t cmplxValue) | ||||||
|  | { | ||||||
|  |     float32x4_t iValue, qValue, result; | ||||||
|  |     iValue = vmulq_f32(cmplxValue.val[0], cmplxValue.val[0]); // Square the values | ||||||
|  |     qValue = vmulq_f32(cmplxValue.val[1], cmplxValue.val[1]); // Square the values | ||||||
|  |     result = vaddq_f32(iValue, qValue); // Add the I2 and Q2 values | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* Inverse square root for float32x4_t */ | ||||||
|  | static inline float32x4_t _vinvsqrtq_f32(float32x4_t x) | ||||||
|  | { | ||||||
|  |     float32x4_t sqrt_reciprocal = vrsqrteq_f32(x); | ||||||
|  |     sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); | ||||||
|  |     sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal); | ||||||
|  |  | ||||||
|  |     return sqrt_reciprocal; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* Complex multiplication for float32x4x2_t */ | ||||||
|  | static inline float32x4x2_t _vmultiply_complexq_f32(float32x4x2_t a_val, float32x4x2_t b_val) | ||||||
|  | { | ||||||
|  |     float32x4x2_t tmp_real; | ||||||
|  |     float32x4x2_t tmp_imag; | ||||||
|  |     float32x4x2_t c_val; | ||||||
|  |  | ||||||
|  |     // multiply the real*real and imag*imag to get real result | ||||||
|  |     // a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r | ||||||
|  |     tmp_real.val[0] = vmulq_f32(a_val.val[0], b_val.val[0]); | ||||||
|  |     // a0i*b0i|a1i*b1i|a2i*b2i|a3i*b3i | ||||||
|  |     tmp_real.val[1] = vmulq_f32(a_val.val[1], b_val.val[1]); | ||||||
|  |     // Multiply cross terms to get the imaginary result | ||||||
|  |     // a0r*b0i|a1r*b1i|a2r*b2i|a3r*b3i | ||||||
|  |     tmp_imag.val[0] = vmulq_f32(a_val.val[0], b_val.val[1]); | ||||||
|  |     // a0i*b0r|a1i*b1r|a2i*b2r|a3i*b3r | ||||||
|  |     tmp_imag.val[1] = vmulq_f32(a_val.val[1], b_val.val[0]); | ||||||
|  |     // combine the products | ||||||
|  |     c_val.val[0] = vsubq_f32(tmp_real.val[0], tmp_real.val[1]); | ||||||
|  |     c_val.val[1] = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]); | ||||||
|  |     return c_val; | ||||||
|  | } | ||||||
|  |  | ||||||
| #endif /* INCLUDED_VOLK_GNSSSDR_NEON_INTRINSICS_H_ */ | #endif /* INCLUDED_VOLK_GNSSSDR_NEON_INTRINSICS_H_ */ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Carles Fernandez
					Carles Fernandez