mirror of
				https://github.com/gnss-sdr/gnss-sdr
				synced 2025-10-30 23:03:05 +00:00 
			
		
		
		
	Merge branch 'next' of https://github.com/carlesfernandez/gnss-sdr into next
This commit is contained in:
		| @@ -170,6 +170,7 @@ install(FILES | ||||
|     ${PROJECT_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_common.h | ||||
|     ${PROJECT_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_avx_intrinsics.h | ||||
|     ${PROJECT_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_sse3_intrinsics.h | ||||
|     ${PROJECT_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_neon_intrinsics.h | ||||
|     ${PROJECT_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr.h | ||||
|     ${PROJECT_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_cpu.h | ||||
|     ${PROJECT_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_config_fixed.h | ||||
|   | ||||
| @@ -0,0 +1,56 @@ | ||||
| /*! | ||||
|  * \file volk_gnsssdr_neon_intrinsics.h | ||||
|  * \author Carles Fernandez, 2016. carles.fernandez(at)gmail.com | ||||
|  * \brief Holds NEON intrinsics of intrinsics. | ||||
|  * They can be used in VOLK_GNSSSDR kernels to avoid copy-paste | ||||
|  * | ||||
|  * Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors) | ||||
|  * | ||||
|  * This file is part of GNSS-SDR. | ||||
|  * | ||||
|  * GNSS-SDR is free software: you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation, either version 3 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * GNSS-SDR is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. | ||||
|  */ | ||||
|  | ||||
| #ifndef INCLUDED_VOLK_GNSSSDR_NEON_INTRINSICS_H_ | ||||
| #define INCLUDED_VOLK_GNSSSDR_NEON_INTRINSICS_H_ | ||||
|  | ||||
| #include <arm_neon.h> | ||||
|  | ||||
| static inline float32x4_t vdivq_f32( float32x4_t num, float32x4_t den ) | ||||
| { | ||||
|     const float32x4_t q_inv0 = vrecpeq_f32( den ); | ||||
|     const float32x4_t q_step0 = vrecpsq_f32( q_inv0, den ); | ||||
|  | ||||
|     const float32x4_t q_inv1 = vmulq_f32( q_step0, q_inv0 ); | ||||
|     return vmulq_f32( num, q_inv1 ); | ||||
| } | ||||
|  | ||||
|  | ||||
| static inline float32x4_t vsqrtq_f32( float32x4_t q_x ) | ||||
| { | ||||
|     const float32x4_t q_step_0 = vrsqrteq_f32( q_x ); | ||||
|     // step | ||||
|     const float32x4_t q_step_parm0 = vmulq_f32( q_x, q_step_0 ); | ||||
|     const float32x4_t q_step_result0 = vrsqrtsq_f32( q_step_parm0, q_step_0 ); | ||||
|     // step | ||||
|     const float32x4_t q_step_1 = vmulq_f32( q_step_0, q_step_result0 ); | ||||
|     const float32x4_t q_step_parm1 = vmulq_f32( q_x, q_step_1 ); | ||||
|     const float32x4_t q_step_result1 = vrsqrtsq_f32( q_step_parm1, q_step_1 ); | ||||
|     // take the res | ||||
|     const float32x4_t q_step_2 = vmulq_f32( q_step_1, q_step_result1 ); | ||||
|     // mul by x to get sqrt, not rsqrt | ||||
|     return vmulq_f32( q_x, q_step_2 ); | ||||
| } | ||||
|  | ||||
| #endif /* INCLUDED_VOLK_GNSSSDR_NEON_INTRINSICS_H_ */ | ||||
| @@ -57,6 +57,22 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_generic(lv_16sc_t* outVe | ||||
| #endif /* LV_HAVE_GENERIC */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_GENERIC | ||||
| static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_generic_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points) | ||||
| { | ||||
|     // phases must be normalized. Phase rotator expects a complex exponential input! | ||||
|     float rem_carrier_phase_in_rad = 0.345; | ||||
|     float phase_step_rad = 0.123; | ||||
|     lv_32fc_t phase[1]; | ||||
|     phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), -sin(rem_carrier_phase_in_rad)); | ||||
|     lv_32fc_t phase_inc[1]; | ||||
|     phase_inc[0] = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad)); | ||||
|     volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic_reload(outVector, inVector, phase_inc[0], phase, num_points); | ||||
| } | ||||
|  | ||||
| #endif /* LV_HAVE_GENERIC */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE3 | ||||
| static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_a_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points) | ||||
| { | ||||
| @@ -73,6 +89,22 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_a_sse3(lv_16sc_t* outVec | ||||
| #endif /* LV_HAVE_SSE3 */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE3 | ||||
| static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_a_sse3_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points) | ||||
| { | ||||
|     // phases must be normalized. Phase rotator expects a complex exponential input! | ||||
|     float rem_carrier_phase_in_rad = 0.345; | ||||
|     float phase_step_rad = 0.123; | ||||
|     lv_32fc_t phase[1]; | ||||
|     phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), -sin(rem_carrier_phase_in_rad)); | ||||
|     lv_32fc_t phase_inc[1]; | ||||
|     phase_inc[0] = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad)); | ||||
|     volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(outVector, inVector, phase_inc[0], phase, num_points); | ||||
| } | ||||
|  | ||||
| #endif /* LV_HAVE_SSE3 */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE3 | ||||
| static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_u_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points) | ||||
| { | ||||
| @@ -89,6 +121,22 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_u_sse3(lv_16sc_t* outVec | ||||
| #endif /* LV_HAVE_SSE3 */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE3 | ||||
| static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_u_sse3_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points) | ||||
| { | ||||
|     // phases must be normalized. Phase rotator expects a complex exponential input! | ||||
|     float rem_carrier_phase_in_rad = 0.345; | ||||
|     float phase_step_rad = 0.123; | ||||
|     lv_32fc_t phase[1]; | ||||
|     phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), -sin(rem_carrier_phase_in_rad)); | ||||
|     lv_32fc_t phase_inc[1]; | ||||
|     phase_inc[0] = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad)); | ||||
|     volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(outVector, inVector, phase_inc[0], phase, num_points); | ||||
| } | ||||
|  | ||||
| #endif /* LV_HAVE_SSE3 */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_NEON | ||||
| static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points) | ||||
| { | ||||
| @@ -105,4 +153,20 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon(lv_16sc_t* outVecto | ||||
| #endif /* LV_HAVE_NEON */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_NEON | ||||
| static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points) | ||||
| { | ||||
|     // phases must be normalized. Phase rotator expects a complex exponential input! | ||||
|     float rem_carrier_phase_in_rad = 0.345; | ||||
|     float phase_step_rad = 0.123; | ||||
|     lv_32fc_t phase[1]; | ||||
|     phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), -sin(rem_carrier_phase_in_rad)); | ||||
|     lv_32fc_t phase_inc[1]; | ||||
|     phase_inc[0] = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad)); | ||||
|     volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(outVector, inVector, phase_inc[0], phase, num_points); | ||||
| } | ||||
|  | ||||
| #endif /* LV_HAVE_NEON */ | ||||
|  | ||||
|  | ||||
| #endif /* INCLUDED_volk_gnsssdr_16ic_rotatorpuppet_16ic_H */ | ||||
|   | ||||
| @@ -61,6 +61,7 @@ | ||||
|  | ||||
| #include <volk_gnsssdr/volk_gnsssdr_complex.h> | ||||
| #include <math.h> | ||||
| //#include <stdio.h> | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_GENERIC | ||||
| @@ -71,6 +72,56 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* ou | ||||
|     lv_16sc_t tmp16; | ||||
|     lv_32fc_t tmp32; | ||||
|     for(i = 0; i < (unsigned int)(num_points); ++i) | ||||
|         { | ||||
|             tmp16 = *inVector++; | ||||
|             tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); | ||||
|             *outVector++ = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); | ||||
|             (*phase) *= phase_inc; | ||||
|             // Regenerate phase | ||||
|             if (i % 512 == 0) | ||||
|                 { | ||||
|                     //printf("Phase before regeneration %i: %f,%f  Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase)); | ||||
| #ifdef __cplusplus | ||||
|                     (*phase) /= std::abs((*phase)); | ||||
| #else | ||||
|                     (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase)); | ||||
| #endif | ||||
|                     //printf("Phase after regeneration %i: %f,%f  Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase)); | ||||
|                 } | ||||
|         } | ||||
| } | ||||
|  | ||||
| #endif /* LV_HAVE_GENERIC */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_GENERIC | ||||
|  | ||||
| static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) | ||||
| { | ||||
|     unsigned int ROTATOR_RELOAD = 512; | ||||
|     unsigned int n = 0; | ||||
|     unsigned int j = 0; | ||||
|     lv_16sc_t tmp16; | ||||
|     lv_32fc_t tmp32; | ||||
|     for (; n < num_points / ROTATOR_RELOAD; n++) | ||||
|         { | ||||
|             for (j = 0; j < ROTATOR_RELOAD; j++) | ||||
|                 { | ||||
|                     tmp16 = *inVector++; | ||||
|                     tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); | ||||
|                     *outVector++ = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); | ||||
|                     (*phase) *= phase_inc; | ||||
|                 } | ||||
|             // Regenerate phase | ||||
|             //printf("Phase before regeneration %i: %f,%f  Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase)); | ||||
| #ifdef __cplusplus | ||||
|             (*phase) /= std::abs((*phase)); | ||||
| #else | ||||
|             (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase)); | ||||
| #endif | ||||
|             //printf("Phase after regeneration %i: %f,%f  Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase)); | ||||
|         } | ||||
|     for (j = 0; j < num_points % ROTATOR_RELOAD; j++) | ||||
|         { | ||||
|             tmp16 = *inVector++; | ||||
|             tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); | ||||
| @@ -130,6 +181,168 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out | ||||
|             //next two samples | ||||
|             _in += 2; | ||||
|             a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|             __builtin_prefetch(_in + 8); | ||||
|             //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|             yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|             yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|             tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|             a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|             tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|             b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|             c2 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic | ||||
|  | ||||
|             //complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg | ||||
|             yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|             yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|             tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|             tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|             tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|             two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|  | ||||
|             // store four output samples | ||||
|             result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic | ||||
|             _mm_store_si128((__m128i*)_out, result); | ||||
|  | ||||
|             // Regenerate phase | ||||
|             if ((number % 512) == 0) | ||||
|                 { | ||||
|                     tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg); | ||||
|                     tmp2 = _mm_hadd_ps(tmp1, tmp1); | ||||
|                     tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8); | ||||
|                     tmp2 = _mm_sqrt_ps(tmp1); | ||||
|                     two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2); | ||||
|                 } | ||||
|  | ||||
|             //next two samples | ||||
|             _in += 2; | ||||
|             _out += 4; | ||||
|         } | ||||
|  | ||||
|     _mm_storeu_ps((float*)two_phase_acc, two_phase_acc_reg); | ||||
|     (*phase) = two_phase_acc[0]; | ||||
|  | ||||
|     for (unsigned int i = sse_iters * 4; i < num_points; ++i) | ||||
|         { | ||||
|             tmp16 = *_in++; | ||||
|             tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); | ||||
|             *_out++ = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); | ||||
|             (*phase) *= phase_inc; | ||||
|         } | ||||
| } | ||||
|  | ||||
| #endif /* LV_HAVE_SSE3 */ | ||||
|  | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE3 | ||||
| #include <pmmintrin.h> | ||||
|  | ||||
| static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) | ||||
| { | ||||
|     const unsigned int sse_iters = num_points / 4; | ||||
|     const unsigned int ROTATOR_RELOAD = 512; | ||||
|     __m128 a, b, two_phase_acc_reg, two_phase_inc_reg; | ||||
|     __m128i c1, c2, result; | ||||
|     __attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; | ||||
|     two_phase_inc[0] = phase_inc * phase_inc; | ||||
|     two_phase_inc[1] = phase_inc * phase_inc; | ||||
|     two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); | ||||
|     __attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; | ||||
|     two_phase_acc[0] = (*phase); | ||||
|     two_phase_acc[1] = (*phase) * phase_inc; | ||||
|     two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); | ||||
|  | ||||
|     const lv_16sc_t* _in = inVector; | ||||
|  | ||||
|     lv_16sc_t* _out = outVector; | ||||
|  | ||||
|     __m128 yl, yh, tmp1, tmp2, tmp3; | ||||
|     lv_16sc_t tmp16; | ||||
|     lv_32fc_t tmp32; | ||||
|  | ||||
|     for (unsigned int n = 0; n < sse_iters / ROTATOR_RELOAD; n++) | ||||
|         { | ||||
|             for (unsigned int j = 0; j < ROTATOR_RELOAD; j++) | ||||
|                 { | ||||
|                     a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|                     //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|                     yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|                     yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|                     tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|                     a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|                     tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|                     b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|                     c1 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic | ||||
|  | ||||
|                     //complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg | ||||
|                     yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|                     yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|                     tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|                     tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|                     tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|                     two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|  | ||||
|                     //next two samples | ||||
|                     _in += 2; | ||||
|                     a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|                     __builtin_prefetch(_in + 8); | ||||
|                     //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|                     yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|                     yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|                     tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|                     a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|                     tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|                     b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|                     c2 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic | ||||
|  | ||||
|                     //complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg | ||||
|                     yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|                     yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|                     tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|                     tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|                     tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|                     two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|  | ||||
|                     // store four output samples | ||||
|                     result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic | ||||
|                     _mm_store_si128((__m128i*)_out, result); | ||||
|  | ||||
|                     //next two samples | ||||
|                     _in += 2; | ||||
|                     _out += 4; | ||||
|                 } | ||||
|             // Regenerate phase | ||||
|             tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg); | ||||
|             tmp2 = _mm_hadd_ps(tmp1, tmp1); | ||||
|             tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8); | ||||
|             tmp2 = _mm_sqrt_ps(tmp1); | ||||
|             two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2); | ||||
|         } | ||||
|  | ||||
|     for (unsigned int j = 0; j < sse_iters % ROTATOR_RELOAD; j++) | ||||
|         { | ||||
|             a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|             //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|             yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|             yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|             tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|             a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|             tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|             b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|             c1 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic | ||||
|  | ||||
|             //complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg | ||||
|             yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|             yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|             tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|             tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|             tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|             two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|  | ||||
|             //next two samples | ||||
|             _in += 2; | ||||
|             a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|             __builtin_prefetch(_in + 8); | ||||
|             //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|             yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|             yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
| @@ -171,6 +384,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out | ||||
| #endif /* LV_HAVE_SSE3 */ | ||||
|  | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE3 | ||||
| #include <pmmintrin.h> | ||||
|  | ||||
| @@ -241,6 +455,16 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out | ||||
|             result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic | ||||
|             _mm_storeu_si128((__m128i*)_out, result); | ||||
|  | ||||
|             // Regenerate phase | ||||
|             if ((number % 512) == 0) | ||||
|                 { | ||||
|                     tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg); | ||||
|                     tmp2 = _mm_hadd_ps(tmp1, tmp1); | ||||
|                     tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8); | ||||
|                     tmp2 = _mm_sqrt_ps(tmp1); | ||||
|                     two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2); | ||||
|                 } | ||||
|  | ||||
|             //next two samples | ||||
|             _in += 2; | ||||
|             _out += 4; | ||||
| @@ -261,6 +485,156 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out | ||||
| #endif /* LV_HAVE_SSE3 */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE3 | ||||
| #include <pmmintrin.h> | ||||
|  | ||||
| static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) | ||||
| { | ||||
|     const unsigned int sse_iters = num_points / 4; | ||||
|      unsigned int ROTATOR_RELOAD = 512; | ||||
|      __m128 a, b, two_phase_acc_reg, two_phase_inc_reg; | ||||
|      __m128i c1, c2, result; | ||||
|      __attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; | ||||
|      two_phase_inc[0] = phase_inc * phase_inc; | ||||
|      two_phase_inc[1] = phase_inc * phase_inc; | ||||
|      two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); | ||||
|      __attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; | ||||
|      two_phase_acc[0] = (*phase); | ||||
|      two_phase_acc[1] = (*phase) * phase_inc; | ||||
|      two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc); | ||||
|  | ||||
|      const lv_16sc_t* _in = inVector; | ||||
|  | ||||
|      lv_16sc_t* _out = outVector; | ||||
|  | ||||
|      __m128 yl, yh, tmp1, tmp2, tmp3; | ||||
|      lv_16sc_t tmp16; | ||||
|      lv_32fc_t tmp32; | ||||
|  | ||||
|      for (unsigned int n = 0; n < sse_iters / ROTATOR_RELOAD; n++) | ||||
|          { | ||||
|              for (unsigned int j = 0; j < ROTATOR_RELOAD; j++) | ||||
|                  { | ||||
|                      a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|                      //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|                      yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|                      yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|                      tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|                      a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|                      tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|                      b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|                      c1 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic | ||||
|  | ||||
|                      //complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg | ||||
|                      yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|                      yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|                      tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|                      tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|                      tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|                      two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|  | ||||
|                      //next two samples | ||||
|                      _in += 2; | ||||
|                      a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|                      __builtin_prefetch(_in + 8); | ||||
|                      //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|                      yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|                      yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|                      tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|                      a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|                      tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|                      b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|                      c2 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic | ||||
|  | ||||
|                      //complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg | ||||
|                      yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|                      yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|                      tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|                      tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|                      tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|                      two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|  | ||||
|                      // store four output samples | ||||
|                      result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic | ||||
|                      _mm_storeu_si128((__m128i*)_out, result); | ||||
|  | ||||
|                      //next two samples | ||||
|                      _in += 2; | ||||
|                      _out += 4; | ||||
|                  } | ||||
|              // Regenerate phase | ||||
|              tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg); | ||||
|              tmp2 = _mm_hadd_ps(tmp1, tmp1); | ||||
|              tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8); | ||||
|              tmp2 = _mm_sqrt_ps(tmp1); | ||||
|              two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2); | ||||
|          } | ||||
|  | ||||
|      for (unsigned int j = 0; j < sse_iters % ROTATOR_RELOAD; j++) | ||||
|          { | ||||
|              a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|              //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|              yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|              yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|              tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|              a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|              tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|              b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|              c1 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic | ||||
|  | ||||
|              //complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg | ||||
|              yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|              yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|              tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|              tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|              tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|              two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|  | ||||
|              //next two samples | ||||
|              _in += 2; | ||||
|              a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|              __builtin_prefetch(_in + 8); | ||||
|              //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|              yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|              yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|              tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|              a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|              tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|              b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|              c2 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic | ||||
|  | ||||
|              //complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg | ||||
|              yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|              yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|              tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|              tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|              tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|              two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|  | ||||
|              // store four output samples | ||||
|              result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic | ||||
|              _mm_storeu_si128((__m128i*)_out, result); | ||||
|  | ||||
|              //next two samples | ||||
|              _in += 2; | ||||
|              _out += 4; | ||||
|          } | ||||
|  | ||||
|      _mm_storeu_ps((float*)two_phase_acc, two_phase_acc_reg); | ||||
|      (*phase) = two_phase_acc[0]; | ||||
|  | ||||
|      for (unsigned int i = sse_iters * 4; i < num_points; ++i) | ||||
|          { | ||||
|              tmp16 = *_in++; | ||||
|              tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); | ||||
|              *_out++ = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); | ||||
|              (*phase) *= phase_inc; | ||||
|          } | ||||
| } | ||||
|  | ||||
| #endif /* LV_HAVE_SSE3 */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_NEON | ||||
| #include <arm_neon.h> | ||||
|  | ||||
| @@ -271,6 +645,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe | ||||
|     lv_16sc_t tmp16_; | ||||
|     lv_32fc_t tmp32_; | ||||
|  | ||||
|     float arg_phase0 = cargf(*phase); | ||||
|     float arg_phase_inc = cargf(phase_inc); | ||||
|     float phase_est = 0.0; | ||||
|  | ||||
|     const lv_16sc_t* _in = inVector; | ||||
|     lv_16sc_t* _out = outVector; | ||||
|  | ||||
| @@ -351,6 +729,24 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe | ||||
|                     /* store the four complex results */ | ||||
|                     vst2_s16((int16_t*)_out, tmp16); | ||||
|                     _out += 4; | ||||
|                     // Regenerate phase | ||||
|                     if ((i % 512) == 0) | ||||
|                         { | ||||
|                             //printf("Computed phase:  %f\n", cos(cargf(lv_cmake(_phase_real[0],_phase_imag[0])))); | ||||
|                             phase_est = arg_phase0 + (i + 1) * 4 * arg_phase_inc; | ||||
|                             //printf("Estimated phase: %f\n\n", cos(phase_est)); | ||||
|  | ||||
|                             *phase = lv_cmake(cos(phase_est), sin(phase_est)); | ||||
|                             phase2 = (lv_32fc_t)(*phase) * phase_inc; | ||||
|                             phase3 = phase2 * phase_inc; | ||||
|                             phase4 = phase3 * phase_inc; | ||||
|  | ||||
|                             __VOLK_ATTR_ALIGNED(16) float32_t  ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) }; | ||||
|                             __VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) }; | ||||
|  | ||||
|                             _phase_real = vld1q_f32(____phase_real); | ||||
|                             _phase_imag = vld1q_f32(____phase_imag); | ||||
|                         } | ||||
|                 } | ||||
|             vst1q_f32((float32_t*)__phase_real, _phase_real); | ||||
|             vst1q_f32((float32_t*)__phase_imag, _phase_imag); | ||||
| @@ -368,4 +764,190 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe | ||||
|  | ||||
| #endif /* LV_HAVE_NEON */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_NEON | ||||
| #include <arm_neon.h> | ||||
|  | ||||
| static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) | ||||
| { | ||||
|     unsigned int i = 0; | ||||
|     const unsigned int neon_iters = num_points / 4; | ||||
|     const unsigned int ROTATOR_RELOAD = 512; | ||||
|  | ||||
|     lv_16sc_t tmp16_; | ||||
|     lv_32fc_t tmp32_; | ||||
|  | ||||
|     float arg_phase0 = cargf(*phase); | ||||
|     float arg_phase_inc = cargf(phase_inc); | ||||
|     float phase_est = 0.0; | ||||
|  | ||||
|     const lv_16sc_t* _in = inVector; | ||||
|     lv_16sc_t* _out = outVector; | ||||
|  | ||||
|     lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc; | ||||
|     __VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) }; | ||||
|     __VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) }; | ||||
|  | ||||
|     float32x4_t _phase4_real = vld1q_f32(__phase4_real); | ||||
|     float32x4_t _phase4_imag = vld1q_f32(__phase4_imag); | ||||
|  | ||||
|     lv_32fc_t phase2 = (lv_32fc_t)(*phase) * phase_inc; | ||||
|     lv_32fc_t phase3 = phase2 * phase_inc; | ||||
|     lv_32fc_t phase4 = phase3 * phase_inc; | ||||
|  | ||||
|     __VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) }; | ||||
|     __VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) }; | ||||
|  | ||||
|     float32x4_t _phase_real = vld1q_f32(__phase_real); | ||||
|     float32x4_t _phase_imag = vld1q_f32(__phase_imag); | ||||
|  | ||||
|     float32x4_t half = vdupq_n_f32(0.5f); | ||||
|     int16x4x2_t tmp16; | ||||
|     int32x4x2_t tmp32i; | ||||
|     float32x4x2_t tmp32f, tmp_real, tmp_imag; | ||||
|     float32x4_t sign, PlusHalf, Round; | ||||
|  | ||||
|     if (neon_iters > 0) | ||||
|         { | ||||
|             for (unsigned int n = 0; n < neon_iters / ROTATOR_RELOAD; n++) | ||||
|                 { | ||||
|                     for (unsigned int j = 0; j < ROTATOR_RELOAD; j++) | ||||
|                         { | ||||
|                             /* load 4 complex numbers (int 16 bits each component) */ | ||||
|                             tmp16 = vld2_s16((int16_t*)_in); | ||||
|                             __builtin_prefetch(_in + 8); | ||||
|                             _in += 4; | ||||
|  | ||||
|                             /* promote them to int 32 bits */ | ||||
|                             tmp32i.val[0] = vmovl_s16(tmp16.val[0]); | ||||
|                             tmp32i.val[1] = vmovl_s16(tmp16.val[1]); | ||||
|  | ||||
|                             /* promote them to float 32 bits */ | ||||
|                             tmp32f.val[0] = vcvtq_f32_s32(tmp32i.val[0]); | ||||
|                             tmp32f.val[1] = vcvtq_f32_s32(tmp32i.val[1]); | ||||
|  | ||||
|                             /* complex multiplication of four complex samples (float 32 bits each component) */ | ||||
|                             tmp_real.val[0] = vmulq_f32(tmp32f.val[0], _phase_real); | ||||
|                             tmp_real.val[1] = vmulq_f32(tmp32f.val[1], _phase_imag); | ||||
|                             tmp_imag.val[0] = vmulq_f32(tmp32f.val[0], _phase_imag); | ||||
|                             tmp_imag.val[1] = vmulq_f32(tmp32f.val[1], _phase_real); | ||||
|  | ||||
|                             tmp32f.val[0] = vsubq_f32(tmp_real.val[0], tmp_real.val[1]); | ||||
|                             tmp32f.val[1] = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]); | ||||
|  | ||||
|                             /* downcast results to int32 */ | ||||
|                             /* in __aarch64__ we can do that with vcvtaq_s32_f32(ret1); vcvtaq_s32_f32(ret2); */ | ||||
|                             sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[0]), 31))); | ||||
|                             PlusHalf = vaddq_f32(tmp32f.val[0], half); | ||||
|                             Round = vsubq_f32(PlusHalf, sign); | ||||
|                             tmp32i.val[0] = vcvtq_s32_f32(Round); | ||||
|  | ||||
|                             sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[1]), 31))); | ||||
|                             PlusHalf = vaddq_f32(tmp32f.val[1], half); | ||||
|                             Round = vsubq_f32(PlusHalf, sign); | ||||
|                             tmp32i.val[1] = vcvtq_s32_f32(Round); | ||||
|  | ||||
|                             /* downcast results to int16 */ | ||||
|                             tmp16.val[0] = vqmovn_s32(tmp32i.val[0]); | ||||
|                             tmp16.val[1] = vqmovn_s32(tmp32i.val[1]); | ||||
|  | ||||
|                             /* compute next four phases */ | ||||
|                             tmp_real.val[0] = vmulq_f32(_phase_real, _phase4_real); | ||||
|                             tmp_real.val[1] = vmulq_f32(_phase_imag, _phase4_imag); | ||||
|                             tmp_imag.val[0] = vmulq_f32(_phase_real, _phase4_imag); | ||||
|                             tmp_imag.val[1] = vmulq_f32(_phase_imag, _phase4_real); | ||||
|  | ||||
|                             _phase_real = vsubq_f32(tmp_real.val[0], tmp_real.val[1]); | ||||
|                             _phase_imag = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]); | ||||
|  | ||||
|                             /* store the four complex results */ | ||||
|                             vst2_s16((int16_t*)_out, tmp16); | ||||
|                             _out += 4; | ||||
|                         } | ||||
|                     // Regenerate phase | ||||
|                     //printf("Computed phase:  %f\n", cos(cargf(lv_cmake(_phase_real[0],_phase_imag[0])))); | ||||
|                     phase_est = arg_phase0 + (n + 1) * ROTATOR_RELOAD * 4 * arg_phase_inc; | ||||
|                     //printf("Estimated phase: %f\n\n", cos(phase_est)); | ||||
|                     *phase = lv_cmake(cos(phase_est), sin(phase_est)); | ||||
|                     phase2 = (lv_32fc_t)(*phase) * phase_inc; | ||||
|                     phase3 = phase2 * phase_inc; | ||||
|                     phase4 = phase3 * phase_inc; | ||||
|  | ||||
|                     __VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) }; | ||||
|                     __VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) }; | ||||
|  | ||||
|                     _phase_real = vld1q_f32(____phase_real); | ||||
|                     _phase_imag = vld1q_f32(____phase_imag); | ||||
|                 } | ||||
|  | ||||
|             for (unsigned int j = 0; j < neon_iters % ROTATOR_RELOAD; j++) | ||||
|                 { | ||||
|                     /* load 4 complex numbers (int 16 bits each component) */ | ||||
|                     tmp16 = vld2_s16((int16_t*)_in); | ||||
|                     __builtin_prefetch(_in + 8); | ||||
|                     _in += 4; | ||||
|  | ||||
|                     /* promote them to int 32 bits */ | ||||
|                     tmp32i.val[0] = vmovl_s16(tmp16.val[0]); | ||||
|                     tmp32i.val[1] = vmovl_s16(tmp16.val[1]); | ||||
|  | ||||
|                     /* promote them to float 32 bits */ | ||||
|                     tmp32f.val[0] = vcvtq_f32_s32(tmp32i.val[0]); | ||||
|                     tmp32f.val[1] = vcvtq_f32_s32(tmp32i.val[1]); | ||||
|  | ||||
|                     /* complex multiplication of four complex samples (float 32 bits each component) */ | ||||
|                     tmp_real.val[0] = vmulq_f32(tmp32f.val[0], _phase_real); | ||||
|                     tmp_real.val[1] = vmulq_f32(tmp32f.val[1], _phase_imag); | ||||
|                     tmp_imag.val[0] = vmulq_f32(tmp32f.val[0], _phase_imag); | ||||
|                     tmp_imag.val[1] = vmulq_f32(tmp32f.val[1], _phase_real); | ||||
|  | ||||
|                     tmp32f.val[0] = vsubq_f32(tmp_real.val[0], tmp_real.val[1]); | ||||
|                     tmp32f.val[1] = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]); | ||||
|  | ||||
|                     /* downcast results to int32 */ | ||||
|                     /* in __aarch64__ we can do that with vcvtaq_s32_f32(ret1); vcvtaq_s32_f32(ret2); */ | ||||
|                     sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[0]), 31))); | ||||
|                     PlusHalf = vaddq_f32(tmp32f.val[0], half); | ||||
|                     Round = vsubq_f32(PlusHalf, sign); | ||||
|                     tmp32i.val[0] = vcvtq_s32_f32(Round); | ||||
|  | ||||
|                     sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[1]), 31))); | ||||
|                     PlusHalf = vaddq_f32(tmp32f.val[1], half); | ||||
|                     Round = vsubq_f32(PlusHalf, sign); | ||||
|                     tmp32i.val[1] = vcvtq_s32_f32(Round); | ||||
|  | ||||
|                     /* downcast results to int16 */ | ||||
|                     tmp16.val[0] = vqmovn_s32(tmp32i.val[0]); | ||||
|                     tmp16.val[1] = vqmovn_s32(tmp32i.val[1]); | ||||
|  | ||||
|                     /* compute next four phases */ | ||||
|                     tmp_real.val[0] = vmulq_f32(_phase_real, _phase4_real); | ||||
|                     tmp_real.val[1] = vmulq_f32(_phase_imag, _phase4_imag); | ||||
|                     tmp_imag.val[0] = vmulq_f32(_phase_real, _phase4_imag); | ||||
|                     tmp_imag.val[1] = vmulq_f32(_phase_imag, _phase4_real); | ||||
|  | ||||
|                     _phase_real = vsubq_f32(tmp_real.val[0], tmp_real.val[1]); | ||||
|                     _phase_imag = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]); | ||||
|  | ||||
|                     /* store the four complex results */ | ||||
|                     vst2_s16((int16_t*)_out, tmp16); | ||||
|                     _out += 4; | ||||
|                 } | ||||
|  | ||||
|             vst1q_f32((float32_t*)__phase_real, _phase_real); | ||||
|             vst1q_f32((float32_t*)__phase_imag, _phase_imag); | ||||
|  | ||||
|             (*phase) = lv_cmake((float32_t)__phase_real[0], (float32_t)__phase_imag[0]); | ||||
|         } | ||||
|     for(i = 0; i < neon_iters % 4; ++i) | ||||
|         { | ||||
|             tmp16_ = *_in++; | ||||
|             tmp32_ = lv_cmake((float32_t)lv_creal(tmp16_), (float32_t)lv_cimag(tmp16_)) * (*phase); | ||||
|             *_out++ = lv_cmake((int16_t)rintf(lv_creal(tmp32_)), (int16_t)rintf(lv_cimag(tmp32_))); | ||||
|             (*phase) *= phase_inc; | ||||
|         } | ||||
| } | ||||
|  | ||||
| #endif /* LV_HAVE_NEON */ | ||||
|  | ||||
| #endif /* INCLUDED_volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_H */ | ||||
|   | ||||
| @@ -94,7 +94,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con | ||||
|  | ||||
|     if (sse_iters > 0) | ||||
|         { | ||||
|             __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc, result; | ||||
|             __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc; | ||||
|             __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4]; | ||||
|  | ||||
|             realcacc = _mm_setzero_si128(); | ||||
| @@ -105,8 +105,6 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con | ||||
|  | ||||
|             for(unsigned int number = 0; number < sse_iters; number++) | ||||
|                 { | ||||
|                     //std::complex<T> memory structure: real part -> reinterpret_cast<cv T*>(a)[2*i] | ||||
|                     //imaginery part -> reinterpret_cast<cv T*>(a)[2*i + 1] | ||||
|                     // a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r] | ||||
|                     a = _mm_load_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg | ||||
|                     __builtin_prefetch(_in_a + 8); | ||||
| @@ -115,7 +113,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con | ||||
|                     c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, .... | ||||
|  | ||||
|                     c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst. | ||||
|                     real = _mm_subs_epi16(c,c_sr); | ||||
|                     real = _mm_subs_epi16(c, c_sr); | ||||
|  | ||||
|                     b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i .... | ||||
|                     a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i .... | ||||
| @@ -123,7 +121,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con | ||||
|                     imag1 = _mm_mullo_epi16(a, b_sl); // a3.i*b3.r, .... | ||||
|                     imag2 = _mm_mullo_epi16(b, a_sl); // b3.i*a3.r, .... | ||||
|  | ||||
|                     imag = _mm_adds_epi16(imag1, imag2); //with saturation aritmetic! | ||||
|                     imag = _mm_adds_epi16(imag1, imag2); //with saturation arithmetic! | ||||
|  | ||||
|                     realcacc = _mm_adds_epi16(realcacc, real); | ||||
|                     imagcacc = _mm_adds_epi16(imagcacc, imag); | ||||
| @@ -135,9 +133,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con | ||||
|             realcacc = _mm_and_si128(realcacc, mask_real); | ||||
|             imagcacc = _mm_and_si128(imagcacc, mask_imag); | ||||
|  | ||||
|             result = _mm_or_si128(realcacc, imagcacc); | ||||
|             a = _mm_or_si128(realcacc, imagcacc); | ||||
|  | ||||
|             _mm_store_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector | ||||
|             _mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector | ||||
|  | ||||
|             for (int i = 0; i < 4; ++i) | ||||
|                 { | ||||
| @@ -202,7 +200,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con | ||||
|                     imag1 = _mm_mullo_epi16(a, b_sl); // a3.i*b3.r, .... | ||||
|                     imag2 = _mm_mullo_epi16(b, a_sl); // b3.i*a3.r, .... | ||||
|  | ||||
|                     imag = _mm_adds_epi16(imag1, imag2); //with saturation aritmetic! | ||||
|                     imag = _mm_adds_epi16(imag1, imag2); //with saturation arithmetic! | ||||
|  | ||||
|                     realcacc = _mm_adds_epi16(realcacc, real); | ||||
|                     imagcacc = _mm_adds_epi16(imagcacc, imag); | ||||
| @@ -245,46 +243,57 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const | ||||
|  | ||||
|     lv_16sc_t* a_ptr = (lv_16sc_t*) in_a; | ||||
|     lv_16sc_t* b_ptr = (lv_16sc_t*) in_b; | ||||
|     // for 2-lane vectors, 1st lane holds the real part, | ||||
|     // 2nd lane holds the imaginary part | ||||
|     int16x4x2_t a_val, b_val, c_val, accumulator; | ||||
|     int16x4x2_t tmp_real, tmp_imag; | ||||
|     __VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4]; | ||||
|     accumulator.val[0] = vdup_n_s16(0); | ||||
|     accumulator.val[1] = vdup_n_s16(0); | ||||
|     *out = lv_cmake((int16_t)0, (int16_t)0); | ||||
|  | ||||
|     for(number = 0; number < quarter_points; ++number) | ||||
|     if (quarter_points > 0) | ||||
|         { | ||||
|             a_val = vld2_s16((int16_t*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i | ||||
|             b_val = vld2_s16((int16_t*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i | ||||
|             __builtin_prefetch(a_ptr + 8); | ||||
|             __builtin_prefetch(b_ptr + 8); | ||||
|             // for 2-lane vectors, 1st lane holds the real part, | ||||
|             // 2nd lane holds the imaginary part | ||||
|             int16x4x2_t a_val, b_val, c_val, accumulator; | ||||
|             int16x4x2_t tmp_real, tmp_imag; | ||||
|             __VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4]; | ||||
|             accumulator.val[0] = vdup_n_s16(0); | ||||
|             accumulator.val[1] = vdup_n_s16(0); | ||||
|             lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0); | ||||
|  | ||||
|             // multiply the real*real and imag*imag to get real result | ||||
|             // a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r | ||||
|             tmp_real.val[0] = vmul_s16(a_val.val[0], b_val.val[0]); | ||||
|             // a0i*b0i|a1i*b1i|a2i*b2i|a3i*b3i | ||||
|             tmp_real.val[1] = vmul_s16(a_val.val[1], b_val.val[1]); | ||||
|             for(number = 0; number < quarter_points; ++number) | ||||
|                 { | ||||
|                     a_val = vld2_s16((int16_t*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i | ||||
|                     b_val = vld2_s16((int16_t*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i | ||||
|                     __builtin_prefetch(a_ptr + 8); | ||||
|                     __builtin_prefetch(b_ptr + 8); | ||||
|  | ||||
|             // Multiply cross terms to get the imaginary result | ||||
|             // a0r*b0i|a1r*b1i|a2r*b2i|a3r*b3i | ||||
|             tmp_imag.val[0] = vmul_s16(a_val.val[0], b_val.val[1]); | ||||
|             // a0i*b0r|a1i*b1r|a2i*b2r|a3i*b3r | ||||
|             tmp_imag.val[1] = vmul_s16(a_val.val[1], b_val.val[0]); | ||||
|                     // multiply the real*real and imag*imag to get real result | ||||
|                     // a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r | ||||
|                     tmp_real.val[0] = vmul_s16(a_val.val[0], b_val.val[0]); | ||||
|                     // a0i*b0i|a1i*b1i|a2i*b2i|a3i*b3i | ||||
|                     tmp_real.val[1] = vmul_s16(a_val.val[1], b_val.val[1]); | ||||
|  | ||||
|             c_val.val[0] = vsub_s16(tmp_real.val[0], tmp_real.val[1]); | ||||
|             c_val.val[1] = vadd_s16(tmp_imag.val[0], tmp_imag.val[1]); | ||||
|                     // Multiply cross terms to get the imaginary result | ||||
|                     // a0r*b0i|a1r*b1i|a2r*b2i|a3r*b3i | ||||
|                     tmp_imag.val[0] = vmul_s16(a_val.val[0], b_val.val[1]); | ||||
|                     // a0i*b0r|a1i*b1r|a2i*b2r|a3i*b3r | ||||
|                     tmp_imag.val[1] = vmul_s16(a_val.val[1], b_val.val[0]); | ||||
|  | ||||
|             accumulator.val[0] = vadd_s16(accumulator.val[0], c_val.val[0]); | ||||
|             accumulator.val[1] = vadd_s16(accumulator.val[1], c_val.val[1]); | ||||
|                     c_val.val[0] = vqsub_s16(tmp_real.val[0], tmp_real.val[1]); | ||||
|                     c_val.val[1] = vqadd_s16(tmp_imag.val[0], tmp_imag.val[1]); | ||||
|  | ||||
|             a_ptr += 4; | ||||
|             b_ptr += 4; | ||||
|                     accumulator.val[0] = vqadd_s16(accumulator.val[0], c_val.val[0]); | ||||
|                     accumulator.val[1] = vqadd_s16(accumulator.val[1], c_val.val[1]); | ||||
|  | ||||
|                     a_ptr += 4; | ||||
|                     b_ptr += 4; | ||||
|                 } | ||||
|  | ||||
|             vst2_s16((int16_t*)accum_result, accumulator); | ||||
|             for (unsigned int i = 0; i < 4; ++i) | ||||
|                 { | ||||
|                     dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(accum_result[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(accum_result[i]))); | ||||
|                 } | ||||
|  | ||||
|             *out = dotProduct; | ||||
|         } | ||||
|  | ||||
|     vst2_s16((int16_t*)accum_result, accumulator); | ||||
|     *out = accum_result[0] + accum_result[1] + accum_result[2] + accum_result[3]; | ||||
|  | ||||
|     // tail case | ||||
|     for(number = quarter_points * 4; number < num_points; ++number) | ||||
|         { | ||||
|   | ||||
| @@ -84,6 +84,25 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* resu | ||||
| #endif /*LV_HAVE_GENERIC*/ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_GENERIC | ||||
|  | ||||
| static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic_sat(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) | ||||
| { | ||||
|     for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|         { | ||||
|             result[n_vec] = lv_cmake(0,0); | ||||
|             for (unsigned int n = 0; n < num_points; n++) | ||||
|                 { | ||||
|                     lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(in_common[n]), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(in_common[n]), lv_cimag(in_a[n_vec][n]))), | ||||
|                                              sat_adds16i(sat_muls16i(lv_creal(in_common[n]), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(in_common[n]), lv_creal(in_a[n_vec][n])))); | ||||
|                     result[n_vec] = lv_cmake(sat_adds16i(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[n_vec]), lv_cimag(tmp))); | ||||
|                 } | ||||
|         } | ||||
| } | ||||
|  | ||||
| #endif /*LV_HAVE_GENERIC*/ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE2 | ||||
| #include <emmintrin.h> | ||||
|  | ||||
| @@ -398,8 +417,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(lv_16sc_t* res | ||||
|                             tmp.val[0] = vmls_s16(tmp.val[0], a_val.val[1], b_val.val[1]); | ||||
|                             tmp.val[1] = vmla_s16(tmp.val[1], a_val.val[0], b_val.val[1]); | ||||
|  | ||||
|                             accumulator[n_vec].val[0] = vadd_s16(accumulator[n_vec].val[0], tmp.val[0]); | ||||
|                             accumulator[n_vec].val[1] = vadd_s16(accumulator[n_vec].val[1], tmp.val[1]); | ||||
|                             accumulator[n_vec].val[0] = vqadd_s16(accumulator[n_vec].val[0], tmp.val[0]); | ||||
|                             accumulator[n_vec].val[1] = vqadd_s16(accumulator[n_vec].val[1], tmp.val[1]); | ||||
|                         } | ||||
|                     _in_common += 4; | ||||
|                 } | ||||
|   | ||||
| @@ -63,6 +63,30 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_generic(lv_16sc_t* | ||||
|  | ||||
| #endif  /* Generic */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_GENERIC | ||||
| static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_generic_sat(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points) | ||||
| { | ||||
|     int num_a_vectors = 3; | ||||
|     lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
|     { | ||||
|        in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); | ||||
|        memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); | ||||
|     } | ||||
|  | ||||
|     volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic_sat(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); | ||||
|  | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
|     { | ||||
|         volk_gnsssdr_free(in_a[n]); | ||||
|     } | ||||
|     volk_gnsssdr_free(in_a); | ||||
| } | ||||
|  | ||||
| #endif  /* Generic */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE2 | ||||
| static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_a_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points) | ||||
| { | ||||
|   | ||||
| @@ -43,6 +43,7 @@ | ||||
|  * | ||||
|  * Rotates and multiplies the reference complex vector with an arbitrary number of other complex vectors, | ||||
|  * accumulates the results and stores them in the output vector. | ||||
|  * The rotation is done at a fixed rate per sample, from an initial \p phase offset. | ||||
|  * This function can be used for Doppler wipe-off and multiple correlator. | ||||
|  * | ||||
|  * <b>Dispatcher Prototype</b> | ||||
| @@ -71,7 +72,7 @@ | ||||
| #include <volk_gnsssdr/volk_gnsssdr_complex.h> | ||||
| #include <volk_gnsssdr/saturation_arithmetic.h> | ||||
| #include <math.h> | ||||
| //#include <stdio.h> | ||||
| #include <stdio.h> | ||||
|  | ||||
| #ifdef LV_HAVE_GENERIC | ||||
|  | ||||
| @@ -88,6 +89,19 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc | ||||
|             tmp16 = *in_common++; //if(n<10 || n >= 8108) printf("generic phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); | ||||
|             tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); | ||||
|             tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); | ||||
|  | ||||
|             // Regenerate phase | ||||
|             if (n % 256 == 0) | ||||
|                 { | ||||
|                     //printf("Phase before regeneration %i: %f,%f  Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase)); | ||||
| #ifdef __cplusplus | ||||
|                     (*phase) /= std::abs((*phase)); | ||||
| #else | ||||
|                     (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase)); | ||||
| #endif | ||||
|                     //printf("Phase after regeneration %i: %f,%f  Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase)); | ||||
|                 } | ||||
|  | ||||
|             (*phase) *= phase_inc; | ||||
|             for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|                 { | ||||
| @@ -101,6 +115,60 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc | ||||
| #endif /*LV_HAVE_GENERIC*/ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_GENERIC | ||||
|  | ||||
| static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic_reload(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) | ||||
| { | ||||
|     lv_16sc_t tmp16; | ||||
|     lv_32fc_t tmp32; | ||||
|     const unsigned int ROTATOR_RELOAD = 256; | ||||
|     for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|         { | ||||
|             result[n_vec] = lv_cmake(0,0); | ||||
|         } | ||||
|  | ||||
|     for (unsigned int n = 0; n < num_points / ROTATOR_RELOAD; n++) | ||||
|         { | ||||
|             for (unsigned int j = 0; j < ROTATOR_RELOAD; j++) | ||||
|                 { | ||||
|                     tmp16 = *in_common++; //if(n<10 || n >= 8108) printf("generic phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); | ||||
|                     tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); | ||||
|                     tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); | ||||
|                     (*phase) *= phase_inc; | ||||
|                     for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|                         { | ||||
|                             lv_16sc_t tmp = tmp16 * in_a[n_vec][n * ROTATOR_RELOAD + j]; | ||||
|                             //lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(tmp16), lv_cimag(in_a[n_vec][n]))) , sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(tmp16), lv_creal(in_a[n_vec][n])))); | ||||
|                             result[n_vec] = lv_cmake(sat_adds16i(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[n_vec]), lv_cimag(tmp))); | ||||
|                         } | ||||
|                 } | ||||
|             /* Regenerate phase */ | ||||
| #ifdef __cplusplus | ||||
|             (*phase) /= std::abs((*phase)); | ||||
| #else | ||||
|             //(*phase) /= cabsf((*phase)); | ||||
|             (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase)); | ||||
| #endif | ||||
|         } | ||||
|  | ||||
|     for (unsigned int j = 0; j < num_points % ROTATOR_RELOAD; j++) | ||||
|         { | ||||
|             tmp16 = *in_common++; //if(n<10 || n >= 8108) printf("generic phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); | ||||
|             tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); | ||||
|             tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); | ||||
|             (*phase) *= phase_inc; | ||||
|             for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|                 { | ||||
|                     lv_16sc_t tmp = tmp16 * in_a[n_vec][ (num_points / ROTATOR_RELOAD) * ROTATOR_RELOAD + j ]; | ||||
|                     //lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(tmp16), lv_cimag(in_a[n_vec][n]))) , sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(tmp16), lv_creal(in_a[n_vec][n])))); | ||||
|                     result[n_vec] = lv_cmake(sat_adds16i(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[n_vec]), lv_cimag(tmp))); | ||||
|                 } | ||||
|         } | ||||
| } | ||||
|  | ||||
| #endif /*LV_HAVE_GENERIC*/ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE3 | ||||
| #include <pmmintrin.h> | ||||
|  | ||||
| @@ -169,6 +237,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ | ||||
|             //next two samples | ||||
|             _in_common += 2; | ||||
|             pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|             __builtin_prefetch(_in_common + 8); | ||||
|             //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|             yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|             yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
| @@ -212,6 +281,15 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ | ||||
|                     realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real); | ||||
|                     imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag); | ||||
|                 } | ||||
|             // Regenerate phase | ||||
|             if ((number % 128) == 0) | ||||
|                 { | ||||
|                     tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg); | ||||
|                     tmp2 = _mm_hadd_ps(tmp1, tmp1); | ||||
|                     tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8); | ||||
|                     tmp2 = _mm_sqrt_ps(tmp1); | ||||
|                     two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2); | ||||
|                 } | ||||
|         } | ||||
|  | ||||
|     for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
| @@ -233,6 +311,254 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ | ||||
|     free(realcacc); | ||||
|     free(imagcacc); | ||||
|  | ||||
|     tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg); | ||||
|     tmp2 = _mm_hadd_ps(tmp1, tmp1); | ||||
|     tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8); | ||||
|     tmp2 = _mm_sqrt_ps(tmp1); | ||||
|     two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2); | ||||
|  | ||||
|     _mm_store_ps((float*)two_phase_acc, two_phase_acc_reg); | ||||
|     //(*phase) = lv_cmake((float*)two_phase_acc[0], (float*)two_phase_acc[1]); | ||||
|     (*phase) = two_phase_acc[0]; | ||||
|  | ||||
|     for(unsigned int n  = sse_iters * 4; n < num_points; n++) | ||||
|         { | ||||
|             tmp16 = in_common[n];  //printf("a_sse phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); | ||||
|             tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); | ||||
|             tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); | ||||
|             (*phase) *= phase_inc; | ||||
|  | ||||
|             for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|                 { | ||||
|                     lv_16sc_t tmp = tmp16 * in_a[n_vec][n]; | ||||
|                     //lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(tmp16), lv_cimag(in_a[n_vec][n]))) , sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(tmp16), lv_creal(in_a[n_vec][n])))); | ||||
|                     _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), | ||||
|                             sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp))); | ||||
|                 } | ||||
|         } | ||||
| } | ||||
| #endif /* LV_HAVE_SSE3 */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE3 | ||||
| #include <pmmintrin.h> | ||||
|  | ||||
| static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points) | ||||
| { | ||||
|     lv_16sc_t dotProduct = lv_cmake(0,0); | ||||
|  | ||||
|     const unsigned int sse_iters = num_points / 4; | ||||
|     const unsigned int ROTATOR_RELOAD = 128; | ||||
|  | ||||
|     const lv_16sc_t** _in_a = in_a; | ||||
|     const lv_16sc_t* _in_common = in_common; | ||||
|     lv_16sc_t* _out = result; | ||||
|  | ||||
|     __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4]; | ||||
|  | ||||
|     //todo dyn mem reg | ||||
|  | ||||
|     __m128i* realcacc; | ||||
|     __m128i* imagcacc; | ||||
|  | ||||
|     realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 | ||||
|     imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 | ||||
|  | ||||
|     __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl; | ||||
|  | ||||
|     mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); | ||||
|     mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255); | ||||
|  | ||||
|     // phase rotation registers | ||||
|     __m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg; | ||||
|     __m128i pc1, pc2; | ||||
|     __attribute__((aligned(16))) lv_32fc_t two_phase_inc[2]; | ||||
|     two_phase_inc[0] = phase_inc * phase_inc; | ||||
|     two_phase_inc[1] = phase_inc * phase_inc; | ||||
|     two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc); | ||||
|     __attribute__((aligned(16))) lv_32fc_t two_phase_acc[2]; | ||||
|     two_phase_acc[0] = (*phase); | ||||
|     two_phase_acc[1] = (*phase) * phase_inc; | ||||
|     two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc); | ||||
|     __m128 yl, yh, tmp1, tmp2, tmp3; | ||||
|     lv_16sc_t tmp16; | ||||
|     lv_32fc_t tmp32; | ||||
|  | ||||
|     for (unsigned int number = 0; number <  sse_iters / ROTATOR_RELOAD; ++number) | ||||
|         { | ||||
|             for (unsigned int j = 0; j < ROTATOR_RELOAD; j++) | ||||
|                 { | ||||
|                     // Phase rotation on operand in_common starts here: | ||||
|                     //printf("generic phase %i: %f,%f\n", n*4,lv_creal(*phase),lv_cimag(*phase)); | ||||
|                     pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|                     //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|                     yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|                     yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|                     tmp1 = _mm_mul_ps(pa, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|                     pa = _mm_shuffle_ps(pa, pa, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|                     tmp2 = _mm_mul_ps(pa, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|                     pb = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|                     pc1 = _mm_cvtps_epi32(pb); // convert from 32fc to 32ic | ||||
|  | ||||
|                     //complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg | ||||
|                     yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|                     yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|                     tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|                     tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|                     tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|                     two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|  | ||||
|                     //next two samples | ||||
|                     _in_common += 2; | ||||
|                     pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|                     __builtin_prefetch(_in_common + 8); | ||||
|                     //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|                     yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|                     yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|                     tmp1 = _mm_mul_ps(pa, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|                     pa = _mm_shuffle_ps(pa, pa, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|                     tmp2 = _mm_mul_ps(pa, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|                     pb = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|                     pc2 = _mm_cvtps_epi32(pb); // convert from 32fc to 32ic | ||||
|  | ||||
|                     //complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg | ||||
|                     yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|                     yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|                     tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|                     tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|                     tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|                     two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|  | ||||
|                     // store four rotated in_common samples in the register b | ||||
|                     b = _mm_packs_epi32(pc1, pc2);// convert from 32ic to 16ic | ||||
|  | ||||
|                     //next two samples | ||||
|                     _in_common += 2; | ||||
|  | ||||
|                     for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|                         { | ||||
|                             a = _mm_load_si128((__m128i*)&(_in_a[n_vec][(number * ROTATOR_RELOAD + j) * 4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg | ||||
|  | ||||
|                             c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, .... | ||||
|  | ||||
|                             c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst. | ||||
|                             real = _mm_subs_epi16(c, c_sr); | ||||
|  | ||||
|                             b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i .... | ||||
|                             a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i .... | ||||
|  | ||||
|                             imag1 = _mm_mullo_epi16(a, b_sl); // a3.i*b3.r, .... | ||||
|                             imag2 = _mm_mullo_epi16(b, a_sl); // b3.i*a3.r, .... | ||||
|  | ||||
|                             imag = _mm_adds_epi16(imag1, imag2); | ||||
|  | ||||
|                             realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real); | ||||
|                             imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag); | ||||
|                         } | ||||
|                 } | ||||
|             // regenerate phase | ||||
|             tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg); | ||||
|             tmp2 = _mm_hadd_ps(tmp1, tmp1); | ||||
|             tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8); | ||||
|             tmp2 = _mm_sqrt_ps(tmp1); | ||||
|             two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2); | ||||
|         } | ||||
|  | ||||
|     for (unsigned int j = 0; j < sse_iters % ROTATOR_RELOAD; j++) | ||||
|         { | ||||
|             pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|             //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|             yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|             yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|             tmp1 = _mm_mul_ps(pa, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|             pa = _mm_shuffle_ps(pa, pa, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|             tmp2 = _mm_mul_ps(pa, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|             pb = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|             pc1 = _mm_cvtps_epi32(pb); // convert from 32fc to 32ic | ||||
|  | ||||
|             //complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg | ||||
|             yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|             yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|             tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|             tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|             tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|             two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|  | ||||
|             //next two samples | ||||
|             _in_common += 2; | ||||
|             pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|             __builtin_prefetch(_in_common + 8); | ||||
|             //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|             yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|             yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|             tmp1 = _mm_mul_ps(pa, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|             pa = _mm_shuffle_ps(pa, pa, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|             tmp2 = _mm_mul_ps(pa, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|             pb = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|             pc2 = _mm_cvtps_epi32(pb); // convert from 32fc to 32ic | ||||
|  | ||||
|             //complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg | ||||
|             yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|             yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
|             tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr | ||||
|             tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br | ||||
|             tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di | ||||
|             two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di | ||||
|  | ||||
|             // store four rotated in_common samples in the register b | ||||
|             b = _mm_packs_epi32(pc1, pc2);// convert from 32ic to 16ic | ||||
|  | ||||
|             //next two samples | ||||
|             _in_common += 2; | ||||
|  | ||||
|             for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|                 { | ||||
|                     a = _mm_load_si128((__m128i*)&(_in_a[n_vec][((sse_iters / ROTATOR_RELOAD)  * ROTATOR_RELOAD + j) * 4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg | ||||
|  | ||||
|                     c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, .... | ||||
|  | ||||
|                     c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst. | ||||
|                     real = _mm_subs_epi16(c, c_sr); | ||||
|  | ||||
|                     b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i .... | ||||
|                     a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i .... | ||||
|  | ||||
|                     imag1 = _mm_mullo_epi16(a, b_sl); // a3.i*b3.r, .... | ||||
|                     imag2 = _mm_mullo_epi16(b, a_sl); // b3.i*a3.r, .... | ||||
|  | ||||
|                     imag = _mm_adds_epi16(imag1, imag2); | ||||
|  | ||||
|                     realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real); | ||||
|                     imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag); | ||||
|                 } | ||||
|         } | ||||
|  | ||||
|     for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|         { | ||||
|             realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real); | ||||
|             imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag); | ||||
|  | ||||
|             a = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]); | ||||
|  | ||||
|             _mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector | ||||
|             dotProduct = lv_cmake(0,0); | ||||
|             for (int i = 0; i < 4; ++i) | ||||
|                 { | ||||
|                     dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), | ||||
|                             sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); | ||||
|                 } | ||||
|             _out[n_vec] = dotProduct; | ||||
|         } | ||||
|  | ||||
|     free(realcacc); | ||||
|     free(imagcacc); | ||||
|  | ||||
|     tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg); | ||||
|     tmp2 = _mm_hadd_ps(tmp1, tmp1); | ||||
|     tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8); | ||||
|     tmp2 = _mm_sqrt_ps(tmp1); | ||||
|     two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2); | ||||
|  | ||||
|     _mm_store_ps((float*)two_phase_acc, two_phase_acc_reg); | ||||
|     //(*phase) = lv_cmake((float*)two_phase_acc[0], (float*)two_phase_acc[1]); | ||||
|     (*phase) = two_phase_acc[0]; | ||||
| @@ -303,7 +629,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ | ||||
|     for(unsigned int number = 0; number < sse_iters; number++) | ||||
|         { | ||||
|             // Phase rotation on operand in_common starts here: | ||||
|  | ||||
|             pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|             __builtin_prefetch(_in_common + 8); | ||||
|             //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
| @@ -326,6 +651,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ | ||||
|             //next two samples | ||||
|             _in_common += 2; | ||||
|             pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg | ||||
|             __builtin_prefetch(_in_common + 8); | ||||
|             //complex 32fc multiplication b=a*two_phase_acc_reg | ||||
|             yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr | ||||
|             yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di | ||||
| @@ -369,6 +695,15 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ | ||||
|                     realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real); | ||||
|                     imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag); | ||||
|                 } | ||||
|             // Regenerate phase | ||||
|             if ((number % 256) == 0) | ||||
|                 { | ||||
|                     tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg); | ||||
|                     tmp2 = _mm_hadd_ps(tmp1, tmp1); | ||||
|                     tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8); | ||||
|                     tmp2 = _mm_sqrt_ps(tmp1); | ||||
|                     two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2); | ||||
|                 } | ||||
|         } | ||||
|  | ||||
|     for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
| @@ -393,7 +728,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ | ||||
|     _mm_storeu_ps((float*)two_phase_acc, two_phase_acc_reg); | ||||
|     (*phase) = two_phase_acc[0]; | ||||
|  | ||||
|  | ||||
|     for(unsigned int n  = sse_iters * 4; n < num_points; n++) | ||||
|         { | ||||
|             tmp16 = in_common[n]; | ||||
| @@ -428,6 +762,9 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* | ||||
|     if (neon_iters > 0) | ||||
|         { | ||||
|             lv_16sc_t dotProduct = lv_cmake(0,0); | ||||
|             float arg_phase0 = cargf(*phase); | ||||
|             float arg_phase_inc = cargf(phase_inc); | ||||
|             float phase_est; | ||||
|  | ||||
|             lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc; | ||||
|             __VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) }; | ||||
| @@ -538,6 +875,22 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* | ||||
|                             accumulator[n_vec].val[0] = vqadd_s16(accumulator[n_vec].val[0], c_val.val[0]); | ||||
|                             accumulator[n_vec].val[1] = vqadd_s16(accumulator[n_vec].val[1], c_val.val[1]); | ||||
|                         } | ||||
|                     // Regenerate phase | ||||
|                     if ((number % 256) == 0) | ||||
|                         { | ||||
|                             phase_est = arg_phase0 + (number + 1) * 4 * arg_phase_inc; | ||||
|  | ||||
|                             *phase = lv_cmake(cos(phase_est), sin(phase_est)); | ||||
|                             phase2 = (lv_32fc_t)(*phase) * phase_inc; | ||||
|                             phase3 = phase2 * phase_inc; | ||||
|                             phase4 = phase3 * phase_inc; | ||||
|  | ||||
|                             __VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) }; | ||||
|                             __VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) }; | ||||
|  | ||||
|                             _phase_real = vld1q_f32(____phase_real); | ||||
|                             _phase_imag = vld1q_f32(____phase_imag); | ||||
|                         } | ||||
|                 } | ||||
|  | ||||
|             for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
| @@ -577,6 +930,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* | ||||
|  | ||||
| #ifdef LV_HAVE_NEON | ||||
| #include <arm_neon.h> | ||||
| #include <volk_gnsssdr/volk_gnsssdr_neon_intrinsics.h> | ||||
|  | ||||
| static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points) | ||||
| { | ||||
| @@ -592,7 +946,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s | ||||
|     if (neon_iters > 0) | ||||
|         { | ||||
|             lv_16sc_t dotProduct = lv_cmake(0,0); | ||||
|  | ||||
|             float arg_phase0 = cargf(*phase); | ||||
|             float arg_phase_inc = cargf(phase_inc); | ||||
|             float phase_est; | ||||
|             //printf("arg phase0: %f", arg_phase0); | ||||
|             lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc; | ||||
|             __VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) }; | ||||
|             __VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) }; | ||||
| @@ -677,6 +1034,37 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s | ||||
|                     _phase_real = vsubq_f32(tmp32_real.val[0], tmp32_real.val[1]); | ||||
|                     _phase_imag = vaddq_f32(tmp32_imag.val[0], tmp32_imag.val[1]); | ||||
|  | ||||
|                     // Regenerate phase | ||||
|                     if ((number % 256) == 0) | ||||
|                         { | ||||
|                             //printf("computed phase: %f\n", cos(cargf(lv_cmake(_phase_real[0],_phase_imag[0])))); | ||||
|                             phase_est = arg_phase0 + (number + 1) * 4 * arg_phase_inc; | ||||
|                             //printf("Estimated phase: %f\n\n", cos(phase_est)); | ||||
|  | ||||
|                             *phase = lv_cmake(cos(phase_est), sin(phase_est)); | ||||
|                             phase2 = (lv_32fc_t)(*phase) * phase_inc; | ||||
|                             phase3 = phase2 * phase_inc; | ||||
|                             phase4 = phase3 * phase_inc; | ||||
|  | ||||
|                             __VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) }; | ||||
|                             __VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) }; | ||||
|  | ||||
|                             _phase_real = vld1q_f32(____phase_real); | ||||
|                             _phase_imag = vld1q_f32(____phase_imag); | ||||
|  | ||||
|                             // Round = vmulq_f32(_phase_real, _phase_real); | ||||
|                             // Round = vmlaq_f32(Round, _phase_imag, _phase_imag); | ||||
|                             //               Round = vsqrtq_f32(Round);//printf("sqrt: %f \n", Round[0]); | ||||
|                             //Round = vrsqrteq_f32(Round);printf("1/sqtr: %f  \n",Round[0]);   | ||||
|                             //Round = vrecpeq_f32((Round); | ||||
|                             //              _phase_real = vdivq_f32(_phase_real, Round); | ||||
|                             //              _phase_imag = vdivq_f32(_phase_imag, Round); | ||||
|                             //_phase_real = vmulq_f32(_phase_real, Round); | ||||
|                             //_phase_imag = vmulq_f32(_phase_imag, Round); | ||||
|                             //printf("After  %i: %f,%f, %f\n\n", number, _phase_real[0], _phase_imag[0], sqrt(_phase_real[0]*_phase_real[0]+_phase_imag[0]*_phase_imag[0])); | ||||
|  | ||||
|                         } | ||||
|  | ||||
|                     vst1q_f32((float32_t*)__phase_real, _phase_real); | ||||
|                     vst1q_f32((float32_t*)__phase_imag, _phase_imag); | ||||
|  | ||||
| @@ -708,6 +1096,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s | ||||
|                     _out[n_vec] = dotProduct; | ||||
|                 } | ||||
|             free(accumulator); | ||||
|  | ||||
|             vst1q_f32((float32_t*)__phase_real, _phase_real); | ||||
|             vst1q_f32((float32_t*)__phase_imag, _phase_imag); | ||||
|  | ||||
| @@ -731,3 +1120,4 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s | ||||
| #endif /* LV_HAVE_NEON */ | ||||
|  | ||||
| #endif /*INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_H*/ | ||||
|  | ||||
|   | ||||
| @@ -70,6 +70,36 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_generic(lv_ | ||||
| #endif  // Generic | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_GENERIC | ||||
| static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_generic_reload(lv_16sc_t* result, const lv_16sc_t* local_code,  const lv_16sc_t* in, unsigned int num_points) | ||||
| { | ||||
|     // phases must be normalized. Phase rotator expects a complex exponential input! | ||||
|     float rem_carrier_phase_in_rad = 0.345; | ||||
|     float phase_step_rad = 0.1; | ||||
|     lv_32fc_t phase[1]; | ||||
|     phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); | ||||
|     lv_32fc_t phase_inc[1]; | ||||
|     phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); | ||||
|  | ||||
|     int num_a_vectors = 3; | ||||
|     lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
|         { | ||||
|             in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); | ||||
|             memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); | ||||
|         } | ||||
|     volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic_reload(result, local_code, phase_inc[0], phase,(const lv_16sc_t**) in_a, num_a_vectors, num_points); | ||||
|  | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
|         { | ||||
|             volk_gnsssdr_free(in_a[n]); | ||||
|         } | ||||
|     volk_gnsssdr_free(in_a); | ||||
| } | ||||
|  | ||||
| #endif  // Generic | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE3 | ||||
| static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points) | ||||
| { | ||||
| @@ -101,6 +131,37 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3(lv_1 | ||||
| #endif // SSE3 | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE3 | ||||
| static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3_reload(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points) | ||||
| { | ||||
|     // phases must be normalized. Phase rotator expects a complex exponential input! | ||||
|     float rem_carrier_phase_in_rad = 0.345; | ||||
|     float phase_step_rad = 0.1; | ||||
|     lv_32fc_t phase[1]; | ||||
|     phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad)); | ||||
|     lv_32fc_t phase_inc[1]; | ||||
|     phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad)); | ||||
|  | ||||
|     int num_a_vectors = 3; | ||||
|     lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
|         { | ||||
|             in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); | ||||
|             memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); | ||||
|         } | ||||
|  | ||||
|     volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points); | ||||
|  | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
|         { | ||||
|             volk_gnsssdr_free(in_a[n]); | ||||
|         } | ||||
|     volk_gnsssdr_free(in_a); | ||||
| } | ||||
|  | ||||
| #endif // SSE3 | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE3 | ||||
| static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points) | ||||
| { | ||||
|   | ||||
| @@ -58,6 +58,9 @@ std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t | ||||
|     // some others need more iterations *****  ADDED BY GNSS-SDR | ||||
|     volk_gnsssdr_test_params_t test_params_more_iters = volk_gnsssdr_test_params_t(test_params.tol(), test_params.scalar(), | ||||
|                 test_params.vlen(), 100000, test_params.benchmark_mode(), test_params.kernel_regex()); | ||||
|     // ... or more tolerance *****  ADDED BY GNSS-SDR | ||||
|     volk_gnsssdr_test_params_t test_params_int16 = volk_gnsssdr_test_params_t(16, test_params.scalar(), | ||||
|                 test_params.vlen(), test_params.iter(), test_params.benchmark_mode(), test_params.kernel_regex()); | ||||
|  | ||||
|     std::vector<volk_gnsssdr_test_case_t> test_cases = boost::assign::list_of | ||||
|  | ||||
| @@ -77,11 +80,11 @@ std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_dot_prod_16ic, test_params)) | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_multiply_16ic, test_params_more_iters)) | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_16ic_convert_32fc, test_params_more_iters)) | ||||
|         (VOLK_INIT_PUPP(volk_gnsssdr_16ic_rotatorpuppet_16ic, volk_gnsssdr_16ic_s32fc_x2_rotator_16ic, test_params)) | ||||
|         (VOLK_INIT_PUPP(volk_gnsssdr_16ic_rotatorpuppet_16ic, volk_gnsssdr_16ic_s32fc_x2_rotator_16ic, test_params_int1)) | ||||
|         (VOLK_INIT_PUPP(volk_gnsssdr_16ic_resamplerpuppet_16ic, volk_gnsssdr_16ic_resampler_16ic, test_params)) | ||||
|         (VOLK_INIT_PUPP(volk_gnsssdr_16ic_resamplerxnpuppet_16ic, volk_gnsssdr_16ic_xn_resampler_16ic_xn, test_params)) | ||||
|         (VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_dot_prod_16ic_xn, test_params)) | ||||
|         (VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn, test_params)) | ||||
|         (VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn, test_params_int16)) | ||||
|         ; | ||||
|  | ||||
|     return test_cases; | ||||
|   | ||||
| @@ -76,10 +76,10 @@ void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n) | ||||
|                         else ((uint32_t *)data)[i] = (uint32_t) scaled_rand; | ||||
|                         break; | ||||
|                     case 2: | ||||
|                         // 16 bits dot product saturates very fast even with moderate length vectors | ||||
|                     	// we produce here only 4 bits input range | ||||
|                         if(type.is_signed) ((int16_t *)data)[i] = (int16_t)((int16_t) scaled_rand % 16); | ||||
|                         else ((uint16_t *)data)[i] = (uint16_t) (int16_t)((int16_t) scaled_rand % 16); | ||||
|                         // 16 bit multiplication saturates very fast | ||||
|                     	// we produce here only 3 bits input range | ||||
|                         if(type.is_signed) ((int16_t *)data)[i] = (int16_t)((int16_t) scaled_rand % 8); | ||||
|                         else ((uint16_t *)data)[i] = (uint16_t) (int16_t)((int16_t) scaled_rand % 8); | ||||
|                         break; | ||||
|                     case 1: | ||||
|                         if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand; | ||||
|   | ||||
| @@ -25,7 +25,6 @@ set(TRACKING_ADAPTER_SOURCES | ||||
|      galileo_e1_dll_pll_veml_tracking.cc | ||||
|      galileo_e1_tcp_connector_tracking.cc | ||||
|      gps_l1_ca_dll_fll_pll_tracking.cc | ||||
|      gps_l1_ca_dll_pll_optim_tracking.cc | ||||
|      gps_l1_ca_dll_pll_tracking.cc | ||||
|      gps_l1_ca_dll_pll_c_aid_tracking.cc | ||||
|      gps_l1_ca_tcp_connector_tracking.cc | ||||
|   | ||||
| @@ -1,159 +0,0 @@ | ||||
| /*! | ||||
|  * \file gps_l1_ca_dll_pll_optim_tracking.cc | ||||
|  * \brief  Interface of an adapter of a speed optimized DLL+PLL tracking loop block | ||||
|  * for GPS L1 C/A to a TrackingInterface | ||||
|  * \author Javier Arribas, 2012. jarribas(at)cttc.es | ||||
|  * | ||||
|  * GPS L1 TRACKING MODULE OPTIMIZED FOR SPEED: | ||||
|  * - Code Doppler is not compensated in the local replica | ||||
|  * Code DLL + carrier PLL according to the algorithms described in: | ||||
|  * K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen, | ||||
|  * A Software-Defined GPS and Galileo Receiver. A Single-Frequency | ||||
|  * Approach, Birkha user, 2007 | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
|  * | ||||
|  * Copyright (C) 2010-2015  (see AUTHORS file for a list of contributors) | ||||
|  * | ||||
|  * GNSS-SDR is a software defined Global Navigation | ||||
|  *          Satellite Systems receiver | ||||
|  * | ||||
|  * This file is part of GNSS-SDR. | ||||
|  * | ||||
|  * GNSS-SDR is free software: you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation, either version 3 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * GNSS-SDR is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
|  */ | ||||
|  | ||||
| #include "gps_l1_ca_dll_pll_optim_tracking.h" | ||||
| #include <cmath> | ||||
| #include <glog/logging.h> | ||||
| #include "GPS_L1_CA.h" | ||||
| #include "configuration_interface.h" | ||||
|  | ||||
| using google::LogMessage; | ||||
|  | ||||
| GpsL1CaDllPllOptimTracking::GpsL1CaDllPllOptimTracking( | ||||
|         ConfigurationInterface* configuration, std::string role, | ||||
|         unsigned int in_streams, unsigned int out_streams, | ||||
|         boost::shared_ptr<gr::msg_queue> queue) : | ||||
|         role_(role), in_streams_(in_streams), out_streams_(out_streams), | ||||
|         queue_(queue) | ||||
| { | ||||
|     DLOG(INFO) << "role " << role; | ||||
|     //################# CONFIGURATION PARAMETERS ######################## | ||||
|     int fs_in; | ||||
|     int vector_length; | ||||
|     int f_if; | ||||
|     bool dump; | ||||
|     std::string dump_filename; | ||||
|     std::string item_type; | ||||
|     std::string default_item_type = "gr_complex"; | ||||
|     float pll_bw_hz; | ||||
|     float dll_bw_hz; | ||||
|     float early_late_space_chips; | ||||
|     item_type = configuration->property(role + ".item_type",default_item_type); | ||||
|     //vector_length = configuration->property(role + ".vector_length", 2048); | ||||
|     fs_in = configuration->property("GNSS-SDR.internal_fs_hz", 2048000); | ||||
|     f_if = configuration->property(role + ".if", 0); | ||||
|     dump = configuration->property(role + ".dump", false); | ||||
|     pll_bw_hz = configuration->property(role + ".pll_bw_hz", 50.0); | ||||
|     dll_bw_hz = configuration->property(role + ".dll_bw_hz", 2.0); | ||||
|     early_late_space_chips = configuration->property(role + ".early_late_space_chips", 0.5); | ||||
|     std::string default_dump_filename = "./track_ch"; | ||||
|     dump_filename = configuration->property(role + ".dump_filename", default_dump_filename); //unused! | ||||
|     vector_length = round(fs_in / (GPS_L1_CA_CODE_RATE_HZ / GPS_L1_CA_CODE_LENGTH_CHIPS)); | ||||
|  | ||||
|     //################# MAKE TRACKING GNURadio object ################### | ||||
|     if (item_type.compare("gr_complex") == 0) | ||||
|         { | ||||
|             item_size_ = sizeof(gr_complex); | ||||
|             tracking_ = gps_l1_ca_dll_pll_make_optim_tracking_cc( | ||||
|                     f_if, | ||||
|                     fs_in, | ||||
|                     vector_length, | ||||
|                     queue_, | ||||
|                     dump, | ||||
|                     dump_filename, | ||||
|                     pll_bw_hz, | ||||
|                     dll_bw_hz, | ||||
|                     early_late_space_chips); | ||||
|         } | ||||
|     else | ||||
|         { | ||||
|             item_size_ = sizeof(gr_complex); | ||||
|             LOG(WARNING) << item_type << " unknown tracking item type."; | ||||
|         } | ||||
|  | ||||
|     channel_ = 0; | ||||
|     channel_internal_queue_ = 0; | ||||
|     DLOG(INFO) << "tracking(" << tracking_->unique_id() << ")"; | ||||
| } | ||||
|  | ||||
|  | ||||
| GpsL1CaDllPllOptimTracking::~GpsL1CaDllPllOptimTracking() | ||||
| {} | ||||
|  | ||||
|  | ||||
| void GpsL1CaDllPllOptimTracking::start_tracking() | ||||
| { | ||||
|     tracking_->start_tracking(); | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Set tracking channel unique ID | ||||
|  */ | ||||
| void GpsL1CaDllPllOptimTracking::set_channel(unsigned int channel) | ||||
| { | ||||
|     channel_ = channel; | ||||
|     tracking_->set_channel(channel); | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Set tracking channel internal queue | ||||
|  */ | ||||
| void GpsL1CaDllPllOptimTracking::set_channel_queue( | ||||
|         concurrent_queue<int> *channel_internal_queue) | ||||
| { | ||||
|     channel_internal_queue_ = channel_internal_queue; | ||||
|     tracking_->set_channel_queue(channel_internal_queue_); | ||||
| } | ||||
|  | ||||
| void GpsL1CaDllPllOptimTracking::set_gnss_synchro(Gnss_Synchro* p_gnss_synchro) | ||||
| { | ||||
|     tracking_->set_gnss_synchro(p_gnss_synchro); | ||||
| } | ||||
|  | ||||
| void GpsL1CaDllPllOptimTracking::connect(gr::top_block_sptr top_block) | ||||
| { | ||||
| 	if(top_block) { /* top_block is not null */}; | ||||
| 	//nothing to connect, now the tracking uses gr_sync_decimator | ||||
| } | ||||
|  | ||||
| void GpsL1CaDllPllOptimTracking::disconnect(gr::top_block_sptr top_block) | ||||
| { | ||||
| 	if(top_block) { /* top_block is not null */}; | ||||
| 	//nothing to disconnect, now the tracking uses gr_sync_decimator | ||||
| } | ||||
|  | ||||
| gr::basic_block_sptr GpsL1CaDllPllOptimTracking::get_left_block() | ||||
| { | ||||
|     return tracking_; | ||||
| } | ||||
|  | ||||
| gr::basic_block_sptr GpsL1CaDllPllOptimTracking::get_right_block() | ||||
| { | ||||
|     return tracking_; | ||||
| } | ||||
|  | ||||
| @@ -1,119 +0,0 @@ | ||||
| /*! | ||||
|  * \file gps_l1_ca_dll_pll_optim_tracking.h | ||||
|  * \brief  Interface of an adapter of a speed optimized DLL+PLL tracking loop block | ||||
|  * for GPS L1 C/A to a TrackingInterface | ||||
|  * \author Javier Arribas, 2012. jarribas(at)cttc.es | ||||
|  * | ||||
|  * GPS L1 TRACKING MODULE OPTIMIZED FOR SPEED: | ||||
|  * - Code Doppler is not compensated in the local replica | ||||
|  * Code DLL + carrier PLL according to the algorithms described in: | ||||
|  * K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen, | ||||
|  * A Software-Defined GPS and Galileo Receiver. A Single-Frequency | ||||
|  * Approach, Birkha user, 2007 | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
|  * | ||||
|  * Copyright (C) 2010-2015  (see AUTHORS file for a list of contributors) | ||||
|  * | ||||
|  * GNSS-SDR is a software defined Global Navigation | ||||
|  *          Satellite Systems receiver | ||||
|  * | ||||
|  * This file is part of GNSS-SDR. | ||||
|  * | ||||
|  * GNSS-SDR is free software: you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation, either version 3 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * GNSS-SDR is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
|  */ | ||||
|  | ||||
| #ifndef GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_H_ | ||||
| #define GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_H_ | ||||
|  | ||||
| #include <string> | ||||
| #include <gnuradio/msg_queue.h> | ||||
| #include "tracking_interface.h" | ||||
| #include "gps_l1_ca_dll_pll_optim_tracking_cc.h" | ||||
|  | ||||
|  | ||||
| class ConfigurationInterface; | ||||
|  | ||||
| /*! | ||||
|  * \brief This class implements a code DLL + carrier PLL tracking loop | ||||
|  */ | ||||
| class GpsL1CaDllPllOptimTracking : public TrackingInterface | ||||
| { | ||||
|  | ||||
| public: | ||||
|  | ||||
|   GpsL1CaDllPllOptimTracking(ConfigurationInterface* configuration, | ||||
|             std::string role, | ||||
|             unsigned int in_streams, | ||||
|             unsigned int out_streams, | ||||
|             boost::shared_ptr<gr::msg_queue> queue); | ||||
|  | ||||
|     virtual ~GpsL1CaDllPllOptimTracking(); | ||||
|  | ||||
|     std::string role() | ||||
|     { | ||||
|         return role_; | ||||
|     } | ||||
|  | ||||
|     //! Returns "GPS_L1_CA_DLL_PLL_Optim_Tracking" | ||||
|     std::string implementation() | ||||
|     { | ||||
|         return "GPS_L1_CA_DLL_PLL_Optim_Tracking"; | ||||
|     } | ||||
|     size_t item_size() | ||||
|     { | ||||
|         return item_size_; | ||||
|     } | ||||
|  | ||||
|     void connect(gr::top_block_sptr top_block); | ||||
|     void disconnect(gr::top_block_sptr top_block); | ||||
|     gr::basic_block_sptr get_left_block(); | ||||
|     gr::basic_block_sptr get_right_block(); | ||||
|  | ||||
|  | ||||
|     /*! | ||||
|      * \brief Set tracking channel unique ID | ||||
|      */ | ||||
|     void set_channel(unsigned int channel); | ||||
|  | ||||
|     /*! | ||||
|      * \brief Set acquisition/tracking common Gnss_Synchro object pointer | ||||
|      * to efficiently exchange synchronization data between acquisition and tracking blocks | ||||
|      */ | ||||
|     void set_gnss_synchro(Gnss_Synchro* p_gnss_synchro); | ||||
|  | ||||
|     /*! | ||||
|      * \brief Set tracking channel internal queue | ||||
|      */ | ||||
|     void set_channel_queue(concurrent_queue<int> *channel_internal_queue); | ||||
|  | ||||
|     void start_tracking(); | ||||
|  | ||||
| private: | ||||
|  | ||||
|     gps_l1_ca_dll_pll_optim_tracking_cc_sptr tracking_; | ||||
|     size_t item_size_; | ||||
|  | ||||
|     unsigned int channel_; | ||||
|  | ||||
|     std::string role_; | ||||
|     unsigned int in_streams_; | ||||
|     unsigned int out_streams_; | ||||
|     boost::shared_ptr<gr::msg_queue> queue_; | ||||
|     concurrent_queue<int> *channel_internal_queue_; | ||||
| }; | ||||
|  | ||||
| #endif // GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_H_ | ||||
| @@ -26,7 +26,6 @@ set(TRACKING_GR_BLOCKS_SOURCES | ||||
|      galileo_e1_dll_pll_veml_tracking_cc.cc | ||||
|      galileo_e1_tcp_connector_tracking_cc.cc | ||||
|      gps_l1_ca_dll_fll_pll_tracking_cc.cc | ||||
|      gps_l1_ca_dll_pll_optim_tracking_cc.cc | ||||
|      gps_l1_ca_dll_pll_tracking_cc.cc | ||||
|      gps_l1_ca_tcp_connector_tracking_cc.cc | ||||
|      galileo_e5a_dll_pll_tracking_cc.cc | ||||
|   | ||||
| @@ -1,654 +0,0 @@ | ||||
| /*! | ||||
|  * \file gps_l1_ca_dll_pll_optim_tracking_cc.cc | ||||
|  * \brief Implementation of a code DLL + carrier PLL tracking block | ||||
|  * \author Javier Arribas, 2012. jarribas(at)cttc.es | ||||
|  * GPS L1 TRACKING MODULE OPTIMIZED FOR SPEED: | ||||
|  * - Code Doppler is not compensated in the local replica | ||||
|  * Code DLL + carrier PLL according to the algorithms described in: | ||||
|  * [1] K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen, | ||||
|  * A Software-Defined GPS and Galileo Receiver. A Single-Frequency | ||||
|  * Approach, Birkha user, 2007 | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
|  * | ||||
|  * Copyright (C) 2010-2015  (see AUTHORS file for a list of contributors) | ||||
|  * | ||||
|  * GNSS-SDR is a software defined Global Navigation | ||||
|  *          Satellite Systems receiver | ||||
|  * | ||||
|  * This file is part of GNSS-SDR. | ||||
|  * | ||||
|  * GNSS-SDR is free software: you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation, either version 3 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * GNSS-SDR is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
|  */ | ||||
|  | ||||
| #include "gps_l1_ca_dll_pll_optim_tracking_cc.h" | ||||
| #include <iostream> | ||||
| #include <memory> | ||||
| #include <sstream> | ||||
| #include <boost/lexical_cast.hpp> | ||||
| #include <gnuradio/io_signature.h> | ||||
| #include <glog/logging.h> | ||||
| #include <volk/volk.h> | ||||
| #include "gps_sdr_signal_processing.h" | ||||
| #include "tracking_discriminators.h" | ||||
| #include "lock_detectors.h" | ||||
| #include "GPS_L1_CA.h" | ||||
| #include "nco_lib.h" | ||||
| #include "control_message_factory.h" | ||||
|  | ||||
|  | ||||
|  | ||||
| /*! | ||||
|  * \todo Include in definition header file | ||||
|  */ | ||||
| #define CN0_ESTIMATION_SAMPLES 20 | ||||
| #define MINIMUM_VALID_CN0 25 | ||||
| #define MAXIMUM_LOCK_FAIL_COUNTER 50 | ||||
| #define CARRIER_LOCK_THRESHOLD 0.85 | ||||
|  | ||||
| using google::LogMessage; | ||||
|  | ||||
| gps_l1_ca_dll_pll_optim_tracking_cc_sptr | ||||
| gps_l1_ca_dll_pll_make_optim_tracking_cc( | ||||
|         long if_freq, | ||||
|         long fs_in, | ||||
|         unsigned int vector_length, | ||||
|         boost::shared_ptr<gr::msg_queue> queue, | ||||
|         bool dump, | ||||
|         std::string dump_filename, | ||||
|         float pll_bw_hz, | ||||
|         float dll_bw_hz, | ||||
|         float early_late_space_chips) | ||||
| { | ||||
|     return gps_l1_ca_dll_pll_optim_tracking_cc_sptr(new Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc(if_freq, | ||||
|             fs_in, vector_length, queue, dump, dump_filename, pll_bw_hz, dll_bw_hz, early_late_space_chips)); | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::forecast (int noutput_items, | ||||
|         gr_vector_int &ninput_items_required) | ||||
| { | ||||
|     if (noutput_items != 0) | ||||
|         { | ||||
|             ninput_items_required[0] = d_gnuradio_forecast_samples; //set the required available samples in each call | ||||
|         } | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc( | ||||
|         long if_freq, | ||||
|         long fs_in, | ||||
|         unsigned int vector_length, | ||||
|         boost::shared_ptr<gr::msg_queue> queue, | ||||
|         bool dump, | ||||
|         std::string dump_filename, | ||||
|         float pll_bw_hz, | ||||
|         float dll_bw_hz, | ||||
|         float early_late_space_chips) : | ||||
|         gr::block("Gps_L1_Ca_Dll_Pll_Tracking_cc", | ||||
|                   gr::io_signature::make(1, 1, sizeof(gr_complex)), | ||||
|                   gr::io_signature::make(1, 1, sizeof(Gnss_Synchro))) | ||||
| { | ||||
|     // initialize internal vars | ||||
|     d_queue = queue; | ||||
|     d_dump = dump; | ||||
|     d_if_freq = if_freq; | ||||
|     d_fs_in = fs_in; | ||||
|     d_vector_length = vector_length; | ||||
|     d_gnuradio_forecast_samples = static_cast<int>(d_vector_length) * 2; | ||||
|     d_dump_filename = dump_filename; | ||||
|  | ||||
|     // Initialize tracking  ========================================== | ||||
|     d_code_loop_filter.set_DLL_BW(dll_bw_hz); | ||||
|     d_carrier_loop_filter.set_PLL_BW(pll_bw_hz); | ||||
|  | ||||
|     //--- DLL variables -------------------------------------------------------- | ||||
|     d_early_late_spc_chips = early_late_space_chips; // Define early-late offset (in chips) | ||||
|  | ||||
|     // Initialization of local code replica | ||||
|     // Get space for a vector with the C/A code replica sampled 1x/chip | ||||
|     d_ca_code = static_cast<gr_complex*>(volk_malloc((GPS_L1_CA_CODE_LENGTH_CHIPS + 2) * sizeof(gr_complex), volk_get_alignment())); | ||||
|  | ||||
|     // Get space for the resampled early / prompt / late local replicas | ||||
|     d_early_code = static_cast<gr_complex*>(volk_malloc(2 * d_vector_length * sizeof(gr_complex), volk_get_alignment())); | ||||
|     d_prompt_code = static_cast<gr_complex*>(volk_malloc(2 * d_vector_length * sizeof(gr_complex), volk_get_alignment())); | ||||
|     d_late_code = static_cast<gr_complex*>(volk_malloc(2 * d_vector_length * sizeof(gr_complex), volk_get_alignment())); | ||||
|  | ||||
|     // space for carrier wipeoff and signal baseband vectors | ||||
|     d_carr_sign = static_cast<gr_complex*>(volk_malloc(2*d_vector_length * sizeof(gr_complex), volk_get_alignment())); | ||||
|  | ||||
|     // correlator outputs (scalar) | ||||
|     d_Early = static_cast<gr_complex*>(volk_malloc(sizeof(gr_complex), volk_get_alignment())); | ||||
|     d_Prompt = static_cast<gr_complex*>(volk_malloc(sizeof(gr_complex), volk_get_alignment())); | ||||
|     d_Late = static_cast<gr_complex*>(volk_malloc(sizeof(gr_complex), volk_get_alignment())); | ||||
|  | ||||
|     //--- Perform initializations ------------------------------ | ||||
|     // define initial code frequency basis of NCO | ||||
|     d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ; | ||||
|     // define residual code phase (in chips) | ||||
|     d_rem_code_phase_samples = 0.0; | ||||
|     // define residual carrier phase | ||||
|     d_rem_carr_phase_rad = 0.0; | ||||
|  | ||||
|     // sample synchronization | ||||
|     d_sample_counter = 0; | ||||
|     //d_sample_counter_seconds = 0; | ||||
|     d_acq_sample_stamp = 0; | ||||
|  | ||||
|     d_enable_tracking = false; | ||||
|     d_pull_in = false; | ||||
|     d_last_seg = 0; | ||||
|  | ||||
|     d_current_prn_length_samples = static_cast<int>(d_vector_length); | ||||
|  | ||||
|     // CN0 estimation and lock detector buffers | ||||
|     d_cn0_estimation_counter = 0; | ||||
|     d_Prompt_buffer = new gr_complex[CN0_ESTIMATION_SAMPLES]; | ||||
|     d_carrier_lock_test = 1; | ||||
|     d_CN0_SNV_dB_Hz = 0; | ||||
|     d_carrier_lock_fail_counter = 0; | ||||
|     d_carrier_lock_threshold = CARRIER_LOCK_THRESHOLD; | ||||
|  | ||||
|     systemName["G"] = std::string("GPS"); | ||||
|  | ||||
|     d_channel_internal_queue = 0; | ||||
|     d_acquisition_gnss_synchro = 0; | ||||
|     d_channel = 0; | ||||
|     d_acq_code_phase_samples = 0.0; | ||||
|     d_acq_carrier_doppler_hz = 0.0; | ||||
|     d_carrier_doppler_hz = 0.0; | ||||
|     d_acc_carrier_phase_rad = 0.0; | ||||
|     d_code_phase_samples = 0.0; | ||||
|     d_acc_code_phase_secs = 0.0; | ||||
| } | ||||
|  | ||||
|  | ||||
| void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::start_tracking() | ||||
| { | ||||
|     // correct the code phase according to the delay between acq and trk | ||||
|     d_acq_code_phase_samples = d_acquisition_gnss_synchro->Acq_delay_samples; | ||||
|     d_acq_carrier_doppler_hz = d_acquisition_gnss_synchro->Acq_doppler_hz; | ||||
|     d_acq_sample_stamp = d_acquisition_gnss_synchro->Acq_samplestamp_samples; | ||||
|  | ||||
|     long int acq_trk_diff_samples; | ||||
|     double acq_trk_diff_seconds; | ||||
|     acq_trk_diff_samples = static_cast<long int>(d_sample_counter) - static_cast<long int>(d_acq_sample_stamp); //-d_vector_length; | ||||
|     LOG(INFO) << "Number of samples between Acquisition and Tracking =" << acq_trk_diff_samples; | ||||
|     acq_trk_diff_seconds = static_cast<float>(acq_trk_diff_samples) / static_cast<float>(d_fs_in); | ||||
|     //doppler effect | ||||
|     // Fd=(C/(C+Vr))*F | ||||
|     double radial_velocity; | ||||
|     radial_velocity = (GPS_L1_FREQ_HZ + d_acq_carrier_doppler_hz) / GPS_L1_FREQ_HZ; | ||||
|     // new chip and prn sequence periods based on acq Doppler | ||||
|     double T_chip_mod_seconds; | ||||
|     double T_prn_mod_seconds; | ||||
|     double T_prn_mod_samples; | ||||
|     d_code_freq_chips = radial_velocity * GPS_L1_CA_CODE_RATE_HZ; | ||||
|     T_chip_mod_seconds = 1/d_code_freq_chips; | ||||
|     T_prn_mod_seconds = T_chip_mod_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS; | ||||
|     T_prn_mod_samples = T_prn_mod_seconds * static_cast<float>(d_fs_in); | ||||
|     d_current_prn_length_samples = round(T_prn_mod_samples); | ||||
|  | ||||
|     double T_prn_true_seconds = GPS_L1_CA_CODE_LENGTH_CHIPS / GPS_L1_CA_CODE_RATE_HZ; | ||||
|     double T_prn_true_samples = T_prn_true_seconds * static_cast<float>(d_fs_in); | ||||
|     double T_prn_diff_seconds = T_prn_true_seconds - T_prn_mod_seconds; | ||||
|     double N_prn_diff = acq_trk_diff_seconds / T_prn_true_seconds; | ||||
|     double corrected_acq_phase_samples, delay_correction_samples; | ||||
|     corrected_acq_phase_samples = fmod((d_acq_code_phase_samples + T_prn_diff_seconds * N_prn_diff * static_cast<float>(d_fs_in)), T_prn_true_samples); | ||||
|     if (corrected_acq_phase_samples < 0) | ||||
|         { | ||||
|             corrected_acq_phase_samples = T_prn_mod_samples + corrected_acq_phase_samples; | ||||
|         } | ||||
|     delay_correction_samples = d_acq_code_phase_samples - corrected_acq_phase_samples; | ||||
|     d_acq_code_phase_samples = corrected_acq_phase_samples; | ||||
|     d_carrier_doppler_hz = d_acq_carrier_doppler_hz; | ||||
|  | ||||
|     // DLL/PLL filter initialization | ||||
|     d_carrier_loop_filter.initialize(); //initialize the carrier filter | ||||
|     d_code_loop_filter.initialize();    //initialize the code filter | ||||
|  | ||||
|     // generate local reference ALWAYS starting at chip 1 (1 sample per chip) | ||||
|     gps_l1_ca_code_gen_complex(&d_ca_code[1], d_acquisition_gnss_synchro->PRN, 0); | ||||
|     d_ca_code[0] = d_ca_code[static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS)]; | ||||
|     d_ca_code[static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS) + 1] = d_ca_code[1]; | ||||
|  | ||||
|     //****************************************************************************** | ||||
|     // Experimental: pre-sampled local signal replica at nominal code frequency. | ||||
|     // No code doppler correction | ||||
|     double tcode_chips; | ||||
|     int associated_chip_index; | ||||
|     int code_length_chips = static_cast<float>(GPS_L1_CA_CODE_LENGTH_CHIPS); | ||||
|     double code_phase_step_chips; | ||||
|     int early_late_spc_samples; | ||||
|     int epl_loop_length_samples; | ||||
|  | ||||
|     // unified loop for E, P, L code vectors | ||||
|     code_phase_step_chips = (static_cast<double>(d_code_freq_chips)) / (static_cast<double>(d_fs_in)); | ||||
|     tcode_chips = 0; | ||||
|  | ||||
|     // Alternative EPL code generation (40% of speed improvement!) | ||||
|     early_late_spc_samples = round(d_early_late_spc_chips / code_phase_step_chips); | ||||
|     epl_loop_length_samples = d_current_prn_length_samples  +early_late_spc_samples*2; | ||||
|     for (int i = 0; i < epl_loop_length_samples; i++) | ||||
|         { | ||||
|             associated_chip_index = 1 + round(fmod(tcode_chips - d_early_late_spc_chips, code_length_chips)); | ||||
|             d_early_code[i] = d_ca_code[associated_chip_index]; | ||||
|             tcode_chips = tcode_chips + code_phase_step_chips; | ||||
|         } | ||||
|  | ||||
|     memcpy(d_prompt_code, &d_early_code[early_late_spc_samples], d_current_prn_length_samples* sizeof(gr_complex)); | ||||
|     memcpy(d_late_code, &d_early_code[early_late_spc_samples*2], d_current_prn_length_samples* sizeof(gr_complex)); | ||||
|     //****************************************************************************** | ||||
|  | ||||
|     d_carrier_lock_fail_counter = 0; | ||||
|     d_rem_code_phase_samples = 0; | ||||
|     d_rem_carr_phase_rad = 0; | ||||
|     d_acc_carrier_phase_rad = 0; | ||||
|  | ||||
|     d_code_phase_samples = d_acq_code_phase_samples; | ||||
|  | ||||
|     std::string sys_ = &d_acquisition_gnss_synchro->System; | ||||
|     sys = sys_.substr(0,1); | ||||
|  | ||||
|     // DEBUG OUTPUT | ||||
|     std::cout << "Tracking start on channel " << d_channel << " for satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN) << std::endl; | ||||
|     LOG(INFO) << "Starting tracking of satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN) << " on channel " << d_channel; | ||||
|  | ||||
|     // enable tracking | ||||
|     d_pull_in = true; | ||||
|     d_enable_tracking = true; | ||||
|  | ||||
|     LOG(INFO) << "PULL-IN Doppler [Hz]=" << d_carrier_doppler_hz | ||||
|               << " Code Phase correction [samples]=" << delay_correction_samples | ||||
|               << " PULL-IN Code Phase [samples]=" << d_acq_code_phase_samples << std::endl; | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::update_local_code() | ||||
| { | ||||
|     double tcode_chips; | ||||
|     double rem_code_phase_chips; | ||||
|     int associated_chip_index; | ||||
|     int code_length_chips = static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS); | ||||
|     double code_phase_step_chips; | ||||
|     int early_late_spc_samples; | ||||
|     int epl_loop_length_samples; | ||||
|  | ||||
|     // unified loop for E, P, L code vectors | ||||
|     code_phase_step_chips = (static_cast<double>(d_code_freq_chips)) / (static_cast<double>(d_fs_in)); | ||||
|     rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in); | ||||
|     tcode_chips = -rem_code_phase_chips; | ||||
|  | ||||
|     //EPL code generation | ||||
|     early_late_spc_samples = round(d_early_late_spc_chips / code_phase_step_chips); | ||||
|     epl_loop_length_samples = d_current_prn_length_samples + early_late_spc_samples*2; | ||||
|     for (int i = 0; i < epl_loop_length_samples; i++) | ||||
|         { | ||||
|             associated_chip_index = 1 + round(fmod(tcode_chips - d_early_late_spc_chips, code_length_chips)); | ||||
|             d_early_code[i] = d_ca_code[associated_chip_index]; | ||||
|             tcode_chips = tcode_chips + code_phase_step_chips; | ||||
|         } | ||||
|  | ||||
|     memcpy(d_prompt_code, &d_early_code[early_late_spc_samples], d_current_prn_length_samples* sizeof(gr_complex)); | ||||
|     memcpy(d_late_code, &d_early_code[early_late_spc_samples*2], d_current_prn_length_samples* sizeof(gr_complex)); | ||||
| } | ||||
|  | ||||
|  | ||||
| void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::update_local_carrier() | ||||
| { | ||||
|     float phase_step_rad; | ||||
|     phase_step_rad = static_cast<float>(GPS_TWO_PI) * d_carrier_doppler_hz / static_cast<float>(d_fs_in); | ||||
|     fxp_nco(d_carr_sign, d_current_prn_length_samples, d_rem_carr_phase_rad, phase_step_rad); | ||||
| } | ||||
|  | ||||
|  | ||||
| Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::~Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc() | ||||
| { | ||||
|     d_dump_file.close(); | ||||
|  | ||||
|     volk_free(d_prompt_code); | ||||
|     volk_free(d_late_code); | ||||
|     volk_free(d_early_code); | ||||
|     volk_free(d_carr_sign); | ||||
|     volk_free(d_Early); | ||||
|     volk_free(d_Prompt); | ||||
|     volk_free(d_Late); | ||||
|  | ||||
|     delete[] d_ca_code; | ||||
|     delete[] d_Prompt_buffer; | ||||
| } | ||||
|  | ||||
|  | ||||
| // Tracking signal processing | ||||
| int Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::general_work (int noutput_items, gr_vector_int &ninput_items, | ||||
|         gr_vector_const_void_star &input_items, gr_vector_void_star &output_items) | ||||
| { | ||||
|     // stream to collect cout calls to improve thread safety | ||||
|     std::stringstream tmp_str_stream; | ||||
|     double carr_error_hz = 0.0; | ||||
|     double carr_error_filt_hz = 0.0; | ||||
|     double code_error_chips = 0.0; | ||||
|     double code_error_filt_chips = 0.0; | ||||
|  | ||||
|     if (d_enable_tracking == true) | ||||
|         { | ||||
|             /* | ||||
|              * Receiver signal alignment | ||||
|              */ | ||||
|             if (d_pull_in == true) | ||||
|                 { | ||||
|                     int samples_offset; | ||||
|                     float acq_trk_shif_correction_samples; | ||||
|                     int acq_to_trk_delay_samples; | ||||
|                     acq_to_trk_delay_samples = d_sample_counter - d_acq_sample_stamp; | ||||
|                     acq_trk_shif_correction_samples = d_current_prn_length_samples - fmod(static_cast<float>(acq_to_trk_delay_samples), static_cast<float>(d_current_prn_length_samples)); | ||||
|                     samples_offset = round(d_acq_code_phase_samples + acq_trk_shif_correction_samples); | ||||
|                     d_sample_counter = d_sample_counter + samples_offset; //count for the processed samples | ||||
|                     d_pull_in = false; | ||||
|                     consume_each(samples_offset); //shift input to perform alignment with local replica | ||||
|                     return 1; | ||||
|                 } | ||||
|             // GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder | ||||
|             Gnss_Synchro current_synchro_data; | ||||
|             // Fill the acquisition data | ||||
|             current_synchro_data = *d_acquisition_gnss_synchro; | ||||
|  | ||||
|             // Block input data and block output stream pointers | ||||
|             const gr_complex* in = (gr_complex*) input_items[0]; //PRN start block alignment | ||||
|             Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0]; | ||||
|  | ||||
|             // Generate local code and carrier replicas (using \hat{f}_d(k-1)) | ||||
|             //update_local_code(); //disabled in the speed optimized tracking! | ||||
|             update_local_carrier(); | ||||
|  | ||||
|             // perform Early, Prompt and Late correlation | ||||
| #if USING_VOLK_CW_EPL_CORR_CUSTOM | ||||
|             d_correlator.Carrier_wipeoff_and_EPL_volk_custom(d_current_prn_length_samples, | ||||
|                     in, | ||||
|                     d_carr_sign, | ||||
|                     d_early_code, | ||||
|                     d_prompt_code, | ||||
|                     d_late_code, | ||||
|                     d_Early, | ||||
|                     d_Prompt, | ||||
|                     d_Late); | ||||
| #else | ||||
|             d_correlator.Carrier_wipeoff_and_EPL_volk(d_current_prn_length_samples, | ||||
|                     in, | ||||
|                     d_carr_sign, | ||||
|                     d_early_code, | ||||
|                     d_prompt_code, | ||||
|                     d_late_code, | ||||
|                     d_Early, | ||||
|                     d_Prompt, | ||||
|                     d_Late); | ||||
| #endif | ||||
|             // ################## PLL ########################################################## | ||||
|             // PLL discriminator | ||||
|             carr_error_hz = pll_cloop_two_quadrant_atan(*d_Prompt) / GPS_TWO_PI; | ||||
|             // Carrier discriminator filter | ||||
|             carr_error_filt_hz = d_carrier_loop_filter.get_carrier_nco(carr_error_hz); | ||||
|             // New carrier Doppler frequency estimation | ||||
|             d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_error_filt_hz; | ||||
|             // New code Doppler frequency estimation | ||||
|             d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ + ((d_carrier_doppler_hz * GPS_L1_CA_CODE_RATE_HZ) / GPS_L1_FREQ_HZ); | ||||
|             //carrier phase accumulator for (K) doppler estimation | ||||
|             d_acc_carrier_phase_rad -= GPS_TWO_PI * d_carrier_doppler_hz * GPS_L1_CA_CODE_PERIOD; | ||||
|             //remnant carrier phase to prevent overflow in the code NCO | ||||
|             d_rem_carr_phase_rad = d_rem_carr_phase_rad + GPS_TWO_PI * d_carrier_doppler_hz * GPS_L1_CA_CODE_PERIOD; | ||||
|             d_rem_carr_phase_rad = fmod(d_rem_carr_phase_rad, GPS_TWO_PI); | ||||
|  | ||||
|             // ################## DLL ########################################################## | ||||
|             // DLL discriminator | ||||
|             code_error_chips = dll_nc_e_minus_l_normalized(*d_Early, *d_Late); //[chips/Ti] | ||||
|             // Code discriminator filter | ||||
|             code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips); //[chips/second] | ||||
|             //Code phase accumulator | ||||
|             double code_error_filt_secs; | ||||
|             code_error_filt_secs = (GPS_L1_CA_CODE_PERIOD * code_error_filt_chips) / GPS_L1_CA_CODE_RATE_HZ; //[seconds] | ||||
|             d_acc_code_phase_secs = d_acc_code_phase_secs + code_error_filt_secs; | ||||
|  | ||||
|             // ################## CARRIER AND CODE NCO BUFFER ALIGNEMENT ####################### | ||||
|             // keep alignment parameters for the next input buffer | ||||
|             double T_chip_seconds; | ||||
|             double T_prn_seconds; | ||||
|             double T_prn_samples; | ||||
|             double K_blk_samples; | ||||
|             // Compute the next buffer length based in the new period of the PRN sequence and the code phase error estimation | ||||
|             T_chip_seconds = 1.0 / d_code_freq_chips; | ||||
|             T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS; | ||||
|             T_prn_samples = T_prn_seconds * static_cast<double>(d_fs_in); | ||||
|             K_blk_samples = T_prn_samples + d_rem_code_phase_samples + code_error_filt_secs * static_cast<double>(d_fs_in); | ||||
|             d_current_prn_length_samples = round(K_blk_samples); //round to a discrete samples | ||||
|             //d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample | ||||
|  | ||||
|             // ####### CN0 ESTIMATION AND LOCK DETECTORS ###### | ||||
|             if (d_cn0_estimation_counter < CN0_ESTIMATION_SAMPLES) | ||||
|                 { | ||||
|                     // fill buffer with prompt correlator output values | ||||
|                     d_Prompt_buffer[d_cn0_estimation_counter] = *d_Prompt; | ||||
|                     d_cn0_estimation_counter++; | ||||
|                 } | ||||
|             else | ||||
|                 { | ||||
|                     d_cn0_estimation_counter = 0; | ||||
|                     // Code lock indicator | ||||
|                     d_CN0_SNV_dB_Hz = cn0_svn_estimator(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES, d_fs_in, GPS_L1_CA_CODE_LENGTH_CHIPS); | ||||
|                     // Carrier lock indicator | ||||
|                     d_carrier_lock_test = carrier_lock_detector(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES); | ||||
|                     // Loss of lock detection | ||||
|                     if (d_carrier_lock_test < d_carrier_lock_threshold or d_CN0_SNV_dB_Hz < MINIMUM_VALID_CN0) | ||||
|                         { | ||||
|                             d_carrier_lock_fail_counter++; | ||||
|                         } | ||||
|                     else | ||||
|                         { | ||||
|                             if (d_carrier_lock_fail_counter > 0) d_carrier_lock_fail_counter--; | ||||
|                         } | ||||
|                     if (d_carrier_lock_fail_counter > MAXIMUM_LOCK_FAIL_COUNTER) | ||||
|                         { | ||||
|                             std::cout << "Loss of lock in channel " << d_channel << "!" << std::endl; | ||||
|                             LOG(INFO) << "Loss of lock in channel " << d_channel << "!"; | ||||
|                             std::unique_ptr<ControlMessageFactory> cmf(new ControlMessageFactory()); | ||||
|                             if (d_queue != gr::msg_queue::sptr()) | ||||
|                                 { | ||||
|                                     d_queue->handle(cmf->GetQueueMessage(d_channel, 2)); | ||||
|                                 } | ||||
|                             d_carrier_lock_fail_counter = 0; | ||||
|                             d_enable_tracking = false; // TODO: check if disabling tracking is consistent with the channel state machine | ||||
|                         } | ||||
|                 } | ||||
|             // ########### Output the tracking data to navigation and PVT ########## | ||||
|             current_synchro_data.Prompt_I = static_cast<double>((*d_Prompt).real()); | ||||
|             current_synchro_data.Prompt_Q = static_cast<double>((*d_Prompt).imag()); | ||||
|             // Tracking_timestamp_secs is aligned with the PRN start sample | ||||
|             //current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter + (double)d_current_prn_length_samples + (double)d_rem_code_phase_samples) / static_cast<double>(d_fs_in); | ||||
|             current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + static_cast<double>(d_rem_code_phase_samples)) / static_cast<double>(d_fs_in); | ||||
|             d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample | ||||
|             // This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0 | ||||
|             current_synchro_data.Code_phase_secs = 0; | ||||
|             current_synchro_data.Carrier_phase_rads = static_cast<double>(d_acc_carrier_phase_rad); | ||||
|             current_synchro_data.Carrier_Doppler_hz = static_cast<double>(d_carrier_doppler_hz); | ||||
|             current_synchro_data.CN0_dB_hz = static_cast<double>(d_CN0_SNV_dB_Hz); | ||||
|             current_synchro_data.Flag_valid_pseudorange = false; | ||||
|             *out[0] = current_synchro_data; | ||||
|  | ||||
|             // ########## DEBUG OUTPUT | ||||
|             /*! | ||||
|              *  \todo The stop timer has to be moved to the signal source! | ||||
|              */ | ||||
|             if (floor(d_sample_counter / d_fs_in) != d_last_seg) | ||||
|                 { | ||||
|                     d_last_seg = floor(d_sample_counter / d_fs_in); | ||||
|  | ||||
|                     if (d_channel == 0) | ||||
|                         { | ||||
|                             // debug: Second counter in channel 0 | ||||
|                             tmp_str_stream << "Current input signal time = " << d_last_seg << " [s]" << std::endl; | ||||
|                             std::cout << tmp_str_stream.rdbuf(); | ||||
|                             LOG(INFO) << tmp_str_stream.rdbuf(); | ||||
|                         } | ||||
|  | ||||
|                     tmp_str_stream << "Tracking CH " << d_channel <<  ": Satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN) | ||||
|                                    << ", Doppler=" << d_carrier_doppler_hz << " [Hz] CN0 = " << d_CN0_SNV_dB_Hz << " [dB-Hz]" << std::endl; | ||||
|                     //std::cout << tmp_str_stream.rdbuf() << std::flush; | ||||
|                     LOG(INFO) << tmp_str_stream.rdbuf(); | ||||
|                     //if (d_channel == 0 || d_last_seg==5) d_carrier_lock_fail_counter=500; //DEBUG: force unlock! | ||||
|                 } | ||||
|         } | ||||
|     else | ||||
|         { | ||||
|             // ########## DEBUG OUTPUT (TIME ONLY for channel 0 when tracking is disabled) | ||||
|             /*! | ||||
|              *  \todo The stop timer has to be moved to the signal source! | ||||
|              */ | ||||
|             // stream to collect cout calls to improve thread safety | ||||
|             std::stringstream tmp_str_stream; | ||||
|             if (floor(d_sample_counter / d_fs_in) != d_last_seg) | ||||
|                 { | ||||
|                     d_last_seg = floor(d_sample_counter / d_fs_in); | ||||
|  | ||||
|                     if (d_channel == 0) | ||||
|                         { | ||||
|                             // debug: Second counter in channel 0 | ||||
|                             tmp_str_stream << "Current input signal time = " << d_last_seg << " [s]" << std::endl << std::flush; | ||||
|                             std::cout << tmp_str_stream.rdbuf() << std::flush; | ||||
|                         } | ||||
|                 } | ||||
|             *d_Early = gr_complex(0,0); | ||||
|             *d_Prompt = gr_complex(0,0); | ||||
|             *d_Late = gr_complex(0,0); | ||||
|             Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0]; //block output streams pointer | ||||
|             // GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder | ||||
|             d_acquisition_gnss_synchro->Flag_valid_pseudorange = false; | ||||
|             *out[0] = *d_acquisition_gnss_synchro; | ||||
|         } | ||||
|  | ||||
|     if(d_dump) | ||||
|         { | ||||
|             // MULTIPLEXED FILE RECORDING - Record results to file | ||||
|             float prompt_I; | ||||
|             float prompt_Q; | ||||
|             float tmp_E, tmp_P, tmp_L; | ||||
|             float tmp_float; | ||||
|             double tmp_double; | ||||
|             prompt_I = (*d_Prompt).real(); | ||||
|             prompt_Q = (*d_Prompt).imag(); | ||||
|             tmp_E = std::abs<float>(*d_Early); | ||||
|             tmp_P = std::abs<float>(*d_Prompt); | ||||
|             tmp_L = std::abs<float>(*d_Late); | ||||
|             try | ||||
|             { | ||||
|                     // EPR | ||||
|                     d_dump_file.write((char*)&tmp_E, sizeof(float)); | ||||
|                     d_dump_file.write((char*)&tmp_P, sizeof(float)); | ||||
|                     d_dump_file.write((char*)&tmp_L, sizeof(float)); | ||||
|                     // PROMPT I and Q (to analyze navigation symbols) | ||||
|                     d_dump_file.write((char*)&prompt_I, sizeof(float)); | ||||
|                     d_dump_file.write((char*)&prompt_Q, sizeof(float)); | ||||
|                     // PRN start sample stamp | ||||
|                     //tmp_float=(float)d_sample_counter; | ||||
|                     d_dump_file.write((char*)&d_sample_counter, sizeof(unsigned long int)); | ||||
|                     // accumulated carrier phase | ||||
|                     tmp_float = d_acc_carrier_phase_rad; | ||||
|                     d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|  | ||||
|                     // carrier and code frequency | ||||
|                     tmp_float = d_carrier_doppler_hz; | ||||
|                     d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|                     tmp_float = d_code_freq_chips; | ||||
|                     d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|  | ||||
|                     //PLL commands | ||||
|                     tmp_float = carr_error_hz; | ||||
|                     d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|                     tmp_float = carr_error_filt_hz; | ||||
|                     d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|  | ||||
|                     //DLL commands | ||||
|                     tmp_float = code_error_chips; | ||||
|                     d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|                     tmp_float = code_error_filt_chips; | ||||
|                     d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|  | ||||
|                     // CN0 and carrier lock test | ||||
|                     tmp_float = d_CN0_SNV_dB_Hz; | ||||
|                     d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|                     tmp_float = d_carrier_lock_test; | ||||
|                     d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|  | ||||
|                     // AUX vars (for debug purposes) | ||||
|                     tmp_float = d_rem_code_phase_samples; | ||||
|                     d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|                     tmp_double = (double)(d_sample_counter + d_current_prn_length_samples); | ||||
|                     d_dump_file.write((char*)&tmp_double, sizeof(double)); | ||||
|             } | ||||
|             catch (std::ifstream::failure& e) | ||||
|             { | ||||
|                     LOG(WARNING) << "Exception writing trk dump file " << e.what(); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|     consume_each(d_current_prn_length_samples); // this is necesary in gr_block derivates | ||||
|     d_sample_counter += d_current_prn_length_samples; //count for the processed samples | ||||
|     if((noutput_items == 0) || (ninput_items[0] == 0)) | ||||
|         { | ||||
|             LOG(WARNING) << "noutput_items = 0"; | ||||
|         } | ||||
|     return 1; //output tracking result ALWAYS even in the case of d_enable_tracking==false | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_channel(unsigned int channel) | ||||
| { | ||||
|     d_channel = channel; | ||||
|     LOG(INFO) << "Tracking Channel set to " << d_channel; | ||||
|     // ############# ENABLE DATA FILE LOG ################# | ||||
|     if (d_dump == true) | ||||
|         { | ||||
|             if (d_dump_file.is_open() == false) | ||||
|                 { | ||||
|                     try | ||||
|                     { | ||||
|                             d_dump_filename.append(boost::lexical_cast<std::string>(d_channel)); | ||||
|                             d_dump_filename.append(".dat"); | ||||
|                             d_dump_file.exceptions (std::ifstream::failbit | std::ifstream::badbit); | ||||
|                             d_dump_file.open(d_dump_filename.c_str(), std::ios::out | std::ios::binary); | ||||
|                             LOG(INFO) << "Tracking dump enabled on channel " << d_channel << " Log file: " << d_dump_filename.c_str(); | ||||
|                     } | ||||
|                     catch (std::ifstream::failure& e) | ||||
|                     { | ||||
|                             LOG(WARNING) << "channel " << d_channel << " Exception opening trk dump file " << e.what(); | ||||
|                     } | ||||
|                 } | ||||
|         } | ||||
| } | ||||
|  | ||||
|  | ||||
| void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_channel_queue(concurrent_queue<int> *channel_internal_queue) | ||||
| { | ||||
|     d_channel_internal_queue = channel_internal_queue; | ||||
| } | ||||
|  | ||||
| void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_gnss_synchro(Gnss_Synchro* p_gnss_synchro) | ||||
| { | ||||
|     d_acquisition_gnss_synchro = p_gnss_synchro; | ||||
|  | ||||
| } | ||||
| @@ -1,181 +0,0 @@ | ||||
| /*! | ||||
|  * \file gps_l1_ca_dll_pll_optim_tracking_cc.h | ||||
|  * \brief Implementation of a code DLL + carrier PLL tracking block | ||||
|  * \author Javier Arribas, 2012. jarribas(at)cttc.es | ||||
|  * GPS L1 TRACKING MODULE OPTIMIZED FOR SPEED: | ||||
|  * - Code Doppler is not compensated in the local replica | ||||
|  * Code DLL + carrier PLL according to the algorithms described in: | ||||
|  * [1] K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen, | ||||
|  * A Software-Defined GPS and Galileo Receiver. A Single-Frequency | ||||
|  * Approach, Birkha user, 2007 | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
|  * | ||||
|  * Copyright (C) 2010-2015  (see AUTHORS file for a list of contributors) | ||||
|  * | ||||
|  * GNSS-SDR is a software defined Global Navigation | ||||
|  *          Satellite Systems receiver | ||||
|  * | ||||
|  * This file is part of GNSS-SDR. | ||||
|  * | ||||
|  * GNSS-SDR is free software: you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation, either version 3 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * GNSS-SDR is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
|  */ | ||||
|  | ||||
| #ifndef GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_CC_H | ||||
| #define GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_CC_H | ||||
|  | ||||
| #include <fstream> | ||||
| #include <map> | ||||
| #include <string> | ||||
| #include <gnuradio/block.h> | ||||
| #include <gnuradio/msg_queue.h> | ||||
| #include "concurrent_queue.h" | ||||
| #include "gnss_synchro.h" | ||||
| #include "tracking_2nd_DLL_filter.h" | ||||
| #include "tracking_2nd_PLL_filter.h" | ||||
| #include "correlator.h" | ||||
|  | ||||
| class Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc; | ||||
|  | ||||
| typedef boost::shared_ptr<Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc> gps_l1_ca_dll_pll_optim_tracking_cc_sptr; | ||||
|  | ||||
| gps_l1_ca_dll_pll_optim_tracking_cc_sptr gps_l1_ca_dll_pll_make_optim_tracking_cc(long if_freq, | ||||
|                                                                                   long fs_in, | ||||
|                                                                                   unsigned int vector_length, | ||||
|                                                                                   boost::shared_ptr<gr::msg_queue> queue, | ||||
|                                                                                   bool dump, | ||||
|                                                                                   std::string dump_filename, | ||||
|                                                                                   float pll_bw_hz, | ||||
|                                                                                   float dll_bw_hz, | ||||
|                                                                                   float early_late_space_chips); | ||||
|  | ||||
|  | ||||
| /*! | ||||
|  * \brief This class implements a DLL + PLL tracking loop block | ||||
|  */ | ||||
| class Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc: public gr::block | ||||
| { | ||||
| public: | ||||
|     ~Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc(); | ||||
|  | ||||
|     void set_channel(unsigned int channel); | ||||
|     void set_gnss_synchro(Gnss_Synchro* p_gnss_synchro); | ||||
|     void start_tracking(); | ||||
|     void set_channel_queue(concurrent_queue<int> *channel_internal_queue); | ||||
|  | ||||
|     int general_work (int noutput_items, gr_vector_int &ninput_items, | ||||
|             gr_vector_const_void_star &input_items, gr_vector_void_star &output_items); | ||||
|  | ||||
|     void forecast (int noutput_items, gr_vector_int &ninput_items_required); | ||||
| private: | ||||
|     friend gps_l1_ca_dll_pll_optim_tracking_cc_sptr | ||||
|     gps_l1_ca_dll_pll_make_optim_tracking_cc(long if_freq, | ||||
|             long fs_in, unsigned | ||||
|             int vector_length, | ||||
|             boost::shared_ptr<gr::msg_queue> queue, | ||||
|             bool dump, | ||||
|             std::string dump_filename, | ||||
|             float pll_bw_hz, | ||||
|             float dll_bw_hz, | ||||
|             float early_late_space_chips); | ||||
|  | ||||
|     Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc(long if_freq, | ||||
|             long fs_in, unsigned | ||||
|             int vector_length, | ||||
|             boost::shared_ptr<gr::msg_queue> queue, | ||||
|             bool dump, | ||||
|             std::string dump_filename, | ||||
|             float pll_bw_hz, | ||||
|             float dll_bw_hz, | ||||
|             float early_late_space_chips); | ||||
|     void update_local_code(); | ||||
|     void update_local_carrier(); | ||||
|  | ||||
|     // tracking configuration vars | ||||
|     boost::shared_ptr<gr::msg_queue> d_queue; | ||||
|     concurrent_queue<int> *d_channel_internal_queue; | ||||
|     unsigned int d_vector_length; | ||||
|     bool d_dump; | ||||
|  | ||||
|     Gnss_Synchro* d_acquisition_gnss_synchro; | ||||
|     unsigned int d_channel; | ||||
|     int d_last_seg; | ||||
|     long d_if_freq; | ||||
|     long d_fs_in; | ||||
|  | ||||
|     double d_early_late_spc_chips; | ||||
|  | ||||
|     gr_complex* d_ca_code; | ||||
|  | ||||
|     gr_complex* d_early_code; | ||||
|     gr_complex* d_late_code; | ||||
|     gr_complex* d_prompt_code; | ||||
|     gr_complex* d_carr_sign; | ||||
|  | ||||
|     gr_complex *d_Early; | ||||
|     gr_complex *d_Prompt; | ||||
|     gr_complex *d_Late; | ||||
|  | ||||
|     // remaining code phase and carrier phase between tracking loops | ||||
|     double d_rem_code_phase_samples; | ||||
|     double d_rem_carr_phase_rad; | ||||
|  | ||||
|     // PLL and DLL filter library | ||||
|     Tracking_2nd_DLL_filter d_code_loop_filter; | ||||
|     Tracking_2nd_PLL_filter d_carrier_loop_filter; | ||||
|  | ||||
|     // acquisition | ||||
|     double d_acq_code_phase_samples; | ||||
|     double d_acq_carrier_doppler_hz; | ||||
|     // correlator | ||||
|     Correlator d_correlator; | ||||
|  | ||||
|     // tracking vars | ||||
|     double d_code_freq_chips; | ||||
|     double d_carrier_doppler_hz; | ||||
|     double d_acc_carrier_phase_rad; | ||||
|     double d_code_phase_samples; | ||||
|     double d_acc_code_phase_secs; | ||||
|  | ||||
|     //PRN period in samples | ||||
|     int d_current_prn_length_samples; | ||||
|  | ||||
|     //processing samples counters | ||||
|     unsigned long int d_sample_counter; | ||||
|     unsigned long int d_acq_sample_stamp; | ||||
|  | ||||
|     // CN0 estimation and lock detector | ||||
|     int d_cn0_estimation_counter; | ||||
|     gr_complex* d_Prompt_buffer; | ||||
|     double d_carrier_lock_test; | ||||
|     double d_CN0_SNV_dB_Hz; | ||||
|     double d_carrier_lock_threshold; | ||||
|     int d_carrier_lock_fail_counter; | ||||
|  | ||||
|     // control vars | ||||
|     int d_gnuradio_forecast_samples; | ||||
|     bool d_enable_tracking; | ||||
|     bool d_pull_in; | ||||
|  | ||||
|     // file dump | ||||
|     std::string d_dump_filename; | ||||
|     std::ofstream d_dump_file; | ||||
|  | ||||
|     std::map<std::string, std::string> systemName; | ||||
|     std::string sys; | ||||
| }; | ||||
|  | ||||
| #endif //GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_CC_H | ||||
| @@ -80,7 +80,6 @@ | ||||
| #include "galileo_e5a_noncoherent_iq_acquisition_caf.h" | ||||
| #include "gps_l1_ca_dll_pll_tracking.h" | ||||
| #include "gps_l1_ca_dll_pll_c_aid_tracking.h" | ||||
| #include "gps_l1_ca_dll_pll_optim_tracking.h" | ||||
| #include "gps_l1_ca_dll_fll_pll_tracking.h" | ||||
| #include "gps_l1_ca_tcp_connector_tracking.h" | ||||
| #include "galileo_e1_dll_pll_veml_tracking.h" | ||||
| @@ -1324,12 +1323,6 @@ std::unique_ptr<GNSSBlockInterface> GNSSBlockFactory::GetBlock( | ||||
|                     out_streams, queue)); | ||||
|             block = std::move(block_); | ||||
|         } | ||||
|     else if (implementation.compare("GPS_L1_CA_DLL_PLL_Optim_Tracking") == 0) | ||||
|         { | ||||
|             std::unique_ptr<GNSSBlockInterface> block_(new GpsL1CaDllPllOptimTracking(configuration.get(), role, in_streams, | ||||
|                     out_streams, queue)); | ||||
|             block = std::move(block_); | ||||
|         } | ||||
|     else if (implementation.compare("GPS_L1_CA_DLL_FLL_PLL_Tracking") == 0) | ||||
|         { | ||||
|             std::unique_ptr<GNSSBlockInterface> block_(new GpsL1CaDllFllPllTracking(configuration.get(), role, in_streams, | ||||
| @@ -1589,12 +1582,6 @@ std::unique_ptr<TrackingInterface> GNSSBlockFactory::GetTrkBlock( | ||||
|                     out_streams, queue)); | ||||
|             block = std::move(block_); | ||||
|         } | ||||
|     else if (implementation.compare("GPS_L1_CA_DLL_PLL_Optim_Tracking") == 0) | ||||
|         { | ||||
|             std::unique_ptr<TrackingInterface> block_(new GpsL1CaDllPllOptimTracking(configuration.get(), role, in_streams, | ||||
|                     out_streams, queue)); | ||||
|             block = std::move(block_); | ||||
|         } | ||||
|     else if (implementation.compare("GPS_L1_CA_DLL_FLL_PLL_Tracking") == 0) | ||||
|         { | ||||
|             std::unique_ptr<TrackingInterface> block_(new GpsL1CaDllFllPllTracking(configuration.get(), role, in_streams, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Carles Fernandez
					Carles Fernandez