From 0b5c827eda1746da1d8f775ad29d743477ae22fe Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Wed, 8 Aug 2018 12:03:58 +0200 Subject: [PATCH 1/2] Add Doppler rate in fast_resampler kernel. Still not used --- ...k_gnsssdr_32f_fast_resamplerxnpuppet_32f.h | 121 ++++--- ...olk_gnsssdr_32f_xn_fast_resampler_32f_xn.h | 335 ++++++++++-------- .../libs/cpu_multicorrelator_real_codes.cc | 3 +- .../libs/cpu_multicorrelator_real_codes.h | 2 +- 4 files changed, 248 insertions(+), 213 deletions(-) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_fast_resamplerxnpuppet_32f.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_fast_resamplerxnpuppet_32f.h index 9c097953f..8a0e20ffb 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_fast_resamplerxnpuppet_32f.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_fast_resamplerxnpuppet_32f.h @@ -49,7 +49,8 @@ static inline void volk_gnsssdr_32f_fast_resamplerxnpuppet_32f_generic(float* re int code_length_chips = 2046; float code_phase_step_chips = ((float)(code_length_chips) + 0.1) / ((float)num_points); int num_out_vectors = 3; - float rem_code_phase_chips = -0.234; + float rem_code_phase_chips = -0.8234; + float code_phase_rate_step_chips = 1.0 / powf(2.0, 33.0); unsigned int n; float shifts_chips[3] = {-0.1, 0.0, 0.1}; @@ -59,7 +60,7 @@ static inline void volk_gnsssdr_32f_fast_resamplerxnpuppet_32f_generic(float* re result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment()); } - volk_gnsssdr_32f_xn_fast_resampler_32f_xn_generic(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points); + volk_gnsssdr_32f_xn_fast_resampler_32f_xn_generic(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, 
code_phase_rate_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points); memcpy((float*)result, (float*)result_aux[0], sizeof(float) * num_points); @@ -73,63 +74,65 @@ static inline void volk_gnsssdr_32f_fast_resamplerxnpuppet_32f_generic(float* re #endif /* LV_HAVE_GENERIC */ -//#ifdef LV_HAVE_SSE3 -//static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_a_sse3(float* result, const float* local_code, unsigned int num_points) -//{ -// int code_length_chips = 2046; -// float code_phase_step_chips = ((float)(code_length_chips) + 0.1) / ((float)num_points); -// int num_out_vectors = 3; -// float rem_code_phase_chips = -0.234; -// unsigned int n; -// float shifts_chips[3] = {-0.1, 0.0, 0.1}; -// -// float** result_aux = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment()); -// for (n = 0; n < num_out_vectors; n++) -// { -// result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment()); -// } -// -// volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse3(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points); -// -// memcpy((float*)result, (float*)result_aux[0], sizeof(float) * num_points); -// -// for (n = 0; n < num_out_vectors; n++) -// { -// volk_gnsssdr_free(result_aux[n]); -// } -// volk_gnsssdr_free(result_aux); -//} -// -//#endif -// -//#ifdef LV_HAVE_SSE3 -//static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_u_sse3(float* result, const float* local_code, unsigned int num_points) -//{ -// int code_length_chips = 2046; -// float code_phase_step_chips = ((float)(code_length_chips) + 0.1) / ((float)num_points); -// int num_out_vectors = 3; -// float rem_code_phase_chips = -0.234; -// unsigned int n; -// float shifts_chips[3] = {-0.1, 0.0, 0.1}; -// -// float** result_aux = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment()); -// for (n = 0; n < 
num_out_vectors; n++) -// { -// result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment()); -// } -// -// volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse3(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points); -// -// memcpy((float*)result, (float*)result_aux[0], sizeof(float) * num_points); -// -// for (n = 0; n < num_out_vectors; n++) -// { -// volk_gnsssdr_free(result_aux[n]); -// } -// volk_gnsssdr_free(result_aux); -//} -// -//#endif +#ifdef LV_HAVE_SSE3 +static inline void volk_gnsssdr_32f_fast_resamplerxnpuppet_32f_a_sse3(float* result, const float* local_code, unsigned int num_points) +{ + int code_length_chips = 2046; + float code_phase_step_chips = ((float)(code_length_chips) + 0.1) / ((float)num_points); + int num_out_vectors = 3; + float rem_code_phase_chips = -0.8234; + float code_phase_rate_step_chips = 1.0 / powf(2.0, 33.0); + unsigned int n; + float shifts_chips[3] = {-0.1, 0.0, 0.1}; + + float** result_aux = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment()); + for (n = 0; n < num_out_vectors; n++) + { + result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment()); + } + + volk_gnsssdr_32f_xn_fast_resampler_32f_xn_a_sse3(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, code_phase_rate_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points); + + memcpy((float*)result, (float*)result_aux[0], sizeof(float) * num_points); + + for (n = 0; n < num_out_vectors; n++) + { + volk_gnsssdr_free(result_aux[n]); + } + volk_gnsssdr_free(result_aux); +} + +#endif + +#ifdef LV_HAVE_SSE3 +static inline void volk_gnsssdr_32f_fast_resamplerxnpuppet_32f_u_sse3(float* result, const float* local_code, unsigned int num_points) +{ + int code_length_chips = 2046; + float code_phase_step_chips = ((float)(code_length_chips) + 0.1) / 
((float)num_points); + int num_out_vectors = 3; + float rem_code_phase_chips = -0.8234; + float code_phase_rate_step_chips = 1.0 / powf(2.0, 33.0); + unsigned int n; + float shifts_chips[3] = {-0.1, 0.0, 0.1}; + + float** result_aux = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment()); + for (n = 0; n < num_out_vectors; n++) + { + result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment()); + } + + volk_gnsssdr_32f_xn_fast_resampler_32f_xn_u_sse3(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, code_phase_rate_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points); + + memcpy((float*)result, (float*)result_aux[0], sizeof(float) * num_points); + + for (n = 0; n < num_out_vectors; n++) + { + volk_gnsssdr_free(result_aux[n]); + } + volk_gnsssdr_free(result_aux); +} + +#endif // // //#ifdef LV_HAVE_SSE4_1 diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_xn_fast_resampler_32f_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_xn_fast_resampler_32f_xn.h index e622dfe73..c98a9f7c2 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_xn_fast_resampler_32f_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_xn_fast_resampler_32f_xn.h @@ -46,20 +46,21 @@ * * Dispatcher Prototype * \code - * void volk_gnsssdr_32f_xn_fast_resampler_32f_xn(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) + * void volk_gnsssdr_32f_xn_fast_resampler_32f_xn(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, float* shifts_chips, unsigned int 
code_length_chips, int num_out_vectors, unsigned int num_points) * \endcode * * \b Inputs - * \li local_code: Vector to be resampled. - * \li rem_code_phase_chips: Remnant code phase [chips]. - * \li code_phase_step_chips: Phase increment per sample [chips/sample]. - * \li shifts_chips: Vector of floats that defines the spacing (in chips) between the replicas of \p local_code - * \li code_length_chips: Code length in chips. - * \li num_out_vectors Number of output vectors. - * \li num_points: The number of data values to be in the resampled vector. + * \li local_code: Vector to be resampled. + * \li rem_code_phase_chips: Remnant code phase [chips]. + * \li code_phase_step_chips: Phase increment per sample [chips/sample]. + * \li code_phase_rate_step_chips: Phase rate increment per sample [chips/sample^2]. + * \li shifts_chips: Vector of floats that defines the spacing (in chips) between the replicas of \p local_code + * \li code_length_chips: Code length in chips. + * \li num_out_vectors Number of output vectors. + * \li num_points: The number of data values to be in the resampled vector. * * \b Outputs - * \li result: Pointer to a vector of pointers where the results will be stored. + * \li result: Pointer to a vector of pointers where the results will be stored. 
* */ @@ -77,7 +78,7 @@ #ifdef LV_HAVE_GENERIC -static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_generic(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) +static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_generic(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) { int local_code_chip_index; int current_correlator_tap; @@ -85,9 +86,9 @@ static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_generic(float** res //first correlator for (n = 0; n < num_points; n++) { - // resample code for current tap - local_code_chip_index = (int)floor(code_phase_step_chips * (float)n + shifts_chips[0] - rem_code_phase_chips); - //Take into account that in multitap correlators, the shifts can be negative! + // resample code for first tap + local_code_chip_index = (int)floor(code_phase_step_chips * (float)n + code_phase_rate_step_chips * (float)(n * n) + shifts_chips[0] - rem_code_phase_chips); + // Take into account that in multitap correlators, the shifts can be negative! 
if (local_code_chip_index < 0) local_code_chip_index += (int)code_length_chips * (abs(local_code_chip_index) / code_length_chips + 1); local_code_chip_index = local_code_chip_index % code_length_chips; result[0][n] = local_code[local_code_chip_index]; @@ -106,145 +107,175 @@ static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_generic(float** res #endif /*LV_HAVE_GENERIC*/ -//#ifdef LV_HAVE_SSE3 -//#include -//static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_a_sse3(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) -//{ -// float** _result = result; -// const unsigned int quarterPoints = num_points / 4; -// int current_correlator_tap; -// unsigned int n; -// unsigned int k; -// const __m128 ones = _mm_set1_ps(1.0f); -// const __m128 fours = _mm_set1_ps(4.0f); -// const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips); -// const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips); -// -// __VOLK_ATTR_ALIGNED(16) -// int local_code_chip_index[4]; -// int local_code_chip_index_; -// -// const __m128i zeros = _mm_setzero_si128(); -// const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips); -// const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips); -// __m128i local_code_chip_index_reg, aux_i, negatives, i; -// __m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base; -// -// for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) -// { -// shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]); -// aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); -// __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); -// for (n = 0; n < quarterPoints; n++) -// { -// aux = _mm_mul_ps(code_phase_step_chips_reg, indexn); -// aux = _mm_add_ps(aux, aux2); -// 
// floor -// i = _mm_cvttps_epi32(aux); -// fi = _mm_cvtepi32_ps(i); -// igx = _mm_cmpgt_ps(fi, aux); -// j = _mm_and_ps(igx, ones); -// aux = _mm_sub_ps(fi, j); -// // fmod -// c = _mm_div_ps(aux, code_length_chips_reg_f); -// i = _mm_cvttps_epi32(c); -// cTrunc = _mm_cvtepi32_ps(i); -// base = _mm_mul_ps(cTrunc, code_length_chips_reg_f); -// local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base)); -// -// negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros); -// aux_i = _mm_and_si128(code_length_chips_reg_i, negatives); -// local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); -// _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); -// for (k = 0; k < 4; ++k) -// { -// _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; -// } -// indexn = _mm_add_ps(indexn, fours); -// } -// for (n = quarterPoints * 4; n < num_points; n++) -// { -// // resample code for current tap -// local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); -// //Take into account that in multitap correlators, the shifts can be negative! 
-// if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1); -// local_code_chip_index_ = local_code_chip_index_ % code_length_chips; -// _result[current_correlator_tap][n] = local_code[local_code_chip_index_]; -// } -// } -//} -// -//#endif -// -// -//#ifdef LV_HAVE_SSE3 -//#include -//static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_u_sse3(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) -//{ -// float** _result = result; -// const unsigned int quarterPoints = num_points / 4; -// int current_correlator_tap; -// unsigned int n; -// unsigned int k; -// const __m128 ones = _mm_set1_ps(1.0f); -// const __m128 fours = _mm_set1_ps(4.0f); -// const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips); -// const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips); -// -// __VOLK_ATTR_ALIGNED(16) -// int local_code_chip_index[4]; -// int local_code_chip_index_; -// -// const __m128i zeros = _mm_setzero_si128(); -// const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips); -// const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips); -// __m128i local_code_chip_index_reg, aux_i, negatives, i; -// __m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base; -// -// for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) -// { -// shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]); -// aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); -// __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); -// for (n = 0; n < quarterPoints; n++) -// { -// aux = _mm_mul_ps(code_phase_step_chips_reg, indexn); -// aux = _mm_add_ps(aux, aux2); -// // floor -// i = _mm_cvttps_epi32(aux); -// fi = 
_mm_cvtepi32_ps(i); -// igx = _mm_cmpgt_ps(fi, aux); -// j = _mm_and_ps(igx, ones); -// aux = _mm_sub_ps(fi, j); -// // fmod -// c = _mm_div_ps(aux, code_length_chips_reg_f); -// i = _mm_cvttps_epi32(c); -// cTrunc = _mm_cvtepi32_ps(i); -// base = _mm_mul_ps(cTrunc, code_length_chips_reg_f); -// local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base)); -// -// negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros); -// aux_i = _mm_and_si128(code_length_chips_reg_i, negatives); -// local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); -// _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); -// for (k = 0; k < 4; ++k) -// { -// _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; -// } -// indexn = _mm_add_ps(indexn, fours); -// } -// for (n = quarterPoints * 4; n < num_points; n++) -// { -// // resample code for current tap -// local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); -// //Take into account that in multitap correlators, the shifts can be negative! 
-// if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1); -// local_code_chip_index_ = local_code_chip_index_ % code_length_chips; -// _result[current_correlator_tap][n] = local_code[local_code_chip_index_]; -// } -// } -//} -//#endif +#ifdef LV_HAVE_SSE3 +#include +static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_a_sse3(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) +{ + float** _result = result; + const unsigned int quarterPoints = num_points / 4; + // int current_correlator_tap; + unsigned int n; + unsigned int k; + unsigned int current_correlator_tap; + const __m128 ones = _mm_set1_ps(1.0f); + const __m128 fours = _mm_set1_ps(4.0f); + const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips); + const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips); + const __m128 code_phase_rate_step_chips_reg = _mm_set_ps1(code_phase_rate_step_chips); + + __VOLK_ATTR_ALIGNED(16) + int local_code_chip_index[4]; + int local_code_chip_index_; + const __m128i zeros = _mm_setzero_si128(); + const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips); + const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips); + __m128i local_code_chip_index_reg, aux_i, negatives; + __m128 aux, aux2, aux3, indexnn, shifts_chips_reg, i, fi, igx, j, c, cTrunc, base; + __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); + + shifts_chips_reg = _mm_set_ps1((float)shifts_chips[0]); + aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); + + for (n = 0; n < quarterPoints; n++) + { + aux = _mm_mul_ps(code_phase_step_chips_reg, indexn); + indexnn = _mm_mul_ps(indexn, indexn); + aux3 = _mm_mul_ps(code_phase_rate_step_chips_reg, indexnn); + aux = 
_mm_add_ps(aux, aux3); + aux = _mm_add_ps(aux, aux2); + // floor + i = _mm_cvttps_epi32(aux); + fi = _mm_cvtepi32_ps(i); + igx = _mm_cmpgt_ps(fi, aux); + j = _mm_and_ps(igx, ones); + aux = _mm_sub_ps(fi, j); + + // Correct negative shift + c = _mm_div_ps(aux, code_length_chips_reg_f); + aux3 = _mm_add_ps(c, ones); + i = _mm_cvttps_epi32(aux3); + cTrunc = _mm_cvtepi32_ps(i); + base = _mm_mul_ps(cTrunc, code_length_chips_reg_f); + local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base)); + negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros); + aux_i = _mm_and_si128(code_length_chips_reg_i, negatives); + local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); + + _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); + + for (k = 0; k < 4; ++k) + { + _result[0][n * 4 + k] = local_code[local_code_chip_index[k]]; + } + indexn = _mm_add_ps(indexn, fours); + } + + for (n = quarterPoints * 4; n < num_points; n++) + { + // resample code for first tap + local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + code_phase_rate_step_chips * (float)(n * n) + shifts_chips[0] - rem_code_phase_chips); + // Take into account that in multitap correlators, the shifts can be negative! 
+ if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1); + local_code_chip_index_ = local_code_chip_index_ % code_length_chips; + _result[0][n] = local_code[local_code_chip_index_]; + } + + // adjacent correlators + unsigned int shift_samples = 0; + for (current_correlator_tap = 1; current_correlator_tap < num_out_vectors; current_correlator_tap++) + { + shift_samples += (int)round((shifts_chips[current_correlator_tap] - shifts_chips[current_correlator_tap - 1]) / code_phase_step_chips); + memcpy(&_result[current_correlator_tap][0], &_result[0][shift_samples], (num_points - shift_samples) * sizeof(float)); + memcpy(&_result[current_correlator_tap][num_points - shift_samples], &_result[0][0], shift_samples * sizeof(float)); + } +} +#endif + + +#ifdef LV_HAVE_SSE3 +#include +static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_u_sse3(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) +{ + float** _result = result; + const unsigned int quarterPoints = num_points / 4; + // int current_correlator_tap; + unsigned int n; + unsigned int k; + unsigned int current_correlator_tap; + const __m128 ones = _mm_set1_ps(1.0f); + const __m128 fours = _mm_set1_ps(4.0f); + const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips); + const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips); + const __m128 code_phase_rate_step_chips_reg = _mm_set_ps1(code_phase_rate_step_chips); + + __VOLK_ATTR_ALIGNED(16) + int local_code_chip_index[4]; + int local_code_chip_index_; + const __m128i zeros = _mm_setzero_si128(); + const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips); + const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips); + __m128i 
local_code_chip_index_reg, aux_i, negatives; + __m128 aux, aux2, aux3, indexnn, shifts_chips_reg, i, fi, igx, j, c, cTrunc, base; + __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); + + shifts_chips_reg = _mm_set_ps1((float)shifts_chips[0]); + aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); + + for (n = 0; n < quarterPoints; n++) + { + aux = _mm_mul_ps(code_phase_step_chips_reg, indexn); + indexnn = _mm_mul_ps(indexn, indexn); + aux3 = _mm_mul_ps(code_phase_rate_step_chips_reg, indexnn); + aux = _mm_add_ps(aux, aux3); + aux = _mm_add_ps(aux, aux2); + // floor + i = _mm_cvttps_epi32(aux); + fi = _mm_cvtepi32_ps(i); + igx = _mm_cmpgt_ps(fi, aux); + j = _mm_and_ps(igx, ones); + aux = _mm_sub_ps(fi, j); + + // Correct negative shift + c = _mm_div_ps(aux, code_length_chips_reg_f); + aux3 = _mm_add_ps(c, ones); + i = _mm_cvttps_epi32(aux3); + cTrunc = _mm_cvtepi32_ps(i); + base = _mm_mul_ps(cTrunc, code_length_chips_reg_f); + local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base)); + negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros); + aux_i = _mm_and_si128(code_length_chips_reg_i, negatives); + local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); + + _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); + + for (k = 0; k < 4; ++k) + { + _result[0][n * 4 + k] = local_code[local_code_chip_index[k]]; + } + indexn = _mm_add_ps(indexn, fours); + } + + for (n = quarterPoints * 4; n < num_points; n++) + { + // resample code for first tap + local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + code_phase_rate_step_chips * (float)(n * n) + shifts_chips[0] - rem_code_phase_chips); + // Take into account that in multitap correlators, the shifts can be negative! 
+ if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1); + local_code_chip_index_ = local_code_chip_index_ % code_length_chips; + _result[0][n] = local_code[local_code_chip_index_]; + } + + // adjacent correlators + unsigned int shift_samples = 0; + for (current_correlator_tap = 1; current_correlator_tap < num_out_vectors; current_correlator_tap++) + { + shift_samples += (int)round((shifts_chips[current_correlator_tap] - shifts_chips[current_correlator_tap - 1]) / code_phase_step_chips); + memcpy(&_result[current_correlator_tap][0], &_result[0][shift_samples], (num_points - shift_samples) * sizeof(float)); + memcpy(&_result[current_correlator_tap][num_points - shift_samples], &_result[0][0], shift_samples * sizeof(float)); + } +} + +#endif // // //#ifdef LV_HAVE_SSE4_1 diff --git a/src/algorithms/tracking/libs/cpu_multicorrelator_real_codes.cc b/src/algorithms/tracking/libs/cpu_multicorrelator_real_codes.cc index 967d66047..e54d77177 100644 --- a/src/algorithms/tracking/libs/cpu_multicorrelator_real_codes.cc +++ b/src/algorithms/tracking/libs/cpu_multicorrelator_real_codes.cc @@ -98,7 +98,7 @@ bool cpu_multicorrelator_real_codes::set_input_output_vectors(std::complex *corr_out, const std::complex *sig_in); - void update_local_code(int correlator_length_samples, float rem_code_phase_chips, float code_phase_step_chips); + void update_local_code(int correlator_length_samples, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips = 0.0); bool Carrier_wipeoff_multicorrelator_resampler(float rem_carrier_phase_in_rad, float phase_step_rad, float rem_code_phase_chips, float code_phase_step_chips, int signal_length_samples); bool free(); From 69803b55da6add58bc5c81adbc09a7c515117bc2 Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Wed, 8 Aug 2018 15:02:29 +0200 Subject: [PATCH 2/2] Remove stream_to_vector in generic acquisition block --- 
.../galileo_e1_pcps_ambiguous_acquisition.cc | 25 +++---- .../galileo_e1_pcps_ambiguous_acquisition.h | 2 - .../adapters/galileo_e5a_pcps_acquisition.cc | 11 ++- .../adapters/galileo_e5a_pcps_acquisition.h | 2 - .../glonass_l1_ca_pcps_acquisition.cc | 21 +++--- .../adapters/glonass_l1_ca_pcps_acquisition.h | 2 - .../glonass_l2_ca_pcps_acquisition.cc | 25 +++---- .../adapters/glonass_l2_ca_pcps_acquisition.h | 2 - .../adapters/gps_l1_ca_pcps_acquisition.cc | 26 +++---- .../adapters/gps_l1_ca_pcps_acquisition.h | 2 - .../adapters/gps_l2_m_pcps_acquisition.cc | 25 +++---- .../adapters/gps_l2_m_pcps_acquisition.h | 2 - .../adapters/gps_l5i_pcps_acquisition.cc | 24 +++---- .../adapters/gps_l5i_pcps_acquisition.h | 2 - .../gnuradio_blocks/pcps_acquisition.cc | 68 ++++++++++++++----- .../gnuradio_blocks/pcps_acquisition.h | 1 + 16 files changed, 115 insertions(+), 125 deletions(-) diff --git a/src/algorithms/acquisition/adapters/galileo_e1_pcps_ambiguous_acquisition.cc b/src/algorithms/acquisition/adapters/galileo_e1_pcps_ambiguous_acquisition.cc index cfbe8199e..065da5193 100644 --- a/src/algorithms/acquisition/adapters/galileo_e1_pcps_ambiguous_acquisition.cc +++ b/src/algorithms/acquisition/adapters/galileo_e1_pcps_ambiguous_acquisition.cc @@ -115,9 +115,6 @@ GalileoE1PcpsAmbiguousAcquisition::GalileoE1PcpsAmbiguousAcquisition( acquisition_ = pcps_make_acquisition(acq_parameters); DLOG(INFO) << "acquisition(" << acquisition_->unique_id() << ")"; - stream_to_vector_ = gr::blocks::stream_to_vector::make(item_size_, vector_length_); - DLOG(INFO) << "stream_to_vector(" << stream_to_vector_->unique_id() << ")"; - if (item_type_.compare("cbyte") == 0) { cbyte_to_float_x2_ = make_complex_byte_to_float_x2(); @@ -278,18 +275,19 @@ void GalileoE1PcpsAmbiguousAcquisition::connect(gr::top_block_sptr top_block) { if (item_type_.compare("gr_complex") == 0) { - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + // nothing to connect } else if (item_type_.compare("cshort") 
== 0) { - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + // nothing to connect } else if (item_type_.compare("cbyte") == 0) { + // Since a byte-based acq implementation is not available, + // we just convert cshorts to gr_complex top_block->connect(cbyte_to_float_x2_, 0, float_to_complex_, 0); top_block->connect(cbyte_to_float_x2_, 1, float_to_complex_, 1); - top_block->connect(float_to_complex_, 0, stream_to_vector_, 0); - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + top_block->connect(float_to_complex_, 0, acquisition_, 0); } else { @@ -302,20 +300,17 @@ void GalileoE1PcpsAmbiguousAcquisition::disconnect(gr::top_block_sptr top_block) { if (item_type_.compare("gr_complex") == 0) { - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + // nothing to disconnect } else if (item_type_.compare("cshort") == 0) { - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + // nothing to disconnect } else if (item_type_.compare("cbyte") == 0) { - // Since a byte-based acq implementation is not available, - // we just convert cshorts to gr_complex top_block->disconnect(cbyte_to_float_x2_, 0, float_to_complex_, 0); top_block->disconnect(cbyte_to_float_x2_, 1, float_to_complex_, 1); - top_block->disconnect(float_to_complex_, 0, stream_to_vector_, 0); - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + top_block->disconnect(float_to_complex_, 0, acquisition_, 0); } else { @@ -328,11 +323,11 @@ gr::basic_block_sptr GalileoE1PcpsAmbiguousAcquisition::get_left_block() { if (item_type_.compare("gr_complex") == 0) { - return stream_to_vector_; + return acquisition_; } else if (item_type_.compare("cshort") == 0) { - return stream_to_vector_; + return acquisition_; } else if (item_type_.compare("cbyte") == 0) { diff --git a/src/algorithms/acquisition/adapters/galileo_e1_pcps_ambiguous_acquisition.h b/src/algorithms/acquisition/adapters/galileo_e1_pcps_ambiguous_acquisition.h index 56f79774d..7390758d1 100644 --- 
a/src/algorithms/acquisition/adapters/galileo_e1_pcps_ambiguous_acquisition.h +++ b/src/algorithms/acquisition/adapters/galileo_e1_pcps_ambiguous_acquisition.h @@ -36,7 +36,6 @@ #include "gnss_synchro.h" #include "pcps_acquisition.h" #include "complex_byte_to_float_x2.h" -#include #include #include #include @@ -135,7 +134,6 @@ public: private: ConfigurationInterface* configuration_; pcps_acquisition_sptr acquisition_; - gr::blocks::stream_to_vector::sptr stream_to_vector_; gr::blocks::float_to_complex::sptr float_to_complex_; complex_byte_to_float_x2_sptr cbyte_to_float_x2_; size_t item_size_; diff --git a/src/algorithms/acquisition/adapters/galileo_e5a_pcps_acquisition.cc b/src/algorithms/acquisition/adapters/galileo_e5a_pcps_acquisition.cc index 037ba3152..eef7ee66d 100644 --- a/src/algorithms/acquisition/adapters/galileo_e5a_pcps_acquisition.cc +++ b/src/algorithms/acquisition/adapters/galileo_e5a_pcps_acquisition.cc @@ -111,7 +111,6 @@ GalileoE5aPcpsAcquisition::GalileoE5aPcpsAcquisition(ConfigurationInterface* con acq_parameters.blocking_on_standby = configuration_->property(role + ".blocking_on_standby", false); acquisition_ = pcps_make_acquisition(acq_parameters); - stream_to_vector_ = gr::blocks::stream_to_vector::make(item_size_, vector_length_); channel_ = 0; threshold_ = 0.0; doppler_step_ = 0; @@ -263,11 +262,11 @@ void GalileoE5aPcpsAcquisition::connect(gr::top_block_sptr top_block) { if (item_type_.compare("gr_complex") == 0) { - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + // nothing to connect } else if (item_type_.compare("cshort") == 0) { - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + // nothing to connect } else { @@ -280,11 +279,11 @@ void GalileoE5aPcpsAcquisition::disconnect(gr::top_block_sptr top_block) { if (item_type_.compare("gr_complex") == 0) { - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + // nothing to disconnect } else if (item_type_.compare("cshort") == 0) { - 
top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + // nothing to disconnect } else { @@ -295,7 +294,7 @@ void GalileoE5aPcpsAcquisition::disconnect(gr::top_block_sptr top_block) gr::basic_block_sptr GalileoE5aPcpsAcquisition::get_left_block() { - return stream_to_vector_; + return acquisition_; } diff --git a/src/algorithms/acquisition/adapters/galileo_e5a_pcps_acquisition.h b/src/algorithms/acquisition/adapters/galileo_e5a_pcps_acquisition.h index ebea0a5e6..344e8f5b6 100644 --- a/src/algorithms/acquisition/adapters/galileo_e5a_pcps_acquisition.h +++ b/src/algorithms/acquisition/adapters/galileo_e5a_pcps_acquisition.h @@ -35,7 +35,6 @@ #include "acquisition_interface.h" #include "gnss_synchro.h" #include "pcps_acquisition.h" -#include #include class ConfigurationInterface; @@ -129,7 +128,6 @@ private: ConfigurationInterface* configuration_; pcps_acquisition_sptr acquisition_; - gr::blocks::stream_to_vector::sptr stream_to_vector_; size_t item_size_; diff --git a/src/algorithms/acquisition/adapters/glonass_l1_ca_pcps_acquisition.cc b/src/algorithms/acquisition/adapters/glonass_l1_ca_pcps_acquisition.cc index 3aedd6f0b..90cfc7014 100644 --- a/src/algorithms/acquisition/adapters/glonass_l1_ca_pcps_acquisition.cc +++ b/src/algorithms/acquisition/adapters/glonass_l1_ca_pcps_acquisition.cc @@ -110,9 +110,6 @@ GlonassL1CaPcpsAcquisition::GlonassL1CaPcpsAcquisition( acquisition_ = pcps_make_acquisition(acq_parameters); DLOG(INFO) << "acquisition(" << acquisition_->unique_id() << ")"; - stream_to_vector_ = gr::blocks::stream_to_vector::make(item_size_, vector_length_); - DLOG(INFO) << "stream_to_vector(" << stream_to_vector_->unique_id() << ")"; - if (item_type_.compare("cbyte") == 0) { cbyte_to_float_x2_ = make_complex_byte_to_float_x2(); @@ -262,18 +259,17 @@ void GlonassL1CaPcpsAcquisition::connect(gr::top_block_sptr top_block) { if (item_type_.compare("gr_complex") == 0) { - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + // nothing to connect 
} else if (item_type_.compare("cshort") == 0) { - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + // nothing to connect } else if (item_type_.compare("cbyte") == 0) { top_block->connect(cbyte_to_float_x2_, 0, float_to_complex_, 0); top_block->connect(cbyte_to_float_x2_, 1, float_to_complex_, 1); - top_block->connect(float_to_complex_, 0, stream_to_vector_, 0); - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + top_block->connect(float_to_complex_, 0, acquisition_, 0); } else { @@ -286,11 +282,11 @@ void GlonassL1CaPcpsAcquisition::disconnect(gr::top_block_sptr top_block) { if (item_type_.compare("gr_complex") == 0) { - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + // nothing to disconnect } else if (item_type_.compare("cshort") == 0) { - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + // nothing to disconnect } else if (item_type_.compare("cbyte") == 0) { @@ -298,8 +294,7 @@ void GlonassL1CaPcpsAcquisition::disconnect(gr::top_block_sptr top_block) // we just convert cshorts to gr_complex top_block->disconnect(cbyte_to_float_x2_, 0, float_to_complex_, 0); top_block->disconnect(cbyte_to_float_x2_, 1, float_to_complex_, 1); - top_block->disconnect(float_to_complex_, 0, stream_to_vector_, 0); - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + top_block->disconnect(float_to_complex_, 0, acquisition_, 0); } else { @@ -312,11 +307,11 @@ gr::basic_block_sptr GlonassL1CaPcpsAcquisition::get_left_block() { if (item_type_.compare("gr_complex") == 0) { - return stream_to_vector_; + return acquisition_; } else if (item_type_.compare("cshort") == 0) { - return stream_to_vector_; + return acquisition_; } else if (item_type_.compare("cbyte") == 0) { diff --git a/src/algorithms/acquisition/adapters/glonass_l1_ca_pcps_acquisition.h b/src/algorithms/acquisition/adapters/glonass_l1_ca_pcps_acquisition.h index 8a956e045..6f4947917 100644 --- a/src/algorithms/acquisition/adapters/glonass_l1_ca_pcps_acquisition.h 
+++ b/src/algorithms/acquisition/adapters/glonass_l1_ca_pcps_acquisition.h @@ -38,7 +38,6 @@ #include "gnss_synchro.h" #include "pcps_acquisition.h" #include "complex_byte_to_float_x2.h" -#include #include #include @@ -135,7 +134,6 @@ public: private: ConfigurationInterface* configuration_; pcps_acquisition_sptr acquisition_; - gr::blocks::stream_to_vector::sptr stream_to_vector_; gr::blocks::float_to_complex::sptr float_to_complex_; complex_byte_to_float_x2_sptr cbyte_to_float_x2_; size_t item_size_; diff --git a/src/algorithms/acquisition/adapters/glonass_l2_ca_pcps_acquisition.cc b/src/algorithms/acquisition/adapters/glonass_l2_ca_pcps_acquisition.cc index a849bc661..86052d6f2 100644 --- a/src/algorithms/acquisition/adapters/glonass_l2_ca_pcps_acquisition.cc +++ b/src/algorithms/acquisition/adapters/glonass_l2_ca_pcps_acquisition.cc @@ -109,9 +109,6 @@ GlonassL2CaPcpsAcquisition::GlonassL2CaPcpsAcquisition( acquisition_ = pcps_make_acquisition(acq_parameters); DLOG(INFO) << "acquisition(" << acquisition_->unique_id() << ")"; - stream_to_vector_ = gr::blocks::stream_to_vector::make(item_size_, vector_length_); - DLOG(INFO) << "stream_to_vector(" << stream_to_vector_->unique_id() << ")"; - if (item_type_.compare("cbyte") == 0) { cbyte_to_float_x2_ = make_complex_byte_to_float_x2(); @@ -261,18 +258,19 @@ void GlonassL2CaPcpsAcquisition::connect(gr::top_block_sptr top_block) { if (item_type_.compare("gr_complex") == 0) { - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + // nothing to connect } else if (item_type_.compare("cshort") == 0) { - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + // nothing to connect } else if (item_type_.compare("cbyte") == 0) { + // Since a byte-based acq implementation is not available, + // we just convert cshorts to gr_complex top_block->connect(cbyte_to_float_x2_, 0, float_to_complex_, 0); top_block->connect(cbyte_to_float_x2_, 1, float_to_complex_, 1); - top_block->connect(float_to_complex_, 0, 
stream_to_vector_, 0); - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + top_block->connect(float_to_complex_, 0, acquisition_, 0); } else { @@ -285,20 +283,17 @@ void GlonassL2CaPcpsAcquisition::disconnect(gr::top_block_sptr top_block) { if (item_type_.compare("gr_complex") == 0) { - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + // nothing to disconnect } else if (item_type_.compare("cshort") == 0) { - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + // nothing to disconnect } else if (item_type_.compare("cbyte") == 0) { - // Since a byte-based acq implementation is not available, - // we just convert cshorts to gr_complex top_block->disconnect(cbyte_to_float_x2_, 0, float_to_complex_, 0); top_block->disconnect(cbyte_to_float_x2_, 1, float_to_complex_, 1); - top_block->disconnect(float_to_complex_, 0, stream_to_vector_, 0); - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + top_block->disconnect(float_to_complex_, 0, acquisition_, 0); } else { @@ -311,11 +306,11 @@ gr::basic_block_sptr GlonassL2CaPcpsAcquisition::get_left_block() { if (item_type_.compare("gr_complex") == 0) { - return stream_to_vector_; + return acquisition_; } else if (item_type_.compare("cshort") == 0) { - return stream_to_vector_; + return acquisition_; } else if (item_type_.compare("cbyte") == 0) { diff --git a/src/algorithms/acquisition/adapters/glonass_l2_ca_pcps_acquisition.h b/src/algorithms/acquisition/adapters/glonass_l2_ca_pcps_acquisition.h index 73162f6f3..f25412d2b 100644 --- a/src/algorithms/acquisition/adapters/glonass_l2_ca_pcps_acquisition.h +++ b/src/algorithms/acquisition/adapters/glonass_l2_ca_pcps_acquisition.h @@ -37,7 +37,6 @@ #include "gnss_synchro.h" #include "pcps_acquisition.h" #include "complex_byte_to_float_x2.h" -#include #include #include @@ -134,7 +133,6 @@ public: private: ConfigurationInterface* configuration_; pcps_acquisition_sptr acquisition_; - gr::blocks::stream_to_vector::sptr stream_to_vector_; 
gr::blocks::float_to_complex::sptr float_to_complex_; complex_byte_to_float_x2_sptr cbyte_to_float_x2_; size_t item_size_; diff --git a/src/algorithms/acquisition/adapters/gps_l1_ca_pcps_acquisition.cc b/src/algorithms/acquisition/adapters/gps_l1_ca_pcps_acquisition.cc index e9a034873..b91223b06 100644 --- a/src/algorithms/acquisition/adapters/gps_l1_ca_pcps_acquisition.cc +++ b/src/algorithms/acquisition/adapters/gps_l1_ca_pcps_acquisition.cc @@ -105,9 +105,6 @@ GpsL1CaPcpsAcquisition::GpsL1CaPcpsAcquisition( acquisition_ = pcps_make_acquisition(acq_parameters); DLOG(INFO) << "acquisition(" << acquisition_->unique_id() << ")"; - stream_to_vector_ = gr::blocks::stream_to_vector::make(item_size_, vector_length_); - DLOG(INFO) << "stream_to_vector(" << stream_to_vector_->unique_id() << ")"; - if (item_type_.compare("cbyte") == 0) { cbyte_to_float_x2_ = make_complex_byte_to_float_x2(); @@ -194,7 +191,6 @@ signed int GpsL1CaPcpsAcquisition::mag() void GpsL1CaPcpsAcquisition::init() { acquisition_->init(); - //set_local_code(); } @@ -251,18 +247,19 @@ void GpsL1CaPcpsAcquisition::connect(gr::top_block_sptr top_block) { if (item_type_.compare("gr_complex") == 0) { - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + // nothing to connect } else if (item_type_.compare("cshort") == 0) { - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + // nothing to connect } else if (item_type_.compare("cbyte") == 0) { + // Since a byte-based acq implementation is not available, + // we just convert cshorts to gr_complex top_block->connect(cbyte_to_float_x2_, 0, float_to_complex_, 0); top_block->connect(cbyte_to_float_x2_, 1, float_to_complex_, 1); - top_block->connect(float_to_complex_, 0, stream_to_vector_, 0); - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + top_block->connect(float_to_complex_, 0, acquisition_, 0); } else { @@ -275,20 +272,17 @@ void GpsL1CaPcpsAcquisition::disconnect(gr::top_block_sptr top_block) { if 
(item_type_.compare("gr_complex") == 0) { - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + // nothing to disconnect } else if (item_type_.compare("cshort") == 0) { - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + // nothing to disconnect } else if (item_type_.compare("cbyte") == 0) { - // Since a byte-based acq implementation is not available, - // we just convert cshorts to gr_complex top_block->disconnect(cbyte_to_float_x2_, 0, float_to_complex_, 0); top_block->disconnect(cbyte_to_float_x2_, 1, float_to_complex_, 1); - top_block->disconnect(float_to_complex_, 0, stream_to_vector_, 0); - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + top_block->disconnect(float_to_complex_, 0, acquisition_, 0); } else { @@ -301,11 +295,11 @@ gr::basic_block_sptr GpsL1CaPcpsAcquisition::get_left_block() { if (item_type_.compare("gr_complex") == 0) { - return stream_to_vector_; + return acquisition_; } else if (item_type_.compare("cshort") == 0) { - return stream_to_vector_; + return acquisition_; } else if (item_type_.compare("cbyte") == 0) { diff --git a/src/algorithms/acquisition/adapters/gps_l1_ca_pcps_acquisition.h b/src/algorithms/acquisition/adapters/gps_l1_ca_pcps_acquisition.h index a5ad9ef67..2a4126bd8 100644 --- a/src/algorithms/acquisition/adapters/gps_l1_ca_pcps_acquisition.h +++ b/src/algorithms/acquisition/adapters/gps_l1_ca_pcps_acquisition.h @@ -40,7 +40,6 @@ #include "gnss_synchro.h" #include "pcps_acquisition.h" #include "complex_byte_to_float_x2.h" -#include #include #include #include @@ -139,7 +138,6 @@ public: private: ConfigurationInterface* configuration_; pcps_acquisition_sptr acquisition_; - gr::blocks::stream_to_vector::sptr stream_to_vector_; gr::blocks::float_to_complex::sptr float_to_complex_; complex_byte_to_float_x2_sptr cbyte_to_float_x2_; size_t item_size_; diff --git a/src/algorithms/acquisition/adapters/gps_l2_m_pcps_acquisition.cc b/src/algorithms/acquisition/adapters/gps_l2_m_pcps_acquisition.cc 
index 7dab0a72a..806adad4e 100644 --- a/src/algorithms/acquisition/adapters/gps_l2_m_pcps_acquisition.cc +++ b/src/algorithms/acquisition/adapters/gps_l2_m_pcps_acquisition.cc @@ -112,9 +112,6 @@ GpsL2MPcpsAcquisition::GpsL2MPcpsAcquisition( acquisition_ = pcps_make_acquisition(acq_parameters); DLOG(INFO) << "acquisition(" << acquisition_->unique_id() << ")"; - stream_to_vector_ = gr::blocks::stream_to_vector::make(item_size_, vector_length_); - DLOG(INFO) << "stream_to_vector(" << stream_to_vector_->unique_id() << ")"; - if (item_type_.compare("cbyte") == 0) { cbyte_to_float_x2_ = make_complex_byte_to_float_x2(); @@ -264,18 +261,19 @@ void GpsL2MPcpsAcquisition::connect(gr::top_block_sptr top_block) { if (item_type_.compare("gr_complex") == 0) { - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + // nothing to connect } else if (item_type_.compare("cshort") == 0) { - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + // nothing to connect } else if (item_type_.compare("cbyte") == 0) { + // Since a byte-based acq implementation is not available, + // we just convert cshorts to gr_complex top_block->connect(cbyte_to_float_x2_, 0, float_to_complex_, 0); top_block->connect(cbyte_to_float_x2_, 1, float_to_complex_, 1); - top_block->connect(float_to_complex_, 0, stream_to_vector_, 0); - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + top_block->connect(float_to_complex_, 0, acquisition_, 0); } else { @@ -288,20 +286,17 @@ void GpsL2MPcpsAcquisition::disconnect(gr::top_block_sptr top_block) { if (item_type_.compare("gr_complex") == 0) { - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + // nothing to disconnect } else if (item_type_.compare("cshort") == 0) { - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + // nothing to disconnect } else if (item_type_.compare("cbyte") == 0) { - // Since a byte-based acq implementation is not available, - // we just convert cshorts to gr_complex 
top_block->disconnect(cbyte_to_float_x2_, 0, float_to_complex_, 0); top_block->disconnect(cbyte_to_float_x2_, 1, float_to_complex_, 1); - top_block->disconnect(float_to_complex_, 0, stream_to_vector_, 0); - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + top_block->disconnect(float_to_complex_, 0, acquisition_, 0); } else { @@ -314,11 +309,11 @@ gr::basic_block_sptr GpsL2MPcpsAcquisition::get_left_block() { if (item_type_.compare("gr_complex") == 0) { - return stream_to_vector_; + return acquisition_; } else if (item_type_.compare("cshort") == 0) { - return stream_to_vector_; + return acquisition_; } else if (item_type_.compare("cbyte") == 0) { diff --git a/src/algorithms/acquisition/adapters/gps_l2_m_pcps_acquisition.h b/src/algorithms/acquisition/adapters/gps_l2_m_pcps_acquisition.h index 570de69d0..f1e57b9b5 100644 --- a/src/algorithms/acquisition/adapters/gps_l2_m_pcps_acquisition.h +++ b/src/algorithms/acquisition/adapters/gps_l2_m_pcps_acquisition.h @@ -38,7 +38,6 @@ #include "gnss_synchro.h" #include "pcps_acquisition.h" #include "complex_byte_to_float_x2.h" -#include #include #include #include @@ -137,7 +136,6 @@ public: private: ConfigurationInterface* configuration_; pcps_acquisition_sptr acquisition_; - gr::blocks::stream_to_vector::sptr stream_to_vector_; gr::blocks::float_to_complex::sptr float_to_complex_; complex_byte_to_float_x2_sptr cbyte_to_float_x2_; size_t item_size_; diff --git a/src/algorithms/acquisition/adapters/gps_l5i_pcps_acquisition.cc b/src/algorithms/acquisition/adapters/gps_l5i_pcps_acquisition.cc index 377938cef..a0b072a31 100644 --- a/src/algorithms/acquisition/adapters/gps_l5i_pcps_acquisition.cc +++ b/src/algorithms/acquisition/adapters/gps_l5i_pcps_acquisition.cc @@ -103,8 +103,6 @@ GpsL5iPcpsAcquisition::GpsL5iPcpsAcquisition( acq_parameters.blocking_on_standby = configuration_->property(role + ".blocking_on_standby", false); acquisition_ = pcps_make_acquisition(acq_parameters); DLOG(INFO) << "acquisition(" << 
acquisition_->unique_id() << ")"; - stream_to_vector_ = gr::blocks::stream_to_vector::make(item_size_, vector_length_); - DLOG(INFO) << "stream_to_vector(" << stream_to_vector_->unique_id() << ")"; if (item_type_.compare("cbyte") == 0) { @@ -251,18 +249,19 @@ void GpsL5iPcpsAcquisition::connect(gr::top_block_sptr top_block) { if (item_type_.compare("gr_complex") == 0) { - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + // nothing to connect } else if (item_type_.compare("cshort") == 0) { - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + // nothing to connect } else if (item_type_.compare("cbyte") == 0) { + // Since a byte-based acq implementation is not available, + // we just convert cshorts to gr_complex top_block->connect(cbyte_to_float_x2_, 0, float_to_complex_, 0); top_block->connect(cbyte_to_float_x2_, 1, float_to_complex_, 1); - top_block->connect(float_to_complex_, 0, stream_to_vector_, 0); - top_block->connect(stream_to_vector_, 0, acquisition_, 0); + top_block->connect(float_to_complex_, 0, acquisition_, 0); } else { @@ -275,20 +274,17 @@ void GpsL5iPcpsAcquisition::disconnect(gr::top_block_sptr top_block) { if (item_type_.compare("gr_complex") == 0) { - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + // nothing to disconnect } else if (item_type_.compare("cshort") == 0) { - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + // nothing to disconnect } else if (item_type_.compare("cbyte") == 0) { - // Since a byte-based acq implementation is not available, - // we just convert cshorts to gr_complex top_block->disconnect(cbyte_to_float_x2_, 0, float_to_complex_, 0); top_block->disconnect(cbyte_to_float_x2_, 1, float_to_complex_, 1); - top_block->disconnect(float_to_complex_, 0, stream_to_vector_, 0); - top_block->disconnect(stream_to_vector_, 0, acquisition_, 0); + top_block->disconnect(float_to_complex_, 0, acquisition_, 0); } else { @@ -301,11 +297,11 @@ gr::basic_block_sptr 
GpsL5iPcpsAcquisition::get_left_block() { if (item_type_.compare("gr_complex") == 0) { - return stream_to_vector_; + return acquisition_; } else if (item_type_.compare("cshort") == 0) { - return stream_to_vector_; + return acquisition_; } else if (item_type_.compare("cbyte") == 0) { diff --git a/src/algorithms/acquisition/adapters/gps_l5i_pcps_acquisition.h b/src/algorithms/acquisition/adapters/gps_l5i_pcps_acquisition.h index a871b9c8a..2b4d86eeb 100644 --- a/src/algorithms/acquisition/adapters/gps_l5i_pcps_acquisition.h +++ b/src/algorithms/acquisition/adapters/gps_l5i_pcps_acquisition.h @@ -38,7 +38,6 @@ #include "gnss_synchro.h" #include "pcps_acquisition.h" #include "complex_byte_to_float_x2.h" -#include #include #include #include @@ -137,7 +136,6 @@ public: private: ConfigurationInterface* configuration_; pcps_acquisition_sptr acquisition_; - gr::blocks::stream_to_vector::sptr stream_to_vector_; gr::blocks::float_to_complex::sptr float_to_complex_; complex_byte_to_float_x2_sptr cbyte_to_float_x2_; size_t item_size_; diff --git a/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition.cc b/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition.cc index 303b597a1..3a8c3cdf6 100644 --- a/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition.cc +++ b/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition.cc @@ -52,7 +52,7 @@ pcps_acquisition_sptr pcps_make_acquisition(const Acq_Conf& conf_) pcps_acquisition::pcps_acquisition(const Acq_Conf& conf_) : gr::block("pcps_acquisition", - gr::io_signature::make(1, 1, conf_.it_size * std::floor(conf_.sampled_ms * conf_.samples_per_ms) * (conf_.bit_transition_flag ? 2 : 1)), + gr::io_signature::make(1, 1, conf_.it_size), gr::io_signature::make(0, 0, conf_.it_size)) { this->message_port_register_out(pmt::mp("events")); @@ -73,7 +73,7 @@ pcps_acquisition::pcps_acquisition(const Acq_Conf& conf_) : gr::block("pcps_acqu { d_fft_size = d_consumed_samples * 2; } - //d_fft_size = next power of two? 
//// + // d_fft_size = next power of two? //// d_mag = 0; d_input_power = 0.0; d_num_doppler_bins = 0; @@ -137,6 +137,7 @@ pcps_acquisition::pcps_acquisition(const Acq_Conf& conf_) : gr::block("pcps_acqu d_dump_number = 0; d_dump_channel = acq_parameters.dump_channel; d_samplesPerChip = acq_parameters.samples_per_chip; + d_buffer_count = 0; // todo: CFAR statistic not available for non-coherent integration if (acq_parameters.max_dwells == 1) { @@ -347,8 +348,8 @@ void pcps_acquisition::set_state(int state) void pcps_acquisition::send_positive_acquisition() { - // 6.1- Declare positive acquisition using a message port - //0=STOP_CHANNEL 1=ACQ_SUCCEES 2=ACQ_FAIL + // Declare positive acquisition using a message port + // 0=STOP_CHANNEL 1=ACQ_SUCCEES 2=ACQ_FAIL DLOG(INFO) << "positive acquisition" << ", satellite " << d_gnss_synchro->System << " " << d_gnss_synchro->PRN << ", sample_stamp " << d_sample_counter @@ -365,8 +366,8 @@ void pcps_acquisition::send_positive_acquisition() void pcps_acquisition::send_negative_acquisition() { - // 6.2- Declare negative acquisition using a message port - //0=STOP_CHANNEL 1=ACQ_SUCCEES 2=ACQ_FAIL + // Declare negative acquisition using a message port + // 0=STOP_CHANNEL 1=ACQ_SUCCEES 2=ACQ_FAIL DLOG(INFO) << "negative acquisition" << ", satellite " << d_gnss_synchro->System << " " << d_gnss_synchro->PRN << ", sample_stamp " << d_sample_counter @@ -564,7 +565,7 @@ void pcps_acquisition::acquisition_core(unsigned long int samp_count) { gr::thread::scoped_lock lk(d_setlock); - // initialize acquisition algorithm + // Initialize acquisition algorithm int doppler = 0; uint32_t indext = 0; int effective_fft_size = (acq_parameters.bit_transition_flag ? 
d_fft_size / 2 : d_fft_size); @@ -658,7 +659,7 @@ void pcps_acquisition::acquisition_core(unsigned long int samp_count) { volk_32fc_x2_multiply_32fc(d_fft_if->get_inbuf(), in, d_grid_doppler_wipeoffs_step_two[doppler_index], d_fft_size); - // 3- Perform the FFT-based convolution (parallel time search) + // Perform the FFT-based convolution (parallel time search) // Compute the FFT of the carrier wiped--off incoming signal d_fft_if->execute(); @@ -803,7 +804,7 @@ int pcps_acquisition::general_work(int noutput_items __attribute__((unused)), { if (!acq_parameters.blocking_on_standby) { - d_sample_counter += d_consumed_samples * ninput_items[0]; + d_sample_counter += ninput_items[0]; consume_each(ninput_items[0]); } if (d_step_two) @@ -820,7 +821,7 @@ int pcps_acquisition::general_work(int noutput_items __attribute__((unused)), { case 0: { - //restart acquisition variables + // Restart acquisition variables d_gnss_synchro->Acq_delay_samples = 0.0; d_gnss_synchro->Acq_doppler_hz = 0.0; d_gnss_synchro->Acq_samplestamp_samples = 0; @@ -828,25 +829,58 @@ int pcps_acquisition::general_work(int noutput_items __attribute__((unused)), d_input_power = 0.0; d_test_statistics = 0.0; d_state = 1; + d_buffer_count = 0; if (!acq_parameters.blocking_on_standby) { - d_sample_counter += d_consumed_samples * ninput_items[0]; // sample counter + d_sample_counter += ninput_items[0]; // sample counter consume_each(ninput_items[0]); } break; } - case 1: { - // Copy the data to the core and let it know that new data is available + unsigned int buff_increment; if (d_cshort) { - memcpy(d_data_buffer_sc, input_items[0], d_consumed_samples * sizeof(lv_16sc_t)); + const lv_16sc_t* in = reinterpret_cast<const lv_16sc_t*>(input_items[0]); // Get the input samples pointer + if ((ninput_items[0] + d_buffer_count) <= d_consumed_samples) + { + buff_increment = ninput_items[0]; + } + else + { + buff_increment = d_consumed_samples - d_buffer_count; + } + memcpy(&d_data_buffer_sc[d_buffer_count], in, sizeof(lv_16sc_t) *
buff_increment); } else { - memcpy(d_data_buffer, input_items[0], d_consumed_samples * sizeof(gr_complex)); + const gr_complex* in = reinterpret_cast<const gr_complex*>(input_items[0]); // Get the input samples pointer + if ((ninput_items[0] + d_buffer_count) <= d_consumed_samples) + { + buff_increment = ninput_items[0]; + } + else + { + buff_increment = d_consumed_samples - d_buffer_count; + } + memcpy(&d_data_buffer[d_buffer_count], in, sizeof(gr_complex) * buff_increment); } + + // If buffer will be full in next iteration + if (d_buffer_count >= d_consumed_samples) + { + d_state = 2; + } + d_buffer_count += buff_increment; + d_sample_counter += buff_increment; + consume_each(buff_increment); + break; + } + + case 2: + { + // Copy the data to the core and let it know that new data is available if (acq_parameters.blocking) { lk.unlock(); @@ -857,8 +891,8 @@ int pcps_acquisition::general_work(int noutput_items __attribute__((unused)), gr::thread::thread d_worker(&pcps_acquisition::acquisition_core, this, d_sample_counter); d_worker_active = true; } - d_sample_counter += d_consumed_samples; - consume_each(1); + consume_each(0); + d_buffer_count = 0; break; } } diff --git a/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition.h b/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition.h index 4e71340ba..af64cda40 100644 --- a/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition.h +++ b/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition.h @@ -135,6 +135,7 @@ private: arma::fmat grid_; long int d_dump_number; unsigned int d_dump_channel; + unsigned int d_buffer_count; public: ~pcps_acquisition();