mirror of
				https://github.com/gnss-sdr/gnss-sdr
				synced 2025-10-24 20:17:39 +00:00 
			
		
		
		
	Add Doppler rate in fast_resampler kernel. Still not used
This commit is contained in:
		| @@ -49,7 +49,8 @@ static inline void volk_gnsssdr_32f_fast_resamplerxnpuppet_32f_generic(float* re | ||||
|     int code_length_chips = 2046; | ||||
|     float code_phase_step_chips = ((float)(code_length_chips) + 0.1) / ((float)num_points); | ||||
|     int num_out_vectors = 3; | ||||
|     float rem_code_phase_chips = -0.234; | ||||
|     float rem_code_phase_chips = -0.8234; | ||||
|     float code_phase_rate_step_chips = 1.0 / powf(2.0, 33.0); | ||||
|     unsigned int n; | ||||
|     float shifts_chips[3] = {-0.1, 0.0, 0.1}; | ||||
|  | ||||
| @@ -59,7 +60,7 @@ static inline void volk_gnsssdr_32f_fast_resamplerxnpuppet_32f_generic(float* re | ||||
|             result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment()); | ||||
|         } | ||||
|  | ||||
|     volk_gnsssdr_32f_xn_fast_resampler_32f_xn_generic(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points); | ||||
|     volk_gnsssdr_32f_xn_fast_resampler_32f_xn_generic(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, code_phase_rate_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points); | ||||
|  | ||||
|     memcpy((float*)result, (float*)result_aux[0], sizeof(float) * num_points); | ||||
|  | ||||
| @@ -73,63 +74,65 @@ static inline void volk_gnsssdr_32f_fast_resamplerxnpuppet_32f_generic(float* re | ||||
|  | ||||
| #endif /* LV_HAVE_GENERIC */ | ||||
|  | ||||
| //#ifdef LV_HAVE_SSE3 | ||||
| //static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_a_sse3(float* result, const float* local_code, unsigned int num_points) | ||||
| //{ | ||||
| //    int code_length_chips = 2046; | ||||
| //    float code_phase_step_chips = ((float)(code_length_chips) + 0.1) / ((float)num_points); | ||||
| //    int num_out_vectors = 3; | ||||
| //    float rem_code_phase_chips = -0.234; | ||||
| //    unsigned int n; | ||||
| //    float shifts_chips[3] = {-0.1, 0.0, 0.1}; | ||||
| // | ||||
| //    float** result_aux = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment()); | ||||
| //    for (n = 0; n < num_out_vectors; n++) | ||||
| //        { | ||||
| //            result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment()); | ||||
| //        } | ||||
| // | ||||
| //    volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse3(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points); | ||||
| // | ||||
| //    memcpy((float*)result, (float*)result_aux[0], sizeof(float) * num_points); | ||||
| // | ||||
| //    for (n = 0; n < num_out_vectors; n++) | ||||
| //        { | ||||
| //            volk_gnsssdr_free(result_aux[n]); | ||||
| //        } | ||||
| //    volk_gnsssdr_free(result_aux); | ||||
| //} | ||||
| // | ||||
| //#endif | ||||
| // | ||||
| //#ifdef LV_HAVE_SSE3 | ||||
| //static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_u_sse3(float* result, const float* local_code, unsigned int num_points) | ||||
| //{ | ||||
| //    int code_length_chips = 2046; | ||||
| //    float code_phase_step_chips = ((float)(code_length_chips) + 0.1) / ((float)num_points); | ||||
| //    int num_out_vectors = 3; | ||||
| //    float rem_code_phase_chips = -0.234; | ||||
| //    unsigned int n; | ||||
| //    float shifts_chips[3] = {-0.1, 0.0, 0.1}; | ||||
| // | ||||
| //    float** result_aux = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment()); | ||||
| //    for (n = 0; n < num_out_vectors; n++) | ||||
| //        { | ||||
| //            result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment()); | ||||
| //        } | ||||
| // | ||||
| //    volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse3(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points); | ||||
| // | ||||
| //    memcpy((float*)result, (float*)result_aux[0], sizeof(float) * num_points); | ||||
| // | ||||
| //    for (n = 0; n < num_out_vectors; n++) | ||||
| //        { | ||||
| //            volk_gnsssdr_free(result_aux[n]); | ||||
| //        } | ||||
| //    volk_gnsssdr_free(result_aux); | ||||
| //} | ||||
| // | ||||
| //#endif | ||||
#ifdef LV_HAVE_SSE3
/*!
 * \brief Puppet wrapper around volk_gnsssdr_32f_xn_fast_resampler_32f_xn_a_sse3.
 *
 * Calls the kernel with a hard-coded parameter set (three output vectors,
 * a 2046-chip code, fixed remnant phase, phase step and phase-rate step)
 * and copies only the first output vector into \p result.
 *
 * \param result      Destination buffer; receives num_points floats.
 * \param local_code  Code to be resampled; forwarded unchanged to the kernel.
 * \param num_points  Number of samples produced per output vector.
 */
static inline void volk_gnsssdr_32f_fast_resamplerxnpuppet_32f_a_sse3(float* result, const float* local_code, unsigned int num_points)
{
    // Fixed kernel parameters used by this wrapper.
    float shifts_chips[3] = {-0.1, 0.0, 0.1};
    int num_out_vectors = 3;
    int code_length_chips = 2046;
    float rem_code_phase_chips = -0.8234;
    float code_phase_rate_step_chips = 1.0 / powf(2.0, 33.0);
    float code_phase_step_chips = ((float)(code_length_chips) + 0.1) / ((float)num_points);
    unsigned int tap;

    // One scratch buffer per output vector, plus the table of pointers to them.
    float** tap_buffers = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment());
    tap = 0;
    while (tap < num_out_vectors)
        {
            tap_buffers[tap] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
            tap++;
        }

    volk_gnsssdr_32f_xn_fast_resampler_32f_xn_a_sse3(tap_buffers, local_code, rem_code_phase_chips, code_phase_step_chips, code_phase_rate_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);

    // Only the first output vector is handed back to the caller.
    memcpy((float*)result, (float*)tap_buffers[0], sizeof(float) * num_points);

    // Release the scratch buffers, then the pointer table itself.
    tap = 0;
    while (tap < num_out_vectors)
        {
            volk_gnsssdr_free(tap_buffers[tap]);
            tap++;
        }
    volk_gnsssdr_free(tap_buffers);
}

#endif
|  | ||||
#ifdef LV_HAVE_SSE3
/*!
 * \brief Puppet wrapper around volk_gnsssdr_32f_xn_fast_resampler_32f_xn_u_sse3.
 *
 * Calls the kernel with a hard-coded parameter set (three output vectors,
 * a 2046-chip code, fixed remnant phase, phase step and phase-rate step)
 * and copies only the first output vector into \p result.
 *
 * \param result      Destination buffer; receives num_points floats.
 * \param local_code  Code to be resampled; forwarded unchanged to the kernel.
 * \param num_points  Number of samples produced per output vector.
 */
static inline void volk_gnsssdr_32f_fast_resamplerxnpuppet_32f_u_sse3(float* result, const float* local_code, unsigned int num_points)
{
    // Fixed kernel parameters used by this wrapper.
    float shifts_chips[3] = {-0.1, 0.0, 0.1};
    int num_out_vectors = 3;
    int code_length_chips = 2046;
    float rem_code_phase_chips = -0.8234;
    float code_phase_rate_step_chips = 1.0 / powf(2.0, 33.0);
    float code_phase_step_chips = ((float)(code_length_chips) + 0.1) / ((float)num_points);
    unsigned int tap;

    // One scratch buffer per output vector, plus the table of pointers to them.
    float** tap_buffers = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment());
    tap = 0;
    while (tap < num_out_vectors)
        {
            tap_buffers[tap] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
            tap++;
        }

    volk_gnsssdr_32f_xn_fast_resampler_32f_xn_u_sse3(tap_buffers, local_code, rem_code_phase_chips, code_phase_step_chips, code_phase_rate_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);

    // Only the first output vector is handed back to the caller.
    memcpy((float*)result, (float*)tap_buffers[0], sizeof(float) * num_points);

    // Release the scratch buffers, then the pointer table itself.
    tap = 0;
    while (tap < num_out_vectors)
        {
            volk_gnsssdr_free(tap_buffers[tap]);
            tap++;
        }
    volk_gnsssdr_free(tap_buffers);
}

#endif
| // | ||||
| // | ||||
| //#ifdef LV_HAVE_SSE4_1 | ||||
|   | ||||
| @@ -46,20 +46,21 @@ | ||||
|  * | ||||
|  * <b>Dispatcher Prototype</b> | ||||
|  * \code | ||||
|  * void volk_gnsssdr_32f_xn_fast_resampler_32f_xn(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) | ||||
|  * void volk_gnsssdr_32f_xn_fast_resampler_32f_xn(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) | ||||
|  * \endcode | ||||
|  * | ||||
|  * \b Inputs | ||||
|  * \li local_code:            Vector to be resampled. | ||||
|  * \li rem_code_phase_chips:  Remnant code phase [chips]. | ||||
|  * \li code_phase_step_chips: Phase increment per sample [chips/sample]. | ||||
|  * \li shifts_chips:          Vector of floats that defines the spacing (in chips) between the replicas of \p local_code | ||||
|  * \li code_length_chips:     Code length in chips. | ||||
|  * \li num_out_vectors        Number of output vectors. | ||||
|  * \li num_points:            The number of data values to be in the resampled vector. | ||||
|  * \li local_code:                 Vector to be resampled. | ||||
|  * \li rem_code_phase_chips:       Remnant code phase [chips]. | ||||
|  * \li code_phase_step_chips:      Phase increment per sample [chips/sample]. | ||||
|  * \li code_phase_rate_step_chips: Phase rate increment per sample [chips/sample^2]. | ||||
|  * \li shifts_chips:               Vector of floats that defines the spacing (in chips) between the replicas of \p local_code | ||||
|  * \li code_length_chips:          Code length in chips. | ||||
|  * \li num_out_vectors             Number of output vectors. | ||||
|  * \li num_points:                 The number of data values to be in the resampled vector. | ||||
|  * | ||||
|  * \b Outputs | ||||
|  * \li result:                Pointer to a vector of pointers where the results will be stored. | ||||
|  * \li result:                     Pointer to a vector of pointers where the results will be stored. | ||||
|  * | ||||
|  */ | ||||
|  | ||||
| @@ -77,7 +78,7 @@ | ||||
|  | ||||
| #ifdef LV_HAVE_GENERIC | ||||
|  | ||||
| static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_generic(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) | ||||
| static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_generic(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) | ||||
| { | ||||
|     int local_code_chip_index; | ||||
|     int current_correlator_tap; | ||||
| @@ -85,9 +86,9 @@ static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_generic(float** res | ||||
|     //first correlator | ||||
|     for (n = 0; n < num_points; n++) | ||||
|         { | ||||
|             // resample code for current tap | ||||
|             local_code_chip_index = (int)floor(code_phase_step_chips * (float)n + shifts_chips[0] - rem_code_phase_chips); | ||||
|             //Take into account that in multitap correlators, the shifts can be negative! | ||||
|             // resample code for first tap | ||||
|             local_code_chip_index = (int)floor(code_phase_step_chips * (float)n + code_phase_rate_step_chips * (float)(n * n) + shifts_chips[0] - rem_code_phase_chips); | ||||
|             // Take into account that in multitap correlators, the shifts can be negative! | ||||
|             if (local_code_chip_index < 0) local_code_chip_index += (int)code_length_chips * (abs(local_code_chip_index) / code_length_chips + 1); | ||||
|             local_code_chip_index = local_code_chip_index % code_length_chips; | ||||
|             result[0][n] = local_code[local_code_chip_index]; | ||||
| @@ -106,145 +107,175 @@ static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_generic(float** res | ||||
| #endif /*LV_HAVE_GENERIC*/ | ||||
|  | ||||
|  | ||||
| //#ifdef LV_HAVE_SSE3 | ||||
| //#include <pmmintrin.h> | ||||
| //static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_a_sse3(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) | ||||
| //{ | ||||
| //    float** _result = result; | ||||
| //    const unsigned int quarterPoints = num_points / 4; | ||||
| //    int current_correlator_tap; | ||||
| //    unsigned int n; | ||||
| //    unsigned int k; | ||||
| //    const __m128 ones = _mm_set1_ps(1.0f); | ||||
| //    const __m128 fours = _mm_set1_ps(4.0f); | ||||
| //    const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips); | ||||
| //    const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips); | ||||
| // | ||||
| //    __VOLK_ATTR_ALIGNED(16) | ||||
| //    int local_code_chip_index[4]; | ||||
| //    int local_code_chip_index_; | ||||
| // | ||||
| //    const __m128i zeros = _mm_setzero_si128(); | ||||
| //    const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips); | ||||
| //    const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips); | ||||
| //    __m128i local_code_chip_index_reg, aux_i, negatives, i; | ||||
| //    __m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base; | ||||
| // | ||||
| //    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) | ||||
| //        { | ||||
| //            shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]); | ||||
| //            aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); | ||||
| //            __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); | ||||
| //            for (n = 0; n < quarterPoints; n++) | ||||
| //                { | ||||
| //                    aux = _mm_mul_ps(code_phase_step_chips_reg, indexn); | ||||
| //                    aux = _mm_add_ps(aux, aux2); | ||||
| //                    // floor | ||||
| //                    i = _mm_cvttps_epi32(aux); | ||||
| //                    fi = _mm_cvtepi32_ps(i); | ||||
| //                    igx = _mm_cmpgt_ps(fi, aux); | ||||
| //                    j = _mm_and_ps(igx, ones); | ||||
| //                    aux = _mm_sub_ps(fi, j); | ||||
| //                    // fmod | ||||
| //                    c = _mm_div_ps(aux, code_length_chips_reg_f); | ||||
| //                    i = _mm_cvttps_epi32(c); | ||||
| //                    cTrunc = _mm_cvtepi32_ps(i); | ||||
| //                    base = _mm_mul_ps(cTrunc, code_length_chips_reg_f); | ||||
| //                    local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base)); | ||||
| // | ||||
| //                    negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros); | ||||
| //                    aux_i = _mm_and_si128(code_length_chips_reg_i, negatives); | ||||
| //                    local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); | ||||
| //                    _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); | ||||
| //                    for (k = 0; k < 4; ++k) | ||||
| //                        { | ||||
| //                            _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; | ||||
| //                        } | ||||
| //                    indexn = _mm_add_ps(indexn, fours); | ||||
| //                } | ||||
| //            for (n = quarterPoints * 4; n < num_points; n++) | ||||
| //                { | ||||
| //                    // resample code for current tap | ||||
| //                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); | ||||
| //                    //Take into account that in multitap correlators, the shifts can be negative! | ||||
| //                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1); | ||||
| //                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips; | ||||
| //                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_]; | ||||
| //                } | ||||
| //        } | ||||
| //} | ||||
| // | ||||
| //#endif | ||||
| // | ||||
| // | ||||
| //#ifdef LV_HAVE_SSE3 | ||||
| //#include <pmmintrin.h> | ||||
| //static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_u_sse3(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points) | ||||
| //{ | ||||
| //    float** _result = result; | ||||
| //    const unsigned int quarterPoints = num_points / 4; | ||||
| //    int current_correlator_tap; | ||||
| //    unsigned int n; | ||||
| //    unsigned int k; | ||||
| //    const __m128 ones = _mm_set1_ps(1.0f); | ||||
| //    const __m128 fours = _mm_set1_ps(4.0f); | ||||
| //    const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips); | ||||
| //    const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips); | ||||
| // | ||||
| //    __VOLK_ATTR_ALIGNED(16) | ||||
| //    int local_code_chip_index[4]; | ||||
| //    int local_code_chip_index_; | ||||
| // | ||||
| //    const __m128i zeros = _mm_setzero_si128(); | ||||
| //    const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips); | ||||
| //    const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips); | ||||
| //    __m128i local_code_chip_index_reg, aux_i, negatives, i; | ||||
| //    __m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base; | ||||
| // | ||||
| //    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++) | ||||
| //        { | ||||
| //            shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]); | ||||
| //            aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); | ||||
| //            __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); | ||||
| //            for (n = 0; n < quarterPoints; n++) | ||||
| //                { | ||||
| //                    aux = _mm_mul_ps(code_phase_step_chips_reg, indexn); | ||||
| //                    aux = _mm_add_ps(aux, aux2); | ||||
| //                    // floor | ||||
| //                    i = _mm_cvttps_epi32(aux); | ||||
| //                    fi = _mm_cvtepi32_ps(i); | ||||
| //                    igx = _mm_cmpgt_ps(fi, aux); | ||||
| //                    j = _mm_and_ps(igx, ones); | ||||
| //                    aux = _mm_sub_ps(fi, j); | ||||
| //                    // fmod | ||||
| //                    c = _mm_div_ps(aux, code_length_chips_reg_f); | ||||
| //                    i = _mm_cvttps_epi32(c); | ||||
| //                    cTrunc = _mm_cvtepi32_ps(i); | ||||
| //                    base = _mm_mul_ps(cTrunc, code_length_chips_reg_f); | ||||
| //                    local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base)); | ||||
| // | ||||
| //                    negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros); | ||||
| //                    aux_i = _mm_and_si128(code_length_chips_reg_i, negatives); | ||||
| //                    local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); | ||||
| //                    _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); | ||||
| //                    for (k = 0; k < 4; ++k) | ||||
| //                        { | ||||
| //                            _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; | ||||
| //                        } | ||||
| //                    indexn = _mm_add_ps(indexn, fours); | ||||
| //                } | ||||
| //            for (n = quarterPoints * 4; n < num_points; n++) | ||||
| //                { | ||||
| //                    // resample code for current tap | ||||
| //                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips); | ||||
| //                    //Take into account that in multitap correlators, the shifts can be negative! | ||||
| //                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1); | ||||
| //                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips; | ||||
| //                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_]; | ||||
| //                } | ||||
| //        } | ||||
| //} | ||||
| //#endif | ||||
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
 * \brief SSE3 implementation of the fast resampler with a code phase rate term.
 *
 * The first output vector is obtained by sampling \p local_code at index
 * floor(code_phase_step_chips * n + code_phase_rate_step_chips * n^2
 *       + shifts_chips[0] - rem_code_phase_chips), wrapped by code_length_chips.
 * Four samples are computed per iteration with SSE; the remaining
 * (num_points % 4) samples are computed with scalar code.
 * Every further output vector is a circular rotation of the first one by the
 * number of samples matching the spacing between consecutive shifts_chips entries.
 *
 * \param result                     Array of num_out_vectors pointers to output buffers of num_points floats.
 * \param local_code                 Vector to be resampled.
 * \param rem_code_phase_chips       Remnant code phase [chips].
 * \param code_phase_step_chips      Phase increment per sample [chips/sample].
 * \param code_phase_rate_step_chips Phase rate increment per sample [chips/sample^2].
 * \param shifts_chips               Spacing (in chips) between the replicas of \p local_code.
 * \param code_length_chips          Code length in chips.
 * \param num_out_vectors            Number of output vectors.
 * \param num_points                 Number of samples in each output vector.
 *
 * \note No aligned load or store is issued on \p result or \p local_code here;
 *       the only aligned store targets the local index buffer.
 * \note The rotation step does not check that the accumulated sample shift
 *       stays within [0, num_points]; callers are presumably expected to pass
 *       ascending shifts_chips that satisfy this - TODO confirm.
 */
static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_a_sse3(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    unsigned int n;
    unsigned int k;
    // Signed, like num_out_vectors, so a non-positive count simply skips the loop.
    int current_correlator_tap;
    const __m128 ones = _mm_set1_ps(1.0f);
    const __m128 fours = _mm_set1_ps(4.0f);
    const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
    const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
    const __m128 code_phase_rate_step_chips_reg = _mm_set_ps1(code_phase_rate_step_chips);

    __VOLK_ATTR_ALIGNED(16)
    int local_code_chip_index[4];
    int local_code_chip_index_;
    const __m128i zeros = _mm_setzero_si128();
    const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
    const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
    // i carries the integer lanes produced by _mm_cvttps_epi32, so it must be __m128i.
    __m128i local_code_chip_index_reg, aux_i, negatives, i;
    __m128 aux, aux2, aux3, indexnn, shifts_chips_reg, fi, igx, j, c, cTrunc, base;
    __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);

    // Constant part of the phase for the first tap: shift minus remnant phase.
    shifts_chips_reg = _mm_set_ps1((float)shifts_chips[0]);
    aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);

    for (n = 0; n < quarterPoints; n++)
        {
            // phase = step * n + rate * n^2 + (shift - rem)
            aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
            indexnn = _mm_mul_ps(indexn, indexn);
            aux3 = _mm_mul_ps(code_phase_rate_step_chips_reg, indexnn);
            aux = _mm_add_ps(aux, aux3);
            aux = _mm_add_ps(aux, aux2);
            // floor: truncate, then subtract one where truncation rounded up
            i = _mm_cvttps_epi32(aux);
            fi = _mm_cvtepi32_ps(i);
            igx = _mm_cmpgt_ps(fi, aux);
            j = _mm_and_ps(igx, ones);
            aux = _mm_sub_ps(fi, j);

            // Correct negative shift: subtract trunc(aux / length + 1) code lengths,
            // then add one code length back wherever the result went negative.
            // NOTE(review): only one code length is added back, so this appears to
            // assume aux > -code_length_chips; the scalar tail below wraps
            // arbitrarily negative indices - confirm the intended input range.
            c = _mm_div_ps(aux, code_length_chips_reg_f);
            aux3 = _mm_add_ps(c, ones);
            i = _mm_cvttps_epi32(aux3);
            cTrunc = _mm_cvtepi32_ps(i);
            base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
            local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base));
            negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros);
            aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
            local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i);

            _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);

            // Gather the four code samples selected by the computed indices.
            for (k = 0; k < 4; ++k)
                {
                    result[0][n * 4 + k] = local_code[local_code_chip_index[k]];
                }
            indexn = _mm_add_ps(indexn, fours);
        }

    for (n = quarterPoints * 4; n < num_points; n++)
        {
            // resample code for first tap; n is squared in float so that the
            // product cannot wrap in unsigned arithmetic for large n
            local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + code_phase_rate_step_chips * ((float)n * (float)n) + shifts_chips[0] - rem_code_phase_chips);
            // Take into account that in multitap correlators, the shifts can be negative!
            if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
            local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
            result[0][n] = local_code[local_code_chip_index_];
        }

    // adjacent correlators: rotate the first tap by the accumulated sample shift
    unsigned int shift_samples = 0;
    for (current_correlator_tap = 1; current_correlator_tap < num_out_vectors; current_correlator_tap++)
        {
            shift_samples += (int)round((shifts_chips[current_correlator_tap] - shifts_chips[current_correlator_tap - 1]) / code_phase_step_chips);
            memcpy(&result[current_correlator_tap][0], &result[0][shift_samples], (num_points - shift_samples) * sizeof(float));
            memcpy(&result[current_correlator_tap][num_points - shift_samples], &result[0][0], shift_samples * sizeof(float));
        }
}
#endif
|  | ||||
|  | ||||
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
 * \brief SSE3 implementation of the fast resampler with a code phase rate term.
 *
 * The first output vector is obtained by sampling \p local_code at index
 * floor(code_phase_step_chips * n + code_phase_rate_step_chips * n^2
 *       + shifts_chips[0] - rem_code_phase_chips), wrapped by code_length_chips.
 * Four samples are computed per iteration with SSE; the remaining
 * (num_points % 4) samples are computed with scalar code.
 * Every further output vector is a circular rotation of the first one by the
 * number of samples matching the spacing between consecutive shifts_chips entries.
 *
 * \param result                     Array of num_out_vectors pointers to output buffers of num_points floats.
 * \param local_code                 Vector to be resampled.
 * \param rem_code_phase_chips       Remnant code phase [chips].
 * \param code_phase_step_chips      Phase increment per sample [chips/sample].
 * \param code_phase_rate_step_chips Phase rate increment per sample [chips/sample^2].
 * \param shifts_chips               Spacing (in chips) between the replicas of \p local_code.
 * \param code_length_chips          Code length in chips.
 * \param num_out_vectors            Number of output vectors.
 * \param num_points                 Number of samples in each output vector.
 *
 * \note No aligned load or store is issued on \p result or \p local_code here;
 *       the only aligned store targets the local index buffer.
 * \note The rotation step does not check that the accumulated sample shift
 *       stays within [0, num_points]; callers are presumably expected to pass
 *       ascending shifts_chips that satisfy this - TODO confirm.
 */
static inline void volk_gnsssdr_32f_xn_fast_resampler_32f_xn_u_sse3(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    unsigned int n;
    unsigned int k;
    // Signed, like num_out_vectors, so a non-positive count simply skips the loop.
    int current_correlator_tap;
    const __m128 ones = _mm_set1_ps(1.0f);
    const __m128 fours = _mm_set1_ps(4.0f);
    const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
    const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
    const __m128 code_phase_rate_step_chips_reg = _mm_set_ps1(code_phase_rate_step_chips);

    __VOLK_ATTR_ALIGNED(16)
    int local_code_chip_index[4];
    int local_code_chip_index_;
    const __m128i zeros = _mm_setzero_si128();
    const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
    const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
    // i carries the integer lanes produced by _mm_cvttps_epi32, so it must be __m128i.
    __m128i local_code_chip_index_reg, aux_i, negatives, i;
    __m128 aux, aux2, aux3, indexnn, shifts_chips_reg, fi, igx, j, c, cTrunc, base;
    __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);

    // Constant part of the phase for the first tap: shift minus remnant phase.
    shifts_chips_reg = _mm_set_ps1((float)shifts_chips[0]);
    aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);

    for (n = 0; n < quarterPoints; n++)
        {
            // phase = step * n + rate * n^2 + (shift - rem)
            aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
            indexnn = _mm_mul_ps(indexn, indexn);
            aux3 = _mm_mul_ps(code_phase_rate_step_chips_reg, indexnn);
            aux = _mm_add_ps(aux, aux3);
            aux = _mm_add_ps(aux, aux2);
            // floor: truncate, then subtract one where truncation rounded up
            i = _mm_cvttps_epi32(aux);
            fi = _mm_cvtepi32_ps(i);
            igx = _mm_cmpgt_ps(fi, aux);
            j = _mm_and_ps(igx, ones);
            aux = _mm_sub_ps(fi, j);

            // Correct negative shift: subtract trunc(aux / length + 1) code lengths,
            // then add one code length back wherever the result went negative.
            // NOTE(review): only one code length is added back, so this appears to
            // assume aux > -code_length_chips; the scalar tail below wraps
            // arbitrarily negative indices - confirm the intended input range.
            c = _mm_div_ps(aux, code_length_chips_reg_f);
            aux3 = _mm_add_ps(c, ones);
            i = _mm_cvttps_epi32(aux3);
            cTrunc = _mm_cvtepi32_ps(i);
            base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
            local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base));
            negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros);
            aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
            local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i);

            _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);

            // Gather the four code samples selected by the computed indices.
            for (k = 0; k < 4; ++k)
                {
                    result[0][n * 4 + k] = local_code[local_code_chip_index[k]];
                }
            indexn = _mm_add_ps(indexn, fours);
        }

    for (n = quarterPoints * 4; n < num_points; n++)
        {
            // resample code for first tap; n is squared in float so that the
            // product cannot wrap in unsigned arithmetic for large n
            local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + code_phase_rate_step_chips * ((float)n * (float)n) + shifts_chips[0] - rem_code_phase_chips);
            // Take into account that in multitap correlators, the shifts can be negative!
            if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
            local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
            result[0][n] = local_code[local_code_chip_index_];
        }

    // adjacent correlators: rotate the first tap by the accumulated sample shift
    unsigned int shift_samples = 0;
    for (current_correlator_tap = 1; current_correlator_tap < num_out_vectors; current_correlator_tap++)
        {
            shift_samples += (int)round((shifts_chips[current_correlator_tap] - shifts_chips[current_correlator_tap - 1]) / code_phase_step_chips);
            memcpy(&result[current_correlator_tap][0], &result[0][shift_samples], (num_points - shift_samples) * sizeof(float));
            memcpy(&result[current_correlator_tap][num_points - shift_samples], &result[0][0], shift_samples * sizeof(float));
        }
}

#endif
| // | ||||
| // | ||||
| //#ifdef LV_HAVE_SSE4_1 | ||||
|   | ||||
| @@ -98,7 +98,7 @@ bool cpu_multicorrelator_real_codes::set_input_output_vectors(std::complex<float | ||||
| } | ||||
|  | ||||
|  | ||||
| void cpu_multicorrelator_real_codes::update_local_code(int correlator_length_samples, float rem_code_phase_chips, float code_phase_step_chips) | ||||
| void cpu_multicorrelator_real_codes::update_local_code(int correlator_length_samples, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips) | ||||
| { | ||||
|     if (d_use_fast_resampler) | ||||
|         { | ||||
| @@ -106,6 +106,7 @@ void cpu_multicorrelator_real_codes::update_local_code(int correlator_length_sam | ||||
|                 d_local_code_in, | ||||
|                 rem_code_phase_chips, | ||||
|                 code_phase_step_chips, | ||||
|                 code_phase_rate_step_chips, | ||||
|                 d_shifts_chips, | ||||
|                 d_code_length_chips, | ||||
|                 d_n_correlators, | ||||
|   | ||||
| @@ -51,7 +51,7 @@ public: | ||||
|     bool init(int max_signal_length_samples, int n_correlators); | ||||
|     bool set_local_code_and_taps(int code_length_chips, const float *local_code_in, float *shifts_chips); | ||||
|     bool set_input_output_vectors(std::complex<float> *corr_out, const std::complex<float> *sig_in); | ||||
|     void update_local_code(int correlator_length_samples, float rem_code_phase_chips, float code_phase_step_chips); | ||||
|     void update_local_code(int correlator_length_samples, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips = 0.0); | ||||
|     bool Carrier_wipeoff_multicorrelator_resampler(float rem_carrier_phase_in_rad, float phase_step_rad, float rem_code_phase_chips, float code_phase_step_chips, int signal_length_samples); | ||||
|     bool free(); | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Carles Fernandez
					Carles Fernandez