mirror of
				https://github.com/gnss-sdr/gnss-sdr
				synced 2025-10-26 21:17:38 +00:00 
			
		
		
		
	Moving two kernels to volk_gnsssdr. Still no testing
This commit is contained in:
		| @@ -5,20 +5,20 @@ | ||||
| //#include <types.h> | ||||
| static inline int16_t sat_adds16b(int16_t x, int16_t y) | ||||
| { | ||||
| //	int16_t ux = x; | ||||
| //	int16_t uy = y; | ||||
| //	int16_t res = ux + uy; | ||||
| // | ||||
| //	/* Calculate overflowed result. (Don't change the sign bit of ux) */ | ||||
| //	ux = (ux >> 15) + SHRT_MAX; | ||||
| // | ||||
| //	/* Force compiler to use cmovns instruction */ | ||||
| //	if ((int16_t) ((ux ^ uy) | ~(uy ^ res)) >= 0) | ||||
| //	{ | ||||
| //		res = ux; | ||||
| //	} | ||||
| // | ||||
| //	return res; | ||||
|     //	int16_t ux = x; | ||||
|     //	int16_t uy = y; | ||||
|     //	int16_t res = ux + uy; | ||||
|     // | ||||
|     //	/* Calculate overflowed result. (Don't change the sign bit of ux) */ | ||||
|     //	ux = (ux >> 15) + SHRT_MAX; | ||||
|     // | ||||
|     //	/* Force compiler to use cmovns instruction */ | ||||
|     //	if ((int16_t) ((ux ^ uy) | ~(uy ^ res)) >= 0) | ||||
|     //	{ | ||||
|     //		res = ux; | ||||
|     //	} | ||||
|     // | ||||
|     //	return res; | ||||
|  | ||||
|     int32_t res = (int32_t) x + (int32_t) y; | ||||
|  | ||||
|   | ||||
| @@ -48,7 +48,7 @@ | ||||
|  \param bVector One of the vectors to be multiplied and accumulated | ||||
|  \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector | ||||
|  */ | ||||
| static inline void volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, unsigned int num_points, int num_a_vectors) | ||||
| static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, unsigned int num_points, int num_a_vectors) | ||||
| { | ||||
|     for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|         { | ||||
| @@ -57,7 +57,7 @@ static inline void volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_generic(lv_16sc_t* resu | ||||
|                 { | ||||
|                     //r*a.r - i*a.i, i*a.r + r*a.i
 | ||||
|                     //result[n_vec]+=in_common[n]*in_a[n_vec][n];
 | ||||
|                     lv_16sc_t tmp = in_common[n]*in_a[n_vec][n]; | ||||
|                     lv_16sc_t tmp = in_common[n] * in_a[n_vec][n]; | ||||
|                     result[n_vec] = lv_cmake(sat_adds16b(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16b(lv_cimag(result[n_vec]), lv_cimag(tmp))); | ||||
|                 } | ||||
|         } | ||||
| @@ -68,7 +68,7 @@ static inline void volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_generic(lv_16sc_t* resu | ||||
| 
 | ||||
| #ifdef LV_HAVE_SSE2 | ||||
| #include <emmintrin.h> | ||||
| static inline void volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, unsigned int num_points, int num_a_vectors) | ||||
| static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, unsigned int num_points, int num_a_vectors) | ||||
| { | ||||
|     lv_16sc_t dotProduct = lv_cmake(0,0); | ||||
| 
 | ||||
| @@ -148,9 +148,9 @@ static inline void volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, | ||||
| 
 | ||||
|     for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|         { | ||||
|             for(unsigned int n  = sse_iters * 4;n < num_points; n++){ | ||||
| 
 | ||||
|                     lv_16sc_t tmp = in_common[n]*in_a[n_vec][n]; | ||||
|             for(unsigned int n  = sse_iters * 4; n < num_points; n++) | ||||
|                 { | ||||
|                     lv_16sc_t tmp = in_common[n] * in_a[n_vec][n]; | ||||
| 
 | ||||
|                     _out[n_vec] = lv_cmake(sat_adds16b(lv_creal(_out[n_vec]), lv_creal(tmp)), | ||||
|                             sat_adds16b(lv_cimag(_out[n_vec]), lv_cimag(tmp))); | ||||
| @@ -0,0 +1,171 @@ | ||||
| /*! | ||||
|  * \file volk_gnsssdr_16ic_xn_resampler_16ic_xn.h | ||||
|  * \brief Volk protokernel: resample a 16 bits complex vector | ||||
|  * \authors <ul> | ||||
|  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es | ||||
|  *          </ul> | ||||
|  * | ||||
|  * Volk protokernel that resamples a 16 bits complex vector (the local code) | ||||
|  * into several output vectors, each one with its own initial code phase | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
|  * | ||||
|  * Copyright (C) 2010-2015  (see AUTHORS file for a list of contributors) | ||||
|  * | ||||
|  * GNSS-SDR is a software defined Global Navigation | ||||
|  *          Satellite Systems receiver | ||||
|  * | ||||
|  * This file is part of GNSS-SDR. | ||||
|  * | ||||
|  * GNSS-SDR is free software: you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation, either version 3 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * GNSS-SDR is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
|  */ | ||||
|  | ||||
| #ifndef INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H | ||||
| #define INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H | ||||
|  | ||||
| #include <math.h> | ||||
| #include <volk_gnsssdr/volk_gnsssdr_common.h> | ||||
| #include <volk_gnsssdr/volk_gnsssdr_complex.h> | ||||
|  | ||||
| //#pragma STDC FENV_ACCESS ON | ||||
|  | ||||
| #ifdef LV_HAVE_GENERIC | ||||
|  | ||||
| //int round_int( float r ) { | ||||
| //    return (r > 0.0) ? (r + 0.5) : (r - 0.5); | ||||
| //} | ||||
| /*! | ||||
|  \brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector | ||||
|  \param cVector The vector where the result will be stored | ||||
|  \param aVector One of the vectors to be multiplied | ||||
|  \param bVector One of the vectors to be multiplied | ||||
|  \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector | ||||
|  */ | ||||
|  | ||||
| static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int num_output_samples, unsigned int code_length_chips, int num_out_vectors) | ||||
| { | ||||
|     int local_code_chip_index; | ||||
|     //fesetround(FE_TONEAREST); | ||||
|     for (int current_vector = 0; current_vector < num_out_vectors; current_vector++) | ||||
|         { | ||||
|             for (unsigned int n = 0; n < num_output_samples; n++) | ||||
|                 { | ||||
|                     // resample code for current tap | ||||
|                     local_code_chip_index = round(code_phase_step_chips * (float)(n) + rem_code_phase_chips[current_vector]-0.5f); | ||||
|                     if (local_code_chip_index < 0.0) local_code_chip_index += code_length_chips; | ||||
|                     if (local_code_chip_index > (code_length_chips-1)) local_code_chip_index -= code_length_chips; | ||||
|                     //std::cout<<"g["<<n<<"]="<<code_phase_step_chips*static_cast<float>(n) + rem_code_phase_chips-0.5f<<","<<local_code_chip_index<<" "; | ||||
|                     result[current_vector][n] = local_code[local_code_chip_index]; | ||||
|                 } | ||||
|         } | ||||
|     //std::cout<<std::endl; | ||||
| } | ||||
|  | ||||
| #endif /*LV_HAVE_GENERIC*/ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE2 | ||||
| #include <emmintrin.h> | ||||
| static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int num_output_samples, unsigned int code_length_chips, int num_out_vectors) | ||||
| { | ||||
|     _MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO | ||||
|     unsigned int number; | ||||
|     const unsigned int quarterPoints = num_output_samples / 4; | ||||
|  | ||||
|     lv_16sc_t** _result = result; | ||||
|     __attribute__((aligned(16))) int local_code_chip_index[4]; | ||||
|     float tmp_rem_code_phase_chips; | ||||
|     __m128 _rem_code_phase,_code_phase_step_chips; | ||||
|     __m128i _code_length_chips,_code_length_chips_minus1; | ||||
|     __m128 _code_phase_out,_code_phase_out_with_offset; | ||||
|  | ||||
|     _code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register | ||||
|     __attribute__((aligned(16))) int four_times_code_length_chips_minus1[4]; | ||||
|     four_times_code_length_chips_minus1[0] = code_length_chips - 1; | ||||
|     four_times_code_length_chips_minus1[1] = code_length_chips - 1; | ||||
|     four_times_code_length_chips_minus1[2] = code_length_chips - 1; | ||||
|     four_times_code_length_chips_minus1[3] = code_length_chips - 1; | ||||
|  | ||||
|     __attribute__((aligned(16))) int four_times_code_length_chips[4]; | ||||
|     four_times_code_length_chips[0] = code_length_chips; | ||||
|     four_times_code_length_chips[1] = code_length_chips; | ||||
|     four_times_code_length_chips[2] = code_length_chips; | ||||
|     four_times_code_length_chips[3] = code_length_chips; | ||||
|  | ||||
|     _code_length_chips = _mm_loadu_si128((__m128i*)&four_times_code_length_chips); //load float to all four float values in m128 register | ||||
|     _code_length_chips_minus1 = _mm_loadu_si128((__m128i*)&four_times_code_length_chips_minus1); //load float to all four float values in m128 register | ||||
|  | ||||
|     __m128i negative_indexes, overflow_indexes,_code_phase_out_int, _code_phase_out_int_neg,_code_phase_out_int_over; | ||||
|  | ||||
|     __m128i zero=_mm_setzero_si128(); | ||||
|  | ||||
|     __attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; | ||||
|     __m128 _4output_index = _mm_load_ps(init_idx_float); | ||||
|     __attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; | ||||
|     __m128 _4constant_float = _mm_load_ps(init_4constant_float); | ||||
|  | ||||
|     int current_vector = 0; | ||||
|     int sample_idx = 0; | ||||
|     for(number = 0; number < quarterPoints; number++) | ||||
|         { | ||||
|             //common to all outputs | ||||
|             _code_phase_out = _mm_mul_ps(_code_phase_step_chips, _4output_index); //compute the code phase point with the phase step | ||||
|  | ||||
|             //output vector dependant (different code phase offset) | ||||
|             for(current_vector = 0; current_vector < num_out_vectors; current_vector++) | ||||
|                 { | ||||
|                     tmp_rem_code_phase_chips = rem_code_phase_chips[current_vector] - 0.5f; // adjust offset to perform correct rounding (chip transition at 0) | ||||
|                     _rem_code_phase = _mm_load1_ps(&tmp_rem_code_phase_chips); //load float to all four float values in m128 register | ||||
|  | ||||
|                     _code_phase_out_with_offset = _mm_add_ps(_code_phase_out, _rem_code_phase); //add the phase offset | ||||
|                     _code_phase_out_int = _mm_cvtps_epi32(_code_phase_out_with_offset); //convert to integer | ||||
|  | ||||
|                     negative_indexes = _mm_cmplt_epi32 (_code_phase_out_int, zero); //test for negative values | ||||
|                     _code_phase_out_int_neg = _mm_add_epi32(_code_phase_out_int, _code_length_chips); //the negative values branch | ||||
|                     _code_phase_out_int_neg = _mm_xor_si128(_code_phase_out_int, _mm_and_si128( negative_indexes,_mm_xor_si128( _code_phase_out_int_neg, _code_phase_out_int ))); | ||||
|  | ||||
|                     overflow_indexes = _mm_cmpgt_epi32  (_code_phase_out_int_neg, _code_length_chips_minus1); //test for overflow values | ||||
|                     _code_phase_out_int_over = _mm_sub_epi32(_code_phase_out_int_neg, _code_length_chips); //the negative values branch | ||||
|                     _code_phase_out_int_over = _mm_xor_si128(_code_phase_out_int_neg, _mm_and_si128( overflow_indexes, _mm_xor_si128( _code_phase_out_int_over, _code_phase_out_int_neg ))); | ||||
|  | ||||
|                     _mm_storeu_si128((__m128i*)local_code_chip_index, _code_phase_out_int_over); // Store the results back | ||||
|  | ||||
|                     //todo: optimize the local code lookup table with intrinsics, if possible | ||||
|                     _result[current_vector][sample_idx] = local_code[local_code_chip_index[0]]; | ||||
|                     _result[current_vector][sample_idx + 1] = local_code[local_code_chip_index[1]]; | ||||
|                     _result[current_vector][sample_idx + 2] = local_code[local_code_chip_index[2]]; | ||||
|                     _result[current_vector][sample_idx + 3] = local_code[local_code_chip_index[3]]; | ||||
|                 } | ||||
|             _4output_index = _mm_add_ps(_4output_index, _4constant_float); | ||||
|             sample_idx += 4; | ||||
|         } | ||||
|  | ||||
|     for(number = quarterPoints * 4; number < num_output_samples; number++) | ||||
|         { | ||||
|             for(current_vector = 0; current_vector < num_out_vectors; current_vector++) | ||||
|                 { | ||||
|                     local_code_chip_index[0] = (int)(code_phase_step_chips * (float)(number) + rem_code_phase_chips[current_vector]); | ||||
|                     if (local_code_chip_index[0] < 0.0) local_code_chip_index[0] += code_length_chips - 1; | ||||
|                     if (local_code_chip_index[0] > (code_length_chips - 1)) local_code_chip_index[0] -= code_length_chips; | ||||
|                     _result[current_vector][number] = local_code[local_code_chip_index[0]]; | ||||
|                 } | ||||
|  | ||||
|         } | ||||
|  | ||||
| } | ||||
| #endif /* LV_HAVE_SSE2 */ | ||||
|  | ||||
| #endif /*INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H*/ | ||||
| @@ -57,16 +57,7 @@ std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t | ||||
|             test_params.vlen(), test_params.iter(), test_params.benchmark_mode(), test_params.kernel_regex()); | ||||
|  | ||||
|     std::vector<volk_gnsssdr_test_case_t> test_cases = boost::assign::list_of | ||||
|         // no one uses these, so don't test them | ||||
|         //VOLK_PROFILE(volk_gnsssdr_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex); | ||||
|         //VOLK_PROFILE(volk_gnsssdr_16i_branch_4_state_8, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex); | ||||
|         //VOLK_PROFILE(volk_gnsssdr_16i_max_star_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex); | ||||
|         //VOLK_PROFILE(volk_gnsssdr_16i_max_star_horizontal_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex); | ||||
|         //VOLK_PROFILE(volk_gnsssdr_16i_permute_and_scalar_add, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex); | ||||
|         //VOLK_PROFILE(volk_gnsssdr_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex); | ||||
|         // we need a puppet for this one | ||||
|         //(VOLK_INIT_TEST(volk_gnsssdr_32fc_s32f_x2_power_spectral_density_32f,   test_params)) | ||||
|         //(VOLK_INIT_TEST(volk_gnsssdr_32f_null_32f, test_params)) | ||||
|  | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_8i_accumulator_s8i, test_params)) | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_8i_index_max_16u, test_params)) | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_8i_max_s8i, test_params)) | ||||
| @@ -77,11 +68,13 @@ std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_8ic_x2_multiply_8ic, test_params)) | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_8u_x2_multiply_8u, test_params)) | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_64f_accumulator_64f, test_params)) | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_32fc_convert_8ic, test_params)) | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_32fc_convert_8ic, test_params_int1)) | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_32fc_convert_16ic, test_params)) | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_dot_prod_16ic, test_params)) | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_multiply_16ic, test_params)) | ||||
|         (VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_dot_prod_16ic_xn, volk_gnsssdr_test_params_t(1e-2, test_params.scalar(), test_params.vlen(), test_params.iter(), test_params.benchmark_mode(), test_params.kernel_regex()))) | ||||
|         //(VOLK_INIT_TEST(volk_gnsssdr_16ic_resampler_16ic, test_params)) | ||||
|         //(VOLK_INIT_TEST(volk_gnsssdr_16ic_xn_resampler_16ic_xn, test_params)) | ||||
|         ; | ||||
|  | ||||
|     return test_cases; | ||||
|   | ||||
| @@ -49,28 +49,36 @@ void random_floats (t *buf, unsigned n) | ||||
|     buf[i] = uniform (); | ||||
| } | ||||
|  | ||||
| void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n) { | ||||
| void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n) | ||||
| { | ||||
|     if(type.is_complex) n *= 2; | ||||
|     if(type.is_float) { | ||||
|     if(type.is_float) | ||||
|         { | ||||
|             if(type.size == 8) random_floats<double>((double *)data, n); | ||||
|             else random_floats<float>((float *)data, n); | ||||
|     } else { | ||||
|         } | ||||
|     else | ||||
|         { | ||||
|             float int_max = float(uint64_t(2) << (type.size*8)); | ||||
|             if(type.is_signed) int_max /= 2.0; | ||||
|         for(unsigned int i=0; i<n; i++) { | ||||
|             for(unsigned int i = 0; i < n; i++) | ||||
|                 { | ||||
|                     float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max; | ||||
|                     //man i really don't know how to do this in a more clever way, you have to cast down at some point | ||||
|             switch(type.size) { | ||||
|                     switch(type.size) | ||||
|                     { | ||||
|                     case 8: | ||||
|                         if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand; | ||||
|                         else ((uint64_t *)data)[i] = (uint64_t) scaled_rand; | ||||
|                         break; | ||||
|                     case 4: | ||||
|  | ||||
|                                 if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand; | ||||
|                                 else ((uint32_t *)data)[i] = (uint32_t) scaled_rand; | ||||
|  | ||||
|                         break; | ||||
|                     case 2: | ||||
|                 if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand; | ||||
|                         if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand / 11; //sqrt(std::abs(scaled_rand / 2.0)); //// std::cout << "222222222222" << std::endl;} | ||||
|                         else ((uint16_t *)data)[i] = (uint16_t) scaled_rand; | ||||
|                         break; | ||||
|                     case 1: | ||||
| @@ -172,15 +180,21 @@ static void get_signatures_from_name(std::vector<volk_gnsssdr_type_t> &inputsig, | ||||
|             if(side == SIDE_INPUT) inputsig.push_back(type); | ||||
|             else outputsig.push_back(type); | ||||
|         } catch (...){ | ||||
|             if(token[0] == 'x' && (token.size() > 1) && (token[1] > '0' || token[1] < '9')) { //it's a multiplier | ||||
|             if(token[0] == 'x' && (token.size() > 1) && (token[1] > '0' || token[1] < '9')) { std::cout << "multiplier normsl" << std::endl;//it's a multiplier | ||||
|                 if(side == SIDE_INPUT) assert(inputsig.size() > 0); | ||||
|                 else assert(outputsig.size() > 0); | ||||
|                 int multiplier = boost::lexical_cast<int>(token.substr(1, token.size()-1)); //will throw if invalid | ||||
|                 int multiplier = 1; | ||||
|                 try { | ||||
|                         multiplier = boost::lexical_cast<int>(token.substr(1, token.size()-1)); //will throw if invalid /////////// | ||||
|                 } catch(...) { | ||||
|                         multiplier = 1; std::cout << "multiplier 333333333" << std::endl; | ||||
|                 } | ||||
|                 for(int i=1; i<multiplier; i++) { | ||||
|                         if(side == SIDE_INPUT) inputsig.push_back(inputsig.back()); | ||||
|                         else outputsig.push_back(outputsig.back()); | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             else if(side == SIDE_INPUT) { //it's the function name, at least it better be | ||||
|                 side = SIDE_NAME; | ||||
|                 fn_name.append("_"); | ||||
| @@ -268,6 +282,24 @@ inline void run_cast_test3_s8ic(volk_gnsssdr_fn_3arg_s8ic func, std::vector<void | ||||
| } | ||||
|  | ||||
|  | ||||
| // new | ||||
| inline void run_cast_test1_s16ic(volk_gnsssdr_fn_1arg_s16ic func, std::vector<void *> &buffs, lv_16sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) | ||||
| { | ||||
|     while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); | ||||
| } | ||||
|  | ||||
| inline void run_cast_test2_s16ic(volk_gnsssdr_fn_2arg_s16ic func, std::vector<void *> &buffs, lv_16sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) | ||||
| { | ||||
|     while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); | ||||
| } | ||||
|  | ||||
| inline void run_cast_test3_s16ic(volk_gnsssdr_fn_3arg_s16ic func, std::vector<void *> &buffs, lv_16sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) | ||||
| { | ||||
|     while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); | ||||
| } | ||||
|  | ||||
| // end new | ||||
|  | ||||
| inline void run_cast_test8(volk_gnsssdr_fn_8arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) | ||||
| { | ||||
|     while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], vlen, arch.c_str()); | ||||
| @@ -439,14 +471,14 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc, | ||||
|                     unsigned int iter, | ||||
|                     std::vector<volk_gnsssdr_test_results_t> *results, | ||||
|                     std::string puppet_master_name, | ||||
|                     bool benchmark_mode | ||||
| ) { | ||||
|                     bool benchmark_mode) | ||||
| { | ||||
|     // Initialize this entry in results vector | ||||
|     results->push_back(volk_gnsssdr_test_results_t()); | ||||
|     results->back().name = name; | ||||
|     results->back().vlen = vlen; | ||||
|     results->back().iter = iter; | ||||
|     std::cout << "RUN_VOLK_TESTS: " << name << "(" << vlen << "," << iter << ")" << std::endl; | ||||
|     std::cout << "RUN_VOLK_GNSSSDR_TESTS: " << name << "(" << vlen << "," << iter << ")" << std::endl; | ||||
|  | ||||
|     // vlen_twiddle will increase vlen for malloc and data generation | ||||
|     // but kernels will still be called with the user provided vlen. | ||||
| @@ -545,9 +577,16 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc, | ||||
|             else if(inputsc.size() == 1 && !inputsc[0].is_float) | ||||
|                 { | ||||
|                     if(inputsc[0].is_complex) | ||||
|                         { | ||||
|                             if(inputsc[0].size == 2) | ||||
|                                 { | ||||
|                                     run_cast_test1_s16ic((volk_gnsssdr_fn_1arg_s16ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); | ||||
|                                 } | ||||
|                             else | ||||
|                                 { | ||||
|                                     run_cast_test1_s8ic((volk_gnsssdr_fn_1arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); | ||||
|                                 } | ||||
|                         } | ||||
|                     else | ||||
|                         { | ||||
|                             run_cast_test1_s8i((volk_gnsssdr_fn_1arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); | ||||
| @@ -557,6 +596,7 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc, | ||||
|             else throw "unsupported 1 arg function >1 scalars"; | ||||
|             break; | ||||
|         case 2: | ||||
|             //std::cout << "case 2 " << inputsc.size() << std::endl; | ||||
|             if(inputsc.size() == 0) | ||||
|                 { | ||||
|                     run_cast_test2((volk_gnsssdr_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); | ||||
| @@ -576,9 +616,16 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc, | ||||
|             else if(inputsc.size() == 1 && !inputsc[0].is_float) | ||||
|                 { | ||||
|                     if(inputsc[0].is_complex) | ||||
|                         { | ||||
|                             if(inputsc[0].size == 2) | ||||
|                                 { | ||||
|                                     run_cast_test2_s16ic((volk_gnsssdr_fn_2arg_s16ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); | ||||
|                                 } | ||||
|                             else | ||||
|                                 { | ||||
|                                     run_cast_test2_s8ic((volk_gnsssdr_fn_2arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); | ||||
|                                 } | ||||
|                         } | ||||
|                     else | ||||
|                         { | ||||
|                             run_cast_test2_s8i((volk_gnsssdr_fn_2arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); | ||||
| @@ -590,6 +637,7 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc, | ||||
|         case 3: | ||||
|             if(inputsc.size() == 0) | ||||
|                 { | ||||
|                     // multipliers are here! | ||||
|                     run_cast_test3((volk_gnsssdr_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); | ||||
|                 } | ||||
|             else if(inputsc.size() == 1 && inputsc[0].is_float) | ||||
| @@ -607,9 +655,18 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc, | ||||
|             else if(inputsc.size() == 1 && !inputsc[0].is_float) | ||||
|                 { | ||||
|                     if(inputsc[0].is_complex) | ||||
|                         { | ||||
|                             { | ||||
|                                  if(inputsc[0].size == 4) | ||||
|                                      { | ||||
|                                          run_cast_test3_s16ic((volk_gnsssdr_fn_3arg_s16ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); | ||||
|                                      } | ||||
|                                  else | ||||
|                                      { | ||||
|                                          run_cast_test3_s8ic((volk_gnsssdr_fn_3arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); | ||||
|                                      } | ||||
|                              } | ||||
|                         } | ||||
|                     else | ||||
|                         { | ||||
|                             run_cast_test3_s8i((volk_gnsssdr_fn_3arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); | ||||
| @@ -650,7 +707,7 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc, | ||||
|     bool fail; | ||||
|     bool fail_global = false; | ||||
|     std::vector<bool> arch_results; | ||||
|     for(size_t i=0; i<arch_list.size(); i++) | ||||
|     for(size_t i = 0; i < arch_list.size(); i++) | ||||
|         { | ||||
|             fail = false; | ||||
|             if(i != generic_offset) | ||||
| @@ -698,16 +755,8 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc, | ||||
|                                             } | ||||
|                                         break; | ||||
|                                     case 4: | ||||
|                                         if(both_sigs[j].is_signed) | ||||
|                                         if(both_sigs[j].is_complex) | ||||
|                                             { | ||||
|                                                 fail = icompare((int32_t *) test_data[generic_offset][j], (int32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i); | ||||
|                                             } | ||||
|                                         else | ||||
|                                             { | ||||
|                                                 fail = icompare((uint32_t *) test_data[generic_offset][j], (uint32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i); | ||||
|                                             } | ||||
|                                         break; | ||||
|                                     case 2: | ||||
|                                                 if(both_sigs[j].is_signed) | ||||
|                                                     { | ||||
|                                                         fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i); | ||||
| @@ -716,11 +765,47 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc, | ||||
|                                                     { | ||||
|                                                         fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i); | ||||
|                                                     } | ||||
|                                             } | ||||
|                                         else | ||||
|                                             { | ||||
|                                                 if(both_sigs[j].is_signed) | ||||
|                                                     { | ||||
|                                                         fail = icompare((int32_t *) test_data[generic_offset][j], (int32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i); | ||||
|                                                     } | ||||
|                                                 else | ||||
|                                                     { | ||||
|                                                         fail = icompare((uint32_t *) test_data[generic_offset][j], (uint32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i); | ||||
|                                                     } | ||||
|                                             } | ||||
|                                         break; | ||||
|                                     case 2: | ||||
|                                         if(both_sigs[j].is_complex) | ||||
|                                             { | ||||
|                                                 if(both_sigs[j].is_signed) | ||||
|                                                     { | ||||
|                                                         fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i); | ||||
|                                                     } | ||||
|                                                 else | ||||
|                                                     { | ||||
|                                                         fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i); | ||||
|                                                     } | ||||
|                                             } | ||||
|                                         else | ||||
|                                             { | ||||
|                                                 if(both_sigs[j].is_signed) | ||||
|                                                     { | ||||
|                                                         fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i); // | ||||
|                                                     } | ||||
|                                                 else | ||||
|                                                     { | ||||
|                                                         fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i); | ||||
|                                                     } | ||||
|                                             } | ||||
|                                         break; | ||||
|                                     case 1: | ||||
|                                         if(both_sigs[j].is_signed) | ||||
|                                             { | ||||
|                                                 fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i); | ||||
|                                                 fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), 3); // check volk_gnsssdr_32fc_convert_8ic ! | ||||
|                                             } | ||||
|                                         else | ||||
|                                             { | ||||
|   | ||||
| @@ -173,9 +173,9 @@ typedef void (*volk_gnsssdr_fn_3arg_s8ic)(void *, void *, void *, lv_8sc_t, unsi | ||||
| //typedef void (*volk_gnsssdr_fn_1arg_s16i)(void *, int16_t, unsigned int, const char*); //one input vector, one scalar int16_t input | ||||
| //typedef void (*volk_gnsssdr_fn_2arg_s16i)(void *, void *, int16_t, unsigned int, const char*); | ||||
| //typedef void (*volk_gnsssdr_fn_3arg_s16i)(void *, void *, void *, int16_t, unsigned int, const char*); | ||||
| //typedef void (*volk_gnsssdr_fn_1arg_s16ic)(void *, lv_16sc_t, unsigned int, const char*); //one input vector, one scalar lv_16sc_t vector input | ||||
| //typedef void (*volk_gnsssdr_fn_2arg_s16ic)(void *, void *, lv_16sc_t, unsigned int, const char*); | ||||
| //typedef void (*volk_gnsssdr_fn_3arg_s16ic)(void *, void *, void *, lv_16sc_t, unsigned int, const char*); | ||||
| typedef void (*volk_gnsssdr_fn_1arg_s16ic)(void *, lv_16sc_t, unsigned int, const char*); //one input vector, one scalar lv_16sc_t vector input | ||||
| typedef void (*volk_gnsssdr_fn_2arg_s16ic)(void *, void *, lv_16sc_t, unsigned int, const char*); | ||||
| typedef void (*volk_gnsssdr_fn_3arg_s16ic)(void *, void *, void *, lv_16sc_t, unsigned int, const char*); | ||||
| typedef void (*volk_gnsssdr_fn_6arg_s16ic)(void *, void *, void *, void *, void *, void *, lv_16sc_t, unsigned int, const char*); | ||||
|  | ||||
| typedef void (*volk_gnsssdr_fn_8arg)(void *, void *, void *, void *, void *, void *, void *, void *, unsigned int, const char*); | ||||
|   | ||||
| @@ -31,18 +31,13 @@ | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
|  */ | ||||
|  | ||||
| #include "cpu_multicorrelator_16sc.h" | ||||
| #include <cmath> | ||||
| #include <iostream> | ||||
| #include <gnuradio/fxpt.h>  // fixed point sine and cosine | ||||
|  | ||||
| #include "volk_gnsssdr/volk_gnsssdr.h" | ||||
|  | ||||
| #define LV_HAVE_GENERIC | ||||
| #define LV_HAVE_SSE2 | ||||
|  | ||||
| #include "volk_gnsssdr_16ic_xn_resampler_16ic_xn.h" | ||||
| #include "volk_gnsssdr_16ic_xn_dot_prod_16ic_xn.h" | ||||
|  | ||||
| bool cpu_multicorrelator_16sc::init( | ||||
|         int max_signal_length_samples, | ||||
| @@ -99,7 +94,7 @@ void cpu_multicorrelator_16sc::update_local_code(int correlator_length_samples,f | ||||
| 		tmp_code_phases_chips[n] = d_shifts_chips[n] - rem_code_phase_chips; | ||||
| 	} | ||||
|  | ||||
| 	volk_gnsssdr_16ic_xn_resampler_16ic_xn_sse2(d_local_codes_resampled, | ||||
| 	volk_gnsssdr_16ic_xn_resampler_16ic_xn(d_local_codes_resampled, | ||||
| 			d_local_code_in, | ||||
| 			tmp_code_phases_chips, | ||||
| 			code_phase_step_chips, | ||||
| @@ -153,7 +148,7 @@ bool cpu_multicorrelator_16sc::Carrier_wipeoff_multicorrelator_resampler( | ||||
|     //std::cout<<"d_sig_doppler_wiped 16sc="<<d_sig_doppler_wiped[23]<<std::endl; | ||||
|     update_local_code(signal_length_samples, rem_code_phase_chips, code_phase_step_chips); | ||||
|  | ||||
|     volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_a_sse2(d_corr_out, d_sig_doppler_wiped, (const lv_16sc_t**)d_local_codes_resampled, signal_length_samples, d_n_correlators); | ||||
|     volk_gnsssdr_16ic_x2_dot_prod_16ic_xn(d_corr_out, d_sig_doppler_wiped, (const lv_16sc_t**)d_local_codes_resampled, signal_length_samples, d_n_correlators); | ||||
|  | ||||
|     //for (int current_correlator_tap = 0; current_correlator_tap < d_n_correlators; current_correlator_tap++) | ||||
|     //    { | ||||
|   | ||||
| @@ -1,172 +0,0 @@ | ||||
| /*! | ||||
|  * \file volk_gnsssdr_16ic_xn_resampler_16ic_xn.h | ||||
|  * \brief Volk protokernel: resample a 16 bits complex vector | ||||
|  * \authors <ul> | ||||
|  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es | ||||
|  *          </ul> | ||||
|  * | ||||
|  * Volk protokernel that resamples a 16 bits complex local code vector into | ||||
|  * several output vectors, each one with its own code phase offset | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
|  * | ||||
|  * Copyright (C) 2010-2015  (see AUTHORS file for a list of contributors) | ||||
|  * | ||||
|  * GNSS-SDR is a software defined Global Navigation | ||||
|  *          Satellite Systems receiver | ||||
|  * | ||||
|  * This file is part of GNSS-SDR. | ||||
|  * | ||||
|  * GNSS-SDR is free software: you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation, either version 3 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * GNSS-SDR is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
|  */ | ||||
|  | ||||
| #ifndef INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H | ||||
| #define INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H | ||||
|  | ||||
| #include <volk_gnsssdr/volk_gnsssdr_common.h> | ||||
| #include <volk_gnsssdr/volk_gnsssdr_complex.h> | ||||
| #include <cmath> | ||||
| //#pragma STDC FENV_ACCESS ON | ||||
|  | ||||
| #ifdef LV_HAVE_GENERIC | ||||
|  | ||||
| //int round_int( float r ) { | ||||
| //    return (r > 0.0) ? (r + 0.5) : (r - 0.5); | ||||
| //} | ||||
| /*! | ||||
|  \brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector | ||||
|  \param cVector The vector where the result will be stored | ||||
|  \param aVector One of the vectors to be multiplied | ||||
|  \param bVector One of the vectors to be multiplied | ||||
|  \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector | ||||
|  */ | ||||
|  | ||||
| static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int num_output_samples, unsigned int code_length_chips, int num_out_vectors) | ||||
| { | ||||
|     int local_code_chip_index; | ||||
|     //fesetround(FE_TONEAREST); | ||||
|     for (int current_vector = 0; current_vector < num_out_vectors; current_vector++) | ||||
|     { | ||||
| 		for (unsigned int n = 0; n < num_output_samples; n++) | ||||
| 			{ | ||||
| 				// resample code for current tap | ||||
| 				local_code_chip_index = round(code_phase_step_chips*static_cast<float>(n) + rem_code_phase_chips[current_vector]-0.5f); | ||||
| 				if (local_code_chip_index < 0.0) local_code_chip_index += code_length_chips; | ||||
| 				if (local_code_chip_index > (code_length_chips-1)) local_code_chip_index -= code_length_chips; | ||||
| 				//std::cout<<"g["<<n<<"]="<<code_phase_step_chips*static_cast<float>(n) + rem_code_phase_chips-0.5f<<","<<local_code_chip_index<<" "; | ||||
| 				result[current_vector][n] = local_code[local_code_chip_index]; | ||||
| 			} | ||||
|     } | ||||
|     //std::cout<<std::endl; | ||||
| } | ||||
|  | ||||
| #endif /*LV_HAVE_GENERIC*/ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_SSE2 | ||||
| #include <emmintrin.h> | ||||
| static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int num_output_samples, unsigned int code_length_chips, int num_out_vectors) | ||||
| { | ||||
|  | ||||
| 	_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO | ||||
|     unsigned int number; | ||||
|     const unsigned int quarterPoints = num_output_samples / 4; | ||||
|  | ||||
| 	lv_16sc_t** _result = result; | ||||
|     __attribute__((aligned(16))) int local_code_chip_index[4]; | ||||
|     float tmp_rem_code_phase_chips; | ||||
|     __m128 _rem_code_phase,_code_phase_step_chips; | ||||
|     __m128i _code_length_chips,_code_length_chips_minus1; | ||||
|     __m128 _code_phase_out,_code_phase_out_with_offset; | ||||
|  | ||||
|     _code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register | ||||
|     __attribute__((aligned(16))) int four_times_code_length_chips_minus1[4]; | ||||
|     four_times_code_length_chips_minus1[0]=code_length_chips-1; | ||||
|     four_times_code_length_chips_minus1[1]=code_length_chips-1; | ||||
|     four_times_code_length_chips_minus1[2]=code_length_chips-1; | ||||
|     four_times_code_length_chips_minus1[3]=code_length_chips-1; | ||||
|  | ||||
|     __attribute__((aligned(16))) int four_times_code_length_chips[4]; | ||||
|     four_times_code_length_chips[0]=code_length_chips; | ||||
|     four_times_code_length_chips[1]=code_length_chips; | ||||
|     four_times_code_length_chips[2]=code_length_chips; | ||||
|     four_times_code_length_chips[3]=code_length_chips; | ||||
|  | ||||
|     _code_length_chips = _mm_loadu_si128((__m128i*)&four_times_code_length_chips); //load float to all four float values in m128 register | ||||
|     _code_length_chips_minus1 = _mm_loadu_si128((__m128i*)&four_times_code_length_chips_minus1); //load float to all four float values in m128 register | ||||
|  | ||||
|     __m128i negative_indexes, overflow_indexes,_code_phase_out_int, _code_phase_out_int_neg,_code_phase_out_int_over; | ||||
|  | ||||
|     __m128i zero=_mm_setzero_si128(); | ||||
|  | ||||
|     __attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f }; | ||||
|     __m128 _4output_index=_mm_load_ps(init_idx_float); | ||||
|     __attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f }; | ||||
|     __m128 _4constant_float=_mm_load_ps(init_4constant_float); | ||||
|  | ||||
|     int current_vector=0; | ||||
|     int sample_idx=0; | ||||
|     for(number=0;number < quarterPoints; number++){ | ||||
|     	//common to all outputs | ||||
|     	_code_phase_out = _mm_mul_ps(_code_phase_step_chips, _4output_index); //compute the code phase point with the phase step | ||||
|  | ||||
|     	//output vector dependant (different code phase offset) | ||||
|     	for(current_vector=0;current_vector<num_out_vectors;current_vector++) | ||||
|     	{ | ||||
|     		tmp_rem_code_phase_chips=rem_code_phase_chips[current_vector]-0.5f; // adjust offset to perform correct rounding (chip transition at 0) | ||||
|     	    _rem_code_phase =  _mm_load1_ps(&tmp_rem_code_phase_chips); //load float to all four float values in m128 register | ||||
|  | ||||
| 			_code_phase_out_with_offset = _mm_add_ps(_code_phase_out,_rem_code_phase); //add the phase offset | ||||
| 			_code_phase_out_int=_mm_cvtps_epi32(_code_phase_out_with_offset); //convert to integer | ||||
|  | ||||
| 			negative_indexes=_mm_cmplt_epi32 (_code_phase_out_int, zero); //test for negative values | ||||
| 			_code_phase_out_int_neg=_mm_add_epi32(_code_phase_out_int,_code_length_chips); //the negative values branch | ||||
| 			_code_phase_out_int_neg=_mm_xor_si128(_code_phase_out_int,_mm_and_si128( negative_indexes,_mm_xor_si128( _code_phase_out_int_neg, _code_phase_out_int ))); | ||||
|  | ||||
| 			overflow_indexes=_mm_cmpgt_epi32  (_code_phase_out_int_neg, _code_length_chips_minus1); //test for overflow values | ||||
| 			_code_phase_out_int_over=_mm_sub_epi32(_code_phase_out_int_neg,_code_length_chips); //the negative values branch | ||||
| 			_code_phase_out_int_over=_mm_xor_si128(_code_phase_out_int_neg,_mm_and_si128( overflow_indexes,_mm_xor_si128( _code_phase_out_int_over, _code_phase_out_int_neg ))); | ||||
|  | ||||
| 			_mm_storeu_si128((__m128i*)local_code_chip_index,_code_phase_out_int_over); // Store the results back | ||||
|  | ||||
| 			//todo: optimize the local code lookup table with intrinsics, if possible | ||||
| 			_result[current_vector][sample_idx]=local_code[local_code_chip_index[0]]; | ||||
| 			_result[current_vector][sample_idx+1]=local_code[local_code_chip_index[1]]; | ||||
| 			_result[current_vector][sample_idx+2]=local_code[local_code_chip_index[2]]; | ||||
| 			_result[current_vector][sample_idx+3]=local_code[local_code_chip_index[3]]; | ||||
|  | ||||
|  | ||||
|     	} | ||||
|     	_4output_index = _mm_add_ps(_4output_index,_4constant_float); | ||||
|     	sample_idx+=4; | ||||
|     } | ||||
|  | ||||
|     for(number = quarterPoints * 4;number < num_output_samples; number++){ | ||||
|  | ||||
|     	for(current_vector=0;current_vector<num_out_vectors;current_vector++) | ||||
|     	{ | ||||
| 			local_code_chip_index[0]=static_cast<int>(code_phase_step_chips*static_cast<float>(number) + rem_code_phase_chips[current_vector]); | ||||
| 			if (local_code_chip_index[0] < 0.0) local_code_chip_index[0] += code_length_chips-1; | ||||
| 			if (local_code_chip_index[0]  > (code_length_chips-1)) local_code_chip_index[0] -= code_length_chips; | ||||
| 			_result[current_vector][number]=local_code[local_code_chip_index[0]]; | ||||
|     	} | ||||
|  | ||||
|     } | ||||
|  | ||||
| } | ||||
| #endif /* LV_HAVE_SSE2 */ | ||||
|  | ||||
| #endif /*INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_H*/ | ||||
		Reference in New Issue
	
	Block a user
	 Carles Fernandez
					Carles Fernandez