mirror of
				https://github.com/gnss-sdr/gnss-sdr
				synced 2025-10-30 23:03:05 +00:00 
			
		
		
		
	Merge branch 'next' of https://github.com/gnss-sdr/gnss-sdr into next
This commit is contained in:
		| @@ -48,6 +48,7 @@ Downloading, building and installing [GNU Radio](http://gnuradio.org/redmine/pro | ||||
| ~~~~~~ | ||||
| $ git clone https://github.com/gnuradio/pybombs.git | ||||
| $ cd pybombs | ||||
| $ python setup.py build | ||||
| $ sudo python setup.py install | ||||
| $ pybombs recipes add gr-recipes https://github.com/gnuradio/gr-recipes.git | ||||
| $ pybombs recipes add gr-etcetera https://github.com/gnuradio/gr-etcetera.git | ||||
| @@ -408,7 +409,7 @@ Install Armadillo and dependencies: | ||||
| $ brew tap homebrew/science | ||||
| $ brew install cmake hdf5 arpack superlu | ||||
| $ brew install armadillo | ||||
| $ brew install glog gflags | ||||
| $ brew install glog gflags gnutls | ||||
| ~~~~~~ | ||||
|  | ||||
| #### Build GNSS-SDR | ||||
|   | ||||
| @@ -163,19 +163,26 @@ int gps_l1_ca_pvt_cc::general_work (int noutput_items, gr_vector_int &ninput_ite | ||||
|     Gnss_Synchro **in = (Gnss_Synchro **)  &input_items[0]; //Get the input pointer | ||||
|     //Gnss_Synchro **out = (Gnss_Synchro **)  &output_items[0]; //Get the output pointer | ||||
|  | ||||
|     // ############ 1. READ EPHEMERIS FROM GLOBAL MAP #### | ||||
|     d_ls_pvt->gps_ephemeris_map = global_gps_ephemeris_map.get_map_copy(); | ||||
|  | ||||
|     for (unsigned int i = 0; i < d_nchannels; i++) | ||||
|         { | ||||
|             std::map<int,Gps_Ephemeris>::iterator gps_ephemeris_iter; | ||||
|             gps_ephemeris_iter = d_ls_pvt->gps_ephemeris_map.begin(); | ||||
|             if (in[i][0].Flag_valid_pseudorange == true) | ||||
|                 { | ||||
|                     gnss_pseudoranges_map.insert(std::pair<int,Gnss_Synchro>(in[i][0].PRN, in[i][0])); // store valid pseudoranges in a map | ||||
|                     d_rx_time = in[i][0].d_TOW_at_current_symbol; // all the channels have the same RX timestamp (common RX time pseudoranges) | ||||
|                     d_rtcm_printer->lock_time(gps_ephemeris_iter->second, d_rx_time, in[i][0]); // keep track of locking time | ||||
|                 } | ||||
|             else | ||||
|                 { | ||||
|                     d_rtcm_printer->lock_time(gps_ephemeris_iter->second, 0.0, in[i][0]); | ||||
|                 } | ||||
|         } | ||||
|  | ||||
|     // ############ 1. READ EPHEMERIS/UTC_MODE/IONO FROM GLOBAL MAPS #### | ||||
|  | ||||
|     d_ls_pvt->gps_ephemeris_map = global_gps_ephemeris_map.get_map_copy(); | ||||
|  | ||||
|     // ############ READ UTC_MODEL/IONO FROM GLOBAL MAPS #### | ||||
|     if (global_gps_utc_model_map.size() > 0) | ||||
|         { | ||||
|             // UTC MODEL data is shared for all the GPS satellites. Read always at a locked channel | ||||
| @@ -337,7 +344,7 @@ int gps_l1_ca_pvt_cc::general_work (int noutput_items, gr_vector_int &ninput_ite | ||||
|                                     if (gps_ephemeris_iter != d_ls_pvt->gps_ephemeris_map.end()) | ||||
|                                         { | ||||
|                                             d_rtcm_printer->Print_Rtcm_MT1019(gps_ephemeris_iter->second); | ||||
|                                             d_rtcm_printer->Print_Rtcm_MT1002(gps_ephemeris_iter->second, d_rx_time, gnss_pseudoranges_map); | ||||
|                                             d_rtcm_printer->Print_Rtcm_MSM(1, gps_ephemeris_iter->second, {}, {}, d_rx_time, gnss_pseudoranges_map, 1234, 0, 0, 0, 0, 0); | ||||
|                                         } | ||||
|                                     b_rtcm_writing_started = true; | ||||
|                                 } | ||||
|   | ||||
| @@ -304,3 +304,9 @@ std::string Rtcm_Printer::print_MT1005_test() | ||||
|     std::string test = rtcm->print_MT1005_test(); | ||||
|     return test; | ||||
| } | ||||
|  | ||||
|  | ||||
| unsigned int Rtcm_Printer::lock_time(const Gps_Ephemeris& gps_eph, double obs_time, const Gnss_Synchro & gnss_synchro) | ||||
| { | ||||
|     return rtcm->lock_time(gps_eph, obs_time, gnss_synchro); | ||||
| } | ||||
|   | ||||
| @@ -73,6 +73,7 @@ public: | ||||
|             bool more_messages); | ||||
|  | ||||
|     std::string print_MT1005_test(); //<!  For testing purposes | ||||
|     unsigned int lock_time(const Gps_Ephemeris& gps_eph, double obs_time, const Gnss_Synchro & gnss_synchro); | ||||
|  | ||||
| private: | ||||
|     std::string rtcm_filename; // String with the RTCM log filename | ||||
|   | ||||
| @@ -38,9 +38,6 @@ | ||||
|  | ||||
| #include <volk_gnsssdr/volk_gnsssdr_complex.h> | ||||
| #include <math.h> | ||||
| #include <stdio.h> | ||||
|  | ||||
| #define ROTATOR_RELOAD 512 | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_GENERIC | ||||
| @@ -56,20 +53,9 @@ | ||||
| static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points) | ||||
| { | ||||
|     unsigned int i = 0; | ||||
|     int j = 0; | ||||
|     lv_16sc_t tmp16; | ||||
|     lv_32fc_t tmp32; | ||||
|     for(i = 0; i < (unsigned int)(num_points / ROTATOR_RELOAD); ++i) | ||||
|         { | ||||
|             for(j = 0; j < ROTATOR_RELOAD; ++j) | ||||
|                 { | ||||
|                     tmp16 = *inVector++; | ||||
|                     tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); | ||||
|                     *outVector++ = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32))); | ||||
|                     (*phase) *= phase_inc; | ||||
|                 } | ||||
|         } | ||||
|     for(i = 0; i < num_points % ROTATOR_RELOAD; ++i) | ||||
|     for(i = 0; i < (unsigned int)(num_points); ++i) | ||||
|         { | ||||
|             tmp16 = *inVector++; | ||||
|             tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase); | ||||
|   | ||||
| @@ -78,7 +78,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* resu | ||||
|  \param[in]  num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated | ||||
|  \param[in]  num_points    The Number of complex values to be multiplied together, accumulated and stored into result | ||||
|  */ | ||||
| static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points) | ||||
| static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points) | ||||
| { | ||||
|     lv_16sc_t dotProduct = lv_cmake(0,0); | ||||
|  | ||||
| @@ -86,7 +86,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, | ||||
|  | ||||
|     const lv_16sc_t** _in_a = in_a; | ||||
|     const lv_16sc_t* _in_common = in_common; | ||||
|     lv_16sc_t* _out = out; | ||||
|     lv_16sc_t* _out = result; | ||||
|  | ||||
|     if (sse_iters > 0) | ||||
|         { | ||||
| @@ -100,7 +100,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, | ||||
|             realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 | ||||
|             imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 | ||||
|  | ||||
|             __m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result; | ||||
|             __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, results; | ||||
|  | ||||
|             mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); | ||||
|             mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255); | ||||
| @@ -141,9 +141,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, | ||||
|                     realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real); | ||||
|                     imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag); | ||||
|  | ||||
|                     result = _mm_or_si128 (realcacc[n_vec], imagcacc[n_vec]); | ||||
|                     results = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]); | ||||
|  | ||||
|                     _mm_store_si128((__m128i*)dotProductVector, result); // Store the results back into the dot product vector | ||||
|                     _mm_store_si128((__m128i*)dotProductVector, results); // Store the results back into the dot product vector | ||||
|                     dotProduct = lv_cmake(0,0); | ||||
|                     for (int i = 0; i < 4; ++i) | ||||
|                         { | ||||
| @@ -181,7 +181,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, | ||||
|  \param[in]  num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated | ||||
|  \param[in]  num_points    The Number of complex values to be multiplied together, accumulated and stored into result | ||||
|  */ | ||||
| static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points) | ||||
| static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points) | ||||
| { | ||||
|     lv_16sc_t dotProduct = lv_cmake(0,0); | ||||
|  | ||||
| @@ -189,7 +189,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out, | ||||
|  | ||||
|     const lv_16sc_t** _in_a = in_a; | ||||
|     const lv_16sc_t* _in_common = in_common; | ||||
|     lv_16sc_t* _out = out; | ||||
|     lv_16sc_t* _out = result; | ||||
|  | ||||
|     if (sse_iters > 0) | ||||
|         { | ||||
| @@ -203,7 +203,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out, | ||||
|             realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 | ||||
|             imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 | ||||
|  | ||||
|             __m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result; | ||||
|             __m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, results; | ||||
|  | ||||
|             mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); | ||||
|             mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255); | ||||
| @@ -244,9 +244,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out, | ||||
|                     realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real); | ||||
|                     imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag); | ||||
|  | ||||
|                     result = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]); | ||||
|                     results = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]); | ||||
|  | ||||
|                     _mm_storeu_si128((__m128i*)dotProductVector, result); // Store the results back into the dot product vector | ||||
|                     _mm_storeu_si128((__m128i*)dotProductVector, results); // Store the results back into the dot product vector | ||||
|                     dotProduct = lv_cmake(0,0); | ||||
|                     for (int i = 0; i < 4; ++i) | ||||
|                         { | ||||
| @@ -284,7 +284,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out, | ||||
|  \param[in]  num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated | ||||
|  \param[in]  num_points    The Number of complex values to be multiplied together, accumulated and stored into result | ||||
|  */ | ||||
| static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points) | ||||
| static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points) | ||||
| { | ||||
|     lv_16sc_t dotProduct = lv_cmake(0,0); | ||||
|  | ||||
| @@ -292,7 +292,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* out, co | ||||
|  | ||||
|     const lv_16sc_t** _in_a = in_a; | ||||
|     const lv_16sc_t* _in_common = in_common; | ||||
|     lv_16sc_t* _out = out; | ||||
|     lv_16sc_t* _out = result; | ||||
|  | ||||
|     if (neon_iters > 0) | ||||
|         { | ||||
| @@ -319,7 +319,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* out, co | ||||
|                     for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|                         { | ||||
|                             a_val = vld2_s16((int16_t*)&(_in_a[n_vec][number*4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg | ||||
|                             //__builtin_prefetch(_in_a[n_vec] + 8); | ||||
|                             //__builtin_prefetch(&_in_a[n_vec][number*4] + 8); | ||||
|  | ||||
|                             // multiply the real*real and imag*imag to get real result | ||||
|                             // a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r | ||||
|   | ||||
| @@ -49,8 +49,9 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_generic(lv_16sc_t* | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
|     { | ||||
|        in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); | ||||
|        memcpy(in_a[n], in, sizeof(lv_16sc_t) * num_points); | ||||
|        memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); | ||||
|     } | ||||
|  | ||||
|     volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); | ||||
|  | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
| @@ -72,6 +73,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_a_sse2(lv_16sc_t* r | ||||
|        in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); | ||||
|        memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); | ||||
|     } | ||||
|  | ||||
|     volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); | ||||
|  | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
| @@ -92,8 +94,9 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_u_sse2(lv_16sc_t* r | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
|     { | ||||
|        in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t)*num_points, volk_gnsssdr_get_alignment()); | ||||
|        memcpy(in_a[n], in, sizeof(lv_16sc_t)*num_points); | ||||
|        memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t)*num_points); | ||||
|     } | ||||
|  | ||||
|     volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); | ||||
|  | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
| @@ -114,8 +117,9 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon(lv_16sc_t* res | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
|     { | ||||
|        in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t)*num_points, volk_gnsssdr_get_alignment()); | ||||
|        memcpy(in_a[n], in, sizeof(lv_16sc_t)*num_points); | ||||
|        memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t)*num_points); | ||||
|     } | ||||
|  | ||||
|     volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); | ||||
|  | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
|   | ||||
| @@ -1,11 +1,14 @@ | ||||
| /*! | ||||
|  * \file volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h | ||||
|  * \brief Volk protokernel: multiplies N 16 bits vectors by a common vector phase rotated and accumulates the results in N 16 bits short complex outputs. | ||||
|  * \brief Volk protokernel: multiplies N 16 bits vectors by a common vector | ||||
|  * phase rotated and accumulates the results in N 16 bits short complex outputs. | ||||
|  * \authors <ul> | ||||
|  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es | ||||
|  *          </ul> | ||||
|  * | ||||
|  * Volk protokernel that multiplies N 16 bits vectors by a common vector, which is phase-rotated by phase offset and phase increment, and accumulates the results in N 16 bits short complex outputs. | ||||
|  * Volk protokernel that multiplies N 16 bits vectors by a common vector, which is | ||||
|  * phase-rotated by phase offset and phase increment, and accumulates the results | ||||
|  * in N 16 bits short complex outputs. | ||||
|  * It is optimized to perform the N tap correlation process in GNSS receivers. | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
| @@ -39,12 +42,15 @@ | ||||
|  | ||||
| #include <volk_gnsssdr/volk_gnsssdr_complex.h> | ||||
| #include <volk_gnsssdr/saturation_arithmetic.h> | ||||
| #include <math.h> | ||||
|  | ||||
| #ifdef LV_HAVE_GENERIC | ||||
| /*! | ||||
|  \brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector | ||||
|  \brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector | ||||
|  \param[out]    result        Array of num_a_vectors components holding the multiple versions of in_a multiplied and accumulated; this is the vector where the accumulated results will be stored | ||||
|  \param[in]  in_common     Pointer to one of the vectors to be multiplied and accumulated (reference vector) | ||||
|  \param[in]     in_common     Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector) | ||||
|  \param[in]     phase_inc     Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad)) | ||||
|  \param[in,out] phase         Initial / final phase | ||||
|  \param[in]     in_a          Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated | ||||
|  \param[in]     num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated | ||||
|  \param[in]     num_points    The Number of complex values to be multiplied together, accumulated and stored into result | ||||
| @@ -78,14 +84,16 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc | ||||
| #include <pmmintrin.h> | ||||
|  | ||||
| /*! | ||||
|  \brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector | ||||
|  \brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector | ||||
|  \param[out]    result        Array of num_a_vectors components holding the multiple versions of in_a multiplied and accumulated; this is the vector where the accumulated results will be stored | ||||
|  \param[in]  in_common     Pointer to one of the vectors to be multiplied and accumulated (reference vector) | ||||
|  \param[in]     in_common     Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector) | ||||
|  \param[in]     phase_inc     Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad)) | ||||
|  \param[in,out] phase         Initial / final phase | ||||
|  \param[in]     in_a          Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated | ||||
|  \param[in]     num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated | ||||
|  \param[in]     num_points    The Number of complex values to be multiplied together, accumulated and stored into result | ||||
|  */ | ||||
| static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points) | ||||
| static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points) | ||||
| { | ||||
|     lv_16sc_t dotProduct = lv_cmake(0,0); | ||||
|  | ||||
| @@ -93,7 +101,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ | ||||
|  | ||||
|     const lv_16sc_t** _in_a = in_a; | ||||
|     const lv_16sc_t* _in_common = in_common; | ||||
| 	lv_16sc_t* _out = out; | ||||
|     lv_16sc_t* _out = result; | ||||
|  | ||||
|     __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4]; | ||||
|  | ||||
| @@ -105,7 +113,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ | ||||
|     realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 | ||||
|     imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 | ||||
|  | ||||
| 	__m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result; | ||||
|     __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, results; | ||||
|  | ||||
|     mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); | ||||
|     mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255); | ||||
| @@ -192,7 +200,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ | ||||
|  | ||||
|                     realcacc[n_vec] = _mm_adds_epi16 (realcacc[n_vec], real); | ||||
|                     imagcacc[n_vec] = _mm_adds_epi16 (imagcacc[n_vec], imag); | ||||
|  | ||||
|                 } | ||||
|         } | ||||
|  | ||||
| @@ -201,9 +208,9 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ | ||||
|             realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real); | ||||
|             imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag); | ||||
|  | ||||
| 		result = _mm_or_si128 (realcacc[n_vec], imagcacc[n_vec]); | ||||
|             results = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]); | ||||
|  | ||||
| 		_mm_store_si128((__m128i*)dotProductVector, result); // Store the results back into the dot product vector | ||||
|             _mm_store_si128((__m128i*)dotProductVector, results); // Store the results back into the dot product vector | ||||
|             dotProduct = lv_cmake(0,0); | ||||
|             for (int i = 0; i < 4; ++i) | ||||
|                 { | ||||
| @@ -239,14 +246,16 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ | ||||
| #include <pmmintrin.h> | ||||
|  | ||||
| /*! | ||||
|  \brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector | ||||
|  \brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector | ||||
|  \param[out]    result        Array of num_a_vectors components holding the multiple versions of in_a multiplied and accumulated; this is the vector where the accumulated results will be stored | ||||
|  \param[in]  in_common     Pointer to one of the vectors to be multiplied and accumulated (reference vector) | ||||
|  \param[in]     in_common     Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector) | ||||
|  \param[in]     phase_inc     Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad)) | ||||
|  \param[in,out] phase         Initial / final phase | ||||
|  \param[in]     in_a          Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated | ||||
|  \param[in]     num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated | ||||
|  \param[in]     num_points    The Number of complex values to be multiplied together, accumulated and stored into result | ||||
|  */ | ||||
| static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points) | ||||
| static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points) | ||||
| { | ||||
|     lv_16sc_t dotProduct = lv_cmake(0,0); | ||||
|  | ||||
| @@ -254,7 +263,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ | ||||
|  | ||||
|     const lv_16sc_t** _in_a = in_a; | ||||
|     const lv_16sc_t* _in_common = in_common; | ||||
| 	lv_16sc_t* _out = out; | ||||
|     lv_16sc_t* _out = result; | ||||
|  | ||||
|     __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4]; | ||||
|  | ||||
| @@ -266,7 +275,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ | ||||
|     realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 | ||||
|     imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 | ||||
|  | ||||
| 	__m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result; | ||||
|     __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, results; | ||||
|  | ||||
|     mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0); | ||||
|     mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255); | ||||
| @@ -353,7 +362,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ | ||||
|  | ||||
|                     realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real); | ||||
|                     imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag); | ||||
|  | ||||
|                 } | ||||
|         } | ||||
|  | ||||
| @@ -362,9 +370,9 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ | ||||
|             realcacc[n_vec] = _mm_and_si128 (realcacc[n_vec], mask_real); | ||||
|             imagcacc[n_vec] = _mm_and_si128 (imagcacc[n_vec], mask_imag); | ||||
|  | ||||
| 		result = _mm_or_si128 (realcacc[n_vec], imagcacc[n_vec]); | ||||
|             results = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]); | ||||
|  | ||||
| 		_mm_storeu_si128((__m128i*)dotProductVector, result); // Store the results back into the dot product vector | ||||
|             _mm_storeu_si128((__m128i*)dotProductVector, results); // Store the results back into the dot product vector | ||||
|             dotProduct = lv_cmake(0,0); | ||||
|             for (int i = 0; i < 4; ++i) | ||||
|                 { | ||||
| @@ -392,7 +400,177 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ | ||||
|                             sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp))); | ||||
|                 } | ||||
|         } | ||||
|  | ||||
| } | ||||
| #endif /* LV_HAVE_SSE3 */ | ||||
|  | ||||
|  | ||||
| #ifdef LV_HAVE_NEON | ||||
| #include <arm_neon.h> | ||||
|  | ||||
| /*! | ||||
|  \brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector | ||||
|  \param[out]    result        Array of num_a_vectors components holding the multiple versions of in_a multiplied and accumulated; this is the vector where the accumulated results will be stored | ||||
|  \param[in]     in_common     Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector) | ||||
|  \param[in]     phase_inc     Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad)) | ||||
|  \param[in,out] phase         Initial / final phase | ||||
|  \param[in]     in_a          Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated | ||||
|  \param[in]     num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated | ||||
|  \param[in]     num_points    The Number of complex values to be multiplied together, accumulated and stored into result | ||||
|  */ | ||||
| static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points) | ||||
| { | ||||
|     const unsigned int neon_iters = num_points / 4; | ||||
|  | ||||
|     const lv_16sc_t** _in_a = in_a; | ||||
|     const lv_16sc_t* _in_common = in_common; | ||||
|     lv_16sc_t* _out = result; | ||||
|  | ||||
|     lv_16sc_t tmp16_, tmp; | ||||
|     lv_32fc_t tmp32_; | ||||
|  | ||||
|     if (neon_iters > 0) | ||||
|         { | ||||
|             lv_16sc_t dotProduct = lv_cmake(0,0); | ||||
|  | ||||
|             lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc; | ||||
|             __VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) }; | ||||
|             __VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) }; | ||||
|  | ||||
|             float32x4_t _phase4_real = vld1q_f32(__phase4_real); | ||||
|             float32x4_t _phase4_imag = vld1q_f32(__phase4_imag); | ||||
|  | ||||
|             lv_32fc_t phase2 = (lv_32fc_t)(*phase) * phase_inc; | ||||
|             lv_32fc_t phase3 = phase2 * phase_inc; | ||||
|             lv_32fc_t phase4 = phase3 * phase_inc; | ||||
|  | ||||
|             __VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) }; | ||||
|             __VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) }; | ||||
|  | ||||
|             float32x4_t _phase_real = vld1q_f32(__phase_real); | ||||
|             float32x4_t _phase_imag = vld1q_f32(__phase_imag); | ||||
|  | ||||
|             int16x4x2_t a_val, b_val, c_val; | ||||
|             __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4]; | ||||
|             float32x4_t half = vdupq_n_f32(0.5f); | ||||
|             int16x4x2_t tmp16; | ||||
|             int32x4x2_t tmp32i; | ||||
|             float32x4x2_t tmp32f, tmp32_real, tmp32_imag; | ||||
|             float32x4_t sign, PlusHalf, Round; | ||||
|  | ||||
|             int16x4x2_t* accumulator; | ||||
|             accumulator = (int16x4x2_t*)calloc(num_a_vectors, sizeof(int16x4x2_t)); | ||||
|  | ||||
|             for(int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|                 { | ||||
|                     accumulator[n_vec].val[0] = vdup_n_s16(0); | ||||
|                     accumulator[n_vec].val[1] = vdup_n_s16(0); | ||||
|                 } | ||||
|  | ||||
|             for(unsigned int number = 0; number < neon_iters; number++) | ||||
|                 { | ||||
|                     /* load 4 complex numbers (int 16 bits each component) */ | ||||
|                     tmp16 = vld2_s16((int16_t*)_in_common); | ||||
|                     __builtin_prefetch(_in_common + 8); | ||||
|                     _in_common += 4; | ||||
|  | ||||
|                     /* promote them to int 32 bits */ | ||||
|                     tmp32i.val[0] = vmovl_s16(tmp16.val[0]); | ||||
|                     tmp32i.val[1] = vmovl_s16(tmp16.val[1]); | ||||
|  | ||||
|                     /* promote them to float 32 bits */ | ||||
|                     tmp32f.val[0] = vcvtq_f32_s32(tmp32i.val[0]); | ||||
|                     tmp32f.val[1] = vcvtq_f32_s32(tmp32i.val[1]); | ||||
|  | ||||
|                     /* complex multiplication of four complex samples (float 32 bits each component) */ | ||||
|                     tmp32_real.val[0] = vmulq_f32(tmp32f.val[0], _phase_real); | ||||
|                     tmp32_real.val[1] = vmulq_f32(tmp32f.val[1], _phase_imag); | ||||
|                     tmp32_imag.val[0] = vmulq_f32(tmp32f.val[0], _phase_imag); | ||||
|                     tmp32_imag.val[1] = vmulq_f32(tmp32f.val[1], _phase_real); | ||||
|  | ||||
|                     tmp32f.val[0] = vsubq_f32(tmp32_real.val[0], tmp32_real.val[1]); | ||||
|                     tmp32f.val[1] = vaddq_f32(tmp32_imag.val[0], tmp32_imag.val[1]); | ||||
|  | ||||
|                     /* downcast results to int32 */ | ||||
|                     /* in __aarch64__ we can do that with vcvtaq_s32_f32(ret1); vcvtaq_s32_f32(ret2); */ | ||||
|                     sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[0]), 31))); | ||||
|                     PlusHalf = vaddq_f32(tmp32f.val[0], half); | ||||
|                     Round = vsubq_f32(PlusHalf, sign); | ||||
|                     tmp32i.val[0] = vcvtq_s32_f32(Round); | ||||
|  | ||||
|                     sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[1]), 31))); | ||||
|                     PlusHalf = vaddq_f32(tmp32f.val[1], half); | ||||
|                     Round = vsubq_f32(PlusHalf, sign); | ||||
|                     tmp32i.val[1] = vcvtq_s32_f32(Round); | ||||
|  | ||||
|                     /* downcast results to int16 */ | ||||
|                     tmp16.val[0] = vqmovn_s32(tmp32i.val[0]); | ||||
|                     tmp16.val[1] = vqmovn_s32(tmp32i.val[1]); | ||||
|  | ||||
|                     /* compute next four phases */ | ||||
|                     tmp32_real.val[0] = vmulq_f32(_phase_real, _phase4_real); | ||||
|                     tmp32_real.val[1] = vmulq_f32(_phase_imag, _phase4_imag); | ||||
|                     tmp32_imag.val[0] = vmulq_f32(_phase_real, _phase4_imag); | ||||
|                     tmp32_imag.val[1] = vmulq_f32(_phase_imag, _phase4_real); | ||||
|  | ||||
|                     _phase_real = vsubq_f32(tmp32_real.val[0], tmp32_real.val[1]); | ||||
|                     _phase_imag = vaddq_f32(tmp32_imag.val[0], tmp32_imag.val[1]); | ||||
|  | ||||
|                     for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|                         { | ||||
|                             a_val = vld2_s16((int16_t*)&(_in_a[n_vec][number*4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg | ||||
|                             __builtin_prefetch(&_in_a[n_vec][number*4] + 8); | ||||
|  | ||||
|                             // multiply the real*real and imag*imag to get real result | ||||
|                             // a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r | ||||
|                             b_val.val[0] = vmul_s16(a_val.val[0], tmp16.val[0]); | ||||
|                             // a0i*b0i|a1i*b1i|a2i*b2i|a3i*b3i | ||||
|                             b_val.val[1] = vmul_s16(a_val.val[1], tmp16.val[1]); | ||||
|                             c_val.val[0] = vsub_s16(b_val.val[0], b_val.val[1]); | ||||
|  | ||||
|                             // Multiply cross terms to get the imaginary result | ||||
|                             // a0r*b0i|a1r*b1i|a2r*b2i|a3r*b3i | ||||
|                             b_val.val[0] = vmul_s16(a_val.val[0], tmp16.val[1]); | ||||
|                             // a0i*b0r|a1i*b1r|a2i*b2r|a3i*b3r | ||||
|                             b_val.val[1] = vmul_s16(a_val.val[1], tmp16.val[0]); | ||||
|                             c_val.val[1] = vadd_s16(b_val.val[0], b_val.val[1]); | ||||
|  | ||||
|                             accumulator[n_vec].val[0] = vadd_s16(accumulator[n_vec].val[0], c_val.val[0]); | ||||
|                             accumulator[n_vec].val[1] = vadd_s16(accumulator[n_vec].val[1], c_val.val[1]); | ||||
|                         } | ||||
|                 } | ||||
|  | ||||
|             for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|                 { | ||||
|                     vst2_s16((int16_t*)dotProductVector, accumulator[n_vec]); // Store the results back into the dot product vector | ||||
|                     dotProduct = lv_cmake(0,0); | ||||
|                     for (int i = 0; i < 4; ++i) | ||||
|                         { | ||||
|                             dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), | ||||
|                                     sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); | ||||
|                         } | ||||
|                     _out[n_vec] = dotProduct; | ||||
|                 } | ||||
|             free(accumulator); | ||||
|             vst1q_f32((float32_t*)__phase_real, _phase_real); | ||||
|             vst1q_f32((float32_t*)__phase_imag, _phase_imag); | ||||
|  | ||||
|             (*phase) = lv_cmake((float32_t)__phase_real[0], (float32_t)__phase_imag[0]); | ||||
|         } | ||||
|  | ||||
|     for (unsigned int n = neon_iters * 4; n < num_points; n++) | ||||
|         { | ||||
|             tmp16_ = in_common[n]; | ||||
|             tmp32_ = lv_cmake((float32_t)lv_creal(tmp16_), (float32_t)lv_cimag(tmp16_)) * (*phase); | ||||
|             tmp16_ = lv_cmake((int16_t)rintf(lv_creal(tmp32_)), (int16_t)rintf(lv_cimag(tmp32_))); | ||||
|             (*phase) *= phase_inc; | ||||
|             for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) | ||||
|                 { | ||||
|                     tmp = tmp16_ * in_a[n_vec][n]; | ||||
|                     _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp))); | ||||
|                 } | ||||
|         } | ||||
| } | ||||
|  | ||||
| #endif /* LV_HAVE_NEON */ | ||||
|  | ||||
| #endif /*INCLUDED_volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_H*/ | ||||
|   | ||||
| @@ -57,7 +57,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_generic(lv_ | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
|         { | ||||
|             in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); | ||||
|        memcpy(in_a[n], in, sizeof(lv_16sc_t) * num_points); | ||||
|             memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); | ||||
|         } | ||||
|     volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(result, local_code, phase_inc[0], phase,(const lv_16sc_t**) in_a, num_a_vectors, num_points); | ||||
|  | ||||
| @@ -88,6 +88,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3(lv_1 | ||||
|             in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); | ||||
|             memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); | ||||
|         } | ||||
|  | ||||
|     volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points); | ||||
|  | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
| @@ -118,6 +119,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_sse3(lv_1 | ||||
|             in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); | ||||
|             memcpy(in_a[n], in, sizeof(lv_16sc_t) * num_points); | ||||
|         } | ||||
|  | ||||
|     volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points); | ||||
|  | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
| @@ -129,6 +131,37 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_sse3(lv_1 | ||||
|  | ||||
| #endif // SSE3 | ||||
|  | ||||
| #ifdef LV_HAVE_NEON | ||||
|  | ||||
| static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points) | ||||
| { /* Puppet wrapper: calls the _neon rotator dot-product kernel with a fixed start phase, a fixed phase step and three identical copies of `in`. */ | ||||
|     // The kernel expects unit-magnitude complex exponentials, so both the start phase and the phase step are built from cos/sin of a fixed angle. | ||||
|     float rem_carrier_phase_in_rad = 0.345; /* fixed start angle; radians per the variable name */ | ||||
|     float phase_step_rad = 0.123; /* fixed per-sample angle step; radians per the variable name */ | ||||
|     lv_32fc_t phase[1]; /* one-element array so it can be handed to the kernel as a pointer */ | ||||
|     phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), -sin(rem_carrier_phase_in_rad)); /* cos(a) - j*sin(a), i.e. exp(-j*a) */ | ||||
|     lv_32fc_t phase_inc[1]; | ||||
|     phase_inc[0] = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad)); /* exp(-j*step) */ | ||||
|  | ||||
|     int num_a_vectors = 3; /* number of input vectors handed to the kernel; presumably `result` must hold this many outputs - confirm with caller */ | ||||
|     lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); /* table of num_a_vectors buffer pointers */ | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) /* NOTE(review): n is unsigned while num_a_vectors is int (signed/unsigned comparison) */ | ||||
|         { | ||||
|             in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment()); /* room for num_points complex samples; the allocation result is not checked */ | ||||
|             memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points); /* every vector is a verbatim copy of `in` */ | ||||
|         } | ||||
|  | ||||
|     volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points); /* phase step goes by value, phase by pointer - presumably updated in place by the kernel */ | ||||
|  | ||||
|     for(unsigned int n = 0; n < num_a_vectors; n++) | ||||
|         { | ||||
|             volk_gnsssdr_free(in_a[n]); /* release each per-vector copy */ | ||||
|         } | ||||
|     volk_gnsssdr_free(in_a); /* release the pointer table itself */ | ||||
| } | ||||
|  | ||||
| #endif // NEON | ||||
|  | ||||
| #endif  // INCLUDED_volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_H | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -87,7 +87,7 @@ endif(NOT DEFINED _XOPEN_SOURCE) | ||||
| ######################################################################## | ||||
| # detect x86 flavor of CPU | ||||
| ######################################################################## | ||||
| if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(i.86|x86|x86_64|amd64)$") | ||||
| if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(i.86|x86|x86_64|amd64|AMD64)$") | ||||
|     message(STATUS "x86* CPU detected") | ||||
|     set(CPU_IS_x86 TRUE) | ||||
| endif() | ||||
| @@ -106,9 +106,13 @@ macro(check_arch arch_name) | ||||
|     set(flags ${ARGN}) | ||||
|     set(have_${arch_name} TRUE) | ||||
|     foreach(flag ${flags}) | ||||
|         if ( (${COMPILER_NAME} STREQUAL "MSVC") AND (${flag} STREQUAL "/arch:SSE2" OR ${flag} STREQUAL "/arch:SSE" )) | ||||
|             # SSE/SSE2 is supported in MSVC since VS 2005 but flag not available when compiling 64-bit so do not check | ||||
|         else() | ||||
|             include(CheckCXXCompilerFlag) | ||||
|             set(have_flag have${flag}) | ||||
|         execute_process( #make the have_flag have nice alphanum chars (just for looks/not necessary) | ||||
|             #make the have_flag have nice alphanum chars (just for looks/not necessary) | ||||
|             execute_process( | ||||
|                 COMMAND ${PYTHON_EXECUTABLE} -c "import re; print(re.sub('\\W', '_', '${have_flag}'))" | ||||
|                 OUTPUT_VARIABLE have_flag OUTPUT_STRIP_TRAILING_WHITESPACE | ||||
|             ) | ||||
| @@ -120,6 +124,7 @@ macro(check_arch arch_name) | ||||
|             if (NOT ${have_flag}) | ||||
|                 set(have_${arch_name} FALSE) | ||||
|             endif() | ||||
|         endif() | ||||
|     endforeach() | ||||
|     if (have_${arch_name}) | ||||
|         list(APPEND available_archs ${arch_name}) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Carles Fernandez
					Carles Fernandez