Merge branch 'next' of https://github.com/gnss-sdr/gnss-sdr into next

2025-10-30 23:03:05 +00:00 · 2016-02-15 14:48:18 +01:00
parent 43c36990ef b2a654c646
commit 3385d7a3e1
10 changed files with 580 additions and 359 deletions
--- a/README.md
+++ b/README.md
@@ -48,6 +48,7 @@ Downloading, building and installing [GNU Radio](http://gnuradio.org/redmine/pro
 ~~~~~~
 $ git clone https://github.com/gnuradio/pybombs.git
 $ cd pybombs
+$ python setup.py build
 $ sudo python setup.py install
 $ pybombs recipes add gr-recipes https://github.com/gnuradio/gr-recipes.git
 $ pybombs recipes add gr-etcetera https://github.com/gnuradio/gr-etcetera.git
@@ -408,7 +409,7 @@ Install Armadillo and dependencies:
 $ brew tap homebrew/science
 $ brew install cmake hdf5 arpack superlu
 $ brew install armadillo
-$ brew install glog gflags
+$ brew install glog gflags gnutls
 ~~~~~~

 #### Build GNSS-SDR
--- a/src/algorithms/PVT/gnuradio_blocks/gps_l1_ca_pvt_cc.cc
+++ b/src/algorithms/PVT/gnuradio_blocks/gps_l1_ca_pvt_cc.cc
@@ -163,19 +163,26 @@ int gps_l1_ca_pvt_cc::general_work (int noutput_items, gr_vector_int &ninput_ite
    Gnss_Synchro **in = (Gnss_Synchro **)  &input_items[0]; //Get the input pointer
    //Gnss_Synchro **out = (Gnss_Synchro **)  &output_items[0]; //Get the output pointer

+    // ############ 1. READ EPHEMERIS FROM GLOBAL MAP ####
+    d_ls_pvt->gps_ephemeris_map = global_gps_ephemeris_map.get_map_copy();
+
    for (unsigned int i = 0; i < d_nchannels; i++)
        {
+            std::map<int,Gps_Ephemeris>::iterator gps_ephemeris_iter;
+            gps_ephemeris_iter = d_ls_pvt->gps_ephemeris_map.begin();
            if (in[i][0].Flag_valid_pseudorange == true)
                {
                    gnss_pseudoranges_map.insert(std::pair<int,Gnss_Synchro>(in[i][0].PRN, in[i][0])); // store valid pseudoranges in a map
                    d_rx_time = in[i][0].d_TOW_at_current_symbol; // all the channels have the same RX timestamp (common RX time pseudoranges)
+                    d_rtcm_printer->lock_time(gps_ephemeris_iter->second, d_rx_time, in[i][0]); // keep track of locking time
+                }
+            else
+                {
+                    d_rtcm_printer->lock_time(gps_ephemeris_iter->second, 0.0, in[i][0]);
                }
        }

-    // ############ 1. READ EPHEMERIS/UTC_MODE/IONO FROM GLOBAL MAPS ####
-
-    d_ls_pvt->gps_ephemeris_map = global_gps_ephemeris_map.get_map_copy();
-
+    // ############ READ UTC_MODEL/IONO FROM GLOBAL MAPS ####
    if (global_gps_utc_model_map.size() > 0)
        {
            // UTC MODEL data is shared for all the GPS satellites. Read always at a locked channel
@@ -337,7 +344,7 @@ int gps_l1_ca_pvt_cc::general_work (int noutput_items, gr_vector_int &ninput_ite
                                    if (gps_ephemeris_iter != d_ls_pvt->gps_ephemeris_map.end())
                                        {
                                            d_rtcm_printer->Print_Rtcm_MT1019(gps_ephemeris_iter->second);
-                                            d_rtcm_printer->Print_Rtcm_MT1002(gps_ephemeris_iter->second, d_rx_time, gnss_pseudoranges_map);
+                                            d_rtcm_printer->Print_Rtcm_MSM(1, gps_ephemeris_iter->second, {}, {}, d_rx_time, gnss_pseudoranges_map, 1234, 0, 0, 0, 0, 0);
                                        }
                                    b_rtcm_writing_started = true;
                                }
--- a/src/algorithms/PVT/libs/rtcm_printer.cc
+++ b/src/algorithms/PVT/libs/rtcm_printer.cc
@@ -304,3 +304,9 @@ std::string Rtcm_Printer::print_MT1005_test()
    std::string test = rtcm->print_MT1005_test();
    return test;
 }
+
+
+unsigned int Rtcm_Printer::lock_time(const Gps_Ephemeris& gps_eph, double obs_time, const Gnss_Synchro & gnss_synchro)
+{
+    return rtcm->lock_time(gps_eph, obs_time, gnss_synchro); // thin wrapper: forwards all arguments unchanged to rtcm->lock_time() and returns its result
+}
--- a/src/algorithms/PVT/libs/rtcm_printer.h
+++ b/src/algorithms/PVT/libs/rtcm_printer.h
@@ -73,6 +73,7 @@ public:
            bool more_messages);

    std::string print_MT1005_test(); //<!  For testing purposes
+    unsigned int lock_time(const Gps_Ephemeris& gps_eph, double obs_time, const Gnss_Synchro & gnss_synchro);

 private:
    std::string rtcm_filename; // String with the RTCM log filename
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h
@@ -38,9 +38,6 @@

 #include <volk_gnsssdr/volk_gnsssdr_complex.h>
 #include <math.h>
-#include <stdio.h>
-
-#define ROTATOR_RELOAD 512


 #ifdef LV_HAVE_GENERIC
@@ -56,20 +53,9 @@
 static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
 {
    unsigned int i = 0;
-    int j = 0;
    lv_16sc_t tmp16;
    lv_32fc_t tmp32;
-    for(i = 0; i < (unsigned int)(num_points / ROTATOR_RELOAD); ++i)
-        {
-            for(j = 0; j < ROTATOR_RELOAD; ++j)
-                {
-                    tmp16 = *inVector++;
-                    tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
-                    *outVector++ = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
-                    (*phase) *= phase_inc;
-                }
-        }
-    for(i = 0; i < num_points % ROTATOR_RELOAD; ++i)
+    for(i = 0; i < (unsigned int)(num_points); ++i)
        {
            tmp16 = *inVector++;
            tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
@@ -78,7 +78,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* resu
 \param[in]  num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
 \param[in]  num_points    The Number of complex values to be multiplied together, accumulated and stored into result
 */
-static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points)
+static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points)
 {
    lv_16sc_t dotProduct = lv_cmake(0,0);

@@ -86,7 +86,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out,

    const lv_16sc_t** _in_a = in_a;
    const lv_16sc_t* _in_common = in_common;
-    lv_16sc_t* _out = out;
+    lv_16sc_t* _out = result;

    if (sse_iters > 0)
        {
@@ -100,7 +100,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out,
            realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
            imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0

-            __m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result;
+            __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, results;

            mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
            mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255);
@@ -141,9 +141,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out,
                    realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real);
                    imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag);

-                    result = _mm_or_si128 (realcacc[n_vec], imagcacc[n_vec]);
+                    results = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]);

-                    _mm_store_si128((__m128i*)dotProductVector, result); // Store the results back into the dot product vector
+                    _mm_store_si128((__m128i*)dotProductVector, results); // Store the results back into the dot product vector
                    dotProduct = lv_cmake(0,0);
                    for (int i = 0; i < 4; ++i)
                        {
@@ -181,7 +181,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out,
 \param[in]  num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
 \param[in]  num_points    The Number of complex values to be multiplied together, accumulated and stored into result
 */
-static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points)
+static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points)
 {
    lv_16sc_t dotProduct = lv_cmake(0,0);

@@ -189,7 +189,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,

    const lv_16sc_t** _in_a = in_a;
    const lv_16sc_t* _in_common = in_common;
-    lv_16sc_t* _out = out;
+    lv_16sc_t* _out = result;

    if (sse_iters > 0)
        {
@@ -203,7 +203,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
            realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
            imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0

-            __m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result;
+            __m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, results;

            mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
            mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255);
@@ -244,9 +244,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
                    realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real);
                    imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag);

-                    result = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]);
+                    results = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]);

-                    _mm_storeu_si128((__m128i*)dotProductVector, result); // Store the results back into the dot product vector
+                    _mm_storeu_si128((__m128i*)dotProductVector, results); // Store the results back into the dot product vector
                    dotProduct = lv_cmake(0,0);
                    for (int i = 0; i < 4; ++i)
                        {
@@ -284,7 +284,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
 \param[in]  num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
 \param[in]  num_points    The Number of complex values to be multiplied together, accumulated and stored into result
 */
-static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points)
+static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points)
 {
    lv_16sc_t dotProduct = lv_cmake(0,0);

@@ -292,7 +292,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* out, co

    const lv_16sc_t** _in_a = in_a;
    const lv_16sc_t* _in_common = in_common;
-    lv_16sc_t* _out = out;
+    lv_16sc_t* _out = result;

    if (neon_iters > 0)
        {
@@ -319,7 +319,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* out, co
                    for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
                        {
                            a_val = vld2_s16((int16_t*)&(_in_a[n_vec][number*4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
-                            //__builtin_prefetch(_in_a[n_vec] + 8);
+                            //__builtin_prefetch(&_in_a[n_vec][number*4] + 8);

                            // multiply the real*real and imag*imag to get real result
                            // a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h
@@ -49,8 +49,9 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_generic(lv_16sc_t*
    for(unsigned int n = 0; n < num_a_vectors; n++)
    {
       in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
-       memcpy(in_a[n], in, sizeof(lv_16sc_t) * num_points);
+       memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points);
    }
+
    volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points);

    for(unsigned int n = 0; n < num_a_vectors; n++)
@@ -72,6 +73,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_a_sse2(lv_16sc_t* r
       in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
       memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points);
    }
+
    volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points);

    for(unsigned int n = 0; n < num_a_vectors; n++)
@@ -92,8 +94,9 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_u_sse2(lv_16sc_t* r
    for(unsigned int n = 0; n < num_a_vectors; n++)
    {
       in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t)*num_points, volk_gnsssdr_get_alignment());
-       memcpy(in_a[n], in, sizeof(lv_16sc_t)*num_points);
+       memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t)*num_points);
    }
+
    volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points);

    for(unsigned int n = 0; n < num_a_vectors; n++)
@@ -114,8 +117,9 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon(lv_16sc_t* res
    for(unsigned int n = 0; n < num_a_vectors; n++)
    {
       in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t)*num_points, volk_gnsssdr_get_alignment());
-       memcpy(in_a[n], in, sizeof(lv_16sc_t)*num_points);
+       memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t)*num_points);
    }
+
    volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points);

    for(unsigned int n = 0; n < num_a_vectors; n++)
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h
@@ -1,11 +1,14 @@
 /*!
 * \file volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
- * \brief Volk protokernel: multiplies N 16 bits vectors by a common vector phase rotated and accumulates the results in N 16 bits short complex outputs.
+ * \brief Volk protokernel: multiplies N 16 bits vectors by a common vector
+ * phase rotated and accumulates the results in N 16 bits short complex outputs.
 * \authors <ul>
 *          <li> Javier Arribas, 2015. jarribas(at)cttc.es
 *          </ul>
 *
- * Volk protokernel that multiplies N 16 bits vectors by a common vector, which is phase-rotated by phase offset and phase increment, and accumulates the results in N 16 bits short complex outputs.
+ * Volk protokernel that multiplies N 16 bits vectors by a common vector, which is
+ * phase-rotated by phase offset and phase increment, and accumulates the results
+ * in N 16 bits short complex outputs.
 * It is optimized to perform the N tap correlation process in GNSS receivers.
 *
 * -------------------------------------------------------------------------
@@ -39,12 +42,15 @@

 #include <volk_gnsssdr/volk_gnsssdr_complex.h>
 #include <volk_gnsssdr/saturation_arithmetic.h>
+#include <math.h>

 #ifdef LV_HAVE_GENERIC
 /*!
- \brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
+ \brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
 \param[out]    result        Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
- \param[in]  in_common     Pointer to one of the vectors to be multiplied and accumulated (reference vector)
+ \param[in]     in_common     Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector)
+ \param[in]     phase_inc     Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
+ \param[in,out] phase         Initial / final phase
 \param[in]     in_a          Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
 \param[in]     num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
 \param[in]     num_points    The Number of complex values to be multiplied together, accumulated and stored into result
@@ -78,14 +84,16 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc
 #include <pmmintrin.h>

 /*!
- \brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
+ \brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
 \param[out]    result        Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
- \param[in]  in_common     Pointer to one of the vectors to be multiplied and accumulated (reference vector)
+ \param[in]     in_common     Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector)
+ \param[in]     phase_inc     Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
+ \param[in,out] phase         Initial / final phase
 \param[in]     in_a          Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
 \param[in]     num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
 \param[in]     num_points    The Number of complex values to be multiplied together, accumulated and stored into result
 */
-static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points)
+static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points)
 {
    lv_16sc_t dotProduct = lv_cmake(0,0);

@@ -93,7 +101,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_

    const lv_16sc_t** _in_a = in_a;
    const lv_16sc_t* _in_common = in_common;
-	lv_16sc_t* _out = out;
+    lv_16sc_t* _out = result;

    __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];

@@ -105,7 +113,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
    realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
    imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0

-	__m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result;
+    __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, results;

    mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
    mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255);
@@ -192,7 +200,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_

                    realcacc[n_vec] = _mm_adds_epi16 (realcacc[n_vec], real);
                    imagcacc[n_vec] = _mm_adds_epi16 (imagcacc[n_vec], imag);
-
                }
        }

@@ -201,9 +208,9 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
            realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real);
            imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag);

-		result = _mm_or_si128 (realcacc[n_vec], imagcacc[n_vec]);
+            results = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]);

-		_mm_store_si128((__m128i*)dotProductVector, result); // Store the results back into the dot product vector
+            _mm_store_si128((__m128i*)dotProductVector, results); // Store the results back into the dot product vector
            dotProduct = lv_cmake(0,0);
            for (int i = 0; i < 4; ++i)
                {
@@ -239,14 +246,16 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
 #include <pmmintrin.h>

 /*!
- \brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
+ \brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
 \param[out]    result        Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
- \param[in]  in_common     Pointer to one of the vectors to be multiplied and accumulated (reference vector)
+ \param[in]     in_common     Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector)
+ \param[in]     phase_inc     Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
+ \param[in,out] phase         Initial / final phase
 \param[in]     in_a          Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
 \param[in]     num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
 \param[in]     num_points    The Number of complex values to be multiplied together, accumulated and stored into result
 */
-static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points)
+static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points)
 {
    lv_16sc_t dotProduct = lv_cmake(0,0);

@@ -254,7 +263,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_

    const lv_16sc_t** _in_a = in_a;
    const lv_16sc_t* _in_common = in_common;
-	lv_16sc_t* _out = out;
+    lv_16sc_t* _out = result;

    __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];

@@ -266,7 +275,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
    realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
    imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0

-	__m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result;
+    __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, results;

    mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
    mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255);
@@ -353,7 +362,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_

                    realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real);
                    imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag);
-
                }
        }

@@ -362,9 +370,9 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
            realcacc[n_vec] = _mm_and_si128 (realcacc[n_vec], mask_real);
            imagcacc[n_vec] = _mm_and_si128 (imagcacc[n_vec], mask_imag);

-		result = _mm_or_si128 (realcacc[n_vec], imagcacc[n_vec]);
+            results = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]);

-		_mm_storeu_si128((__m128i*)dotProductVector, result); // Store the results back into the dot product vector
+            _mm_storeu_si128((__m128i*)dotProductVector, results); // Store the results back into the dot product vector
            dotProduct = lv_cmake(0,0);
            for (int i = 0; i < 4; ++i)
                {
@@ -392,7 +400,177 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
                            sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
                }
        }
-
 }
 #endif /* LV_HAVE_SSE3 */
+
+
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+
+/*!
+ \brief NEON version: rotates the reference complex vector sample by sample, multiplies it with multiple versions of another complex vector, accumulates the products and stores one accumulated value per version in the output vector
+ \param[out]    result        Array of num_a_vectors components; result[k] receives the accumulated product of the rotated in_common with in_a[k]
+ \param[in]     in_common     Pointer to the reference vector that is rotated, multiplied and accumulated
+ \param[in]     phase_inc     Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
+ \param[in,out] phase         On entry, the phase applied to the first sample; on exit, the phase for the sample following the last one processed
+ \param[in]     in_a          Pointer to an array of num_a_vectors pointers, each to one version of the other vector to be multiplied and accumulated
+ \param[in]     num_a_vectors Number of vectors to be multiplied by the rotated reference vector and accumulated
+ \param[in]     num_points    Number of complex samples read from in_common and from each in_a vector
+ */
+static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a,  int num_a_vectors, unsigned int num_points)
+{
+    const unsigned int neon_iters = num_points / 4; // number of full 4-sample vector iterations
+
+    const lv_16sc_t** _in_a = in_a;
+    const lv_16sc_t* _in_common = in_common;
+    lv_16sc_t* _out = result;
+
+    lv_16sc_t tmp16_, tmp; // scalars used only by the tail loop at the end
+    lv_32fc_t tmp32_;
+
+    if (neon_iters > 0)
+        {
+            lv_16sc_t dotProduct = lv_cmake(0,0);
+
+            lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc; // phase advance over four samples
+            __VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
+            __VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
+
+            float32x4_t _phase4_real = vld1q_f32(__phase4_real);
+            float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
+
+            lv_32fc_t phase2 = (lv_32fc_t)(*phase) * phase_inc; // phases for the 2nd, 3rd and 4th sample of the first group
+            lv_32fc_t phase3 = phase2 * phase_inc;
+            lv_32fc_t phase4 = phase3 * phase_inc;
+
+            __VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) }; // lane k = phase for sample k of the current group
+            __VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
+
+            float32x4_t _phase_real = vld1q_f32(__phase_real);
+            float32x4_t _phase_imag = vld1q_f32(__phase_imag);
+
+            int16x4x2_t a_val, b_val, c_val;
+            __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
+            float32x4_t half = vdupq_n_f32(0.5f); // rounding offset used in the float -> int32 conversion below
+            int16x4x2_t tmp16;
+            int32x4x2_t tmp32i;
+            float32x4x2_t tmp32f, tmp32_real, tmp32_imag;
+            float32x4_t sign, PlusHalf, Round;
+
+            int16x4x2_t* accumulator; // one (real lanes, imag lanes) partial-sum pair per in_a vector
+            accumulator = (int16x4x2_t*)calloc(num_a_vectors, sizeof(int16x4x2_t)); // NOTE(review): the returned pointer is not checked for NULL before use
+
+            for(int n_vec = 0; n_vec < num_a_vectors; n_vec++) // explicit zeroing of the accumulators (calloc has already zero-filled the memory)
+                {
+                    accumulator[n_vec].val[0] = vdup_n_s16(0);
+                    accumulator[n_vec].val[1] = vdup_n_s16(0);
+                }
+
+            for(unsigned int number = 0; number < neon_iters; number++)
+                {
+                    /* load 4 complex numbers (int 16 bits each component), de-interleaved: val[0] = real parts, val[1] = imaginary parts */
+                    tmp16 = vld2_s16((int16_t*)_in_common);
+                    __builtin_prefetch(_in_common + 8);
+                    _in_common += 4;
+
+                    /* promote them to int 32 bits */
+                    tmp32i.val[0] = vmovl_s16(tmp16.val[0]);
+                    tmp32i.val[1] = vmovl_s16(tmp16.val[1]);
+
+                    /* promote them to float 32 bits */
+                    tmp32f.val[0] = vcvtq_f32_s32(tmp32i.val[0]);
+                    tmp32f.val[1] = vcvtq_f32_s32(tmp32i.val[1]);
+
+                    /* complex multiplication of four complex samples (float 32 bits each component) by their phases: (re*pr - im*pi) + j(re*pi + im*pr) */
+                    tmp32_real.val[0] = vmulq_f32(tmp32f.val[0], _phase_real);
+                    tmp32_real.val[1] = vmulq_f32(tmp32f.val[1], _phase_imag);
+                    tmp32_imag.val[0] = vmulq_f32(tmp32f.val[0], _phase_imag);
+                    tmp32_imag.val[1] = vmulq_f32(tmp32f.val[1], _phase_real);
+
+                    tmp32f.val[0] = vsubq_f32(tmp32_real.val[0], tmp32_real.val[1]);
+                    tmp32f.val[1] = vaddq_f32(tmp32_imag.val[0], tmp32_imag.val[1]);
+
+                    /* downcast results to int32: add 0.5, subtract 1.0 for negative values (sign bit set), then convert; note the scalar tail below uses rintf() instead */
+                    /* in __aarch64__ we can do that with vcvtaq_s32_f32(ret1); vcvtaq_s32_f32(ret2); */
+                    sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[0]), 31)));
+                    PlusHalf = vaddq_f32(tmp32f.val[0], half);
+                    Round = vsubq_f32(PlusHalf, sign);
+                    tmp32i.val[0] = vcvtq_s32_f32(Round);
+
+                    sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[1]), 31)));
+                    PlusHalf = vaddq_f32(tmp32f.val[1], half);
+                    Round = vsubq_f32(PlusHalf, sign);
+                    tmp32i.val[1] = vcvtq_s32_f32(Round);
+
+                    /* downcast results to int16 (saturating narrow) */
+                    tmp16.val[0] = vqmovn_s32(tmp32i.val[0]);
+                    tmp16.val[1] = vqmovn_s32(tmp32i.val[1]);
+
+                    /* compute next four phases: multiply each lane's phase by phase_inc^4 */
+                    tmp32_real.val[0] = vmulq_f32(_phase_real, _phase4_real);
+                    tmp32_real.val[1] = vmulq_f32(_phase_imag, _phase4_imag);
+                    tmp32_imag.val[0] = vmulq_f32(_phase_real, _phase4_imag);
+                    tmp32_imag.val[1] = vmulq_f32(_phase_imag, _phase4_real);
+
+                    _phase_real = vsubq_f32(tmp32_real.val[0], tmp32_real.val[1]);
+                    _phase_imag = vaddq_f32(tmp32_imag.val[0], tmp32_imag.val[1]);
+
+                    for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
+                        {
+                            a_val = vld2_s16((int16_t*)&(_in_a[n_vec][number*4])); // load 4 complex samples of this vector, de-interleaved: val[0] = real parts, val[1] = imaginary parts
+                            __builtin_prefetch(&_in_a[n_vec][number*4] + 8);
+
+                            // multiply the real*real and imag*imag to get real result (b = rotated in_common sample held in tmp16)
+                            // a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r
+                            b_val.val[0] = vmul_s16(a_val.val[0], tmp16.val[0]);
+                            // a0i*b0i|a1i*b1i|a2i*b2i|a3i*b3i
+                            b_val.val[1] = vmul_s16(a_val.val[1], tmp16.val[1]);
+                            c_val.val[0] = vsub_s16(b_val.val[0], b_val.val[1]);
+
+                            // Multiply cross terms to get the imaginary result
+                            // a0r*b0i|a1r*b1i|a2r*b2i|a3r*b3i
+                            b_val.val[0] = vmul_s16(a_val.val[0], tmp16.val[1]);
+                            // a0i*b0r|a1i*b1r|a2i*b2r|a3i*b3r
+                            b_val.val[1] = vmul_s16(a_val.val[1], tmp16.val[0]);
+                            c_val.val[1] = vadd_s16(b_val.val[0], b_val.val[1]);
+
+                            accumulator[n_vec].val[0] = vadd_s16(accumulator[n_vec].val[0], c_val.val[0]); // NOTE(review): vmul_s16/vsub_s16/vadd_s16 are non-saturating, unlike the sat_adds16i used in the reduction and tail below - confirm wrap-around is acceptable
+                            accumulator[n_vec].val[1] = vadd_s16(accumulator[n_vec].val[1], c_val.val[1]);
+                        }
+                }
+
+            for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
+                {
+                    vst2_s16((int16_t*)dotProductVector, accumulator[n_vec]); // Store the four per-lane partial sums, re-interleaved, into the dot product vector
+                    dotProduct = lv_cmake(0,0);
+                    for (int i = 0; i < 4; ++i) // horizontal reduction of the four partial sums with saturating adds
+                        {
+                            dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])),
+                                    sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i])));
+                        }
+                    _out[n_vec] = dotProduct;
+                }
+            free(accumulator);
+            vst1q_f32((float32_t*)__phase_real, _phase_real);
+            vst1q_f32((float32_t*)__phase_imag, _phase_imag);
+
+            (*phase) = lv_cmake((float32_t)__phase_real[0], (float32_t)__phase_imag[0]); // lane 0 holds the phase for the next unprocessed sample
+        }
+
+    for (unsigned int n = neon_iters * 4; n < num_points; n++) // scalar tail for the remaining num_points % 4 samples; NOTE(review): when neon_iters == 0 this accumulates into _out[] without this function having written it first - confirm callers zero-initialize result
+        {
+            tmp16_ = in_common[n];
+            tmp32_ = lv_cmake((float32_t)lv_creal(tmp16_), (float32_t)lv_cimag(tmp16_)) * (*phase);
+            tmp16_ = lv_cmake((int16_t)rintf(lv_creal(tmp32_)), (int16_t)rintf(lv_cimag(tmp32_)));
+            (*phase) *= phase_inc;
+            for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
+                {
+                    tmp = tmp16_ * in_a[n_vec][n];
+                    _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
+                }
+        }
+}
+
+#endif /* LV_HAVE_NEON */
+
 #endif /*INCLUDED_volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_H*/
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic.h
@@ -57,7 +57,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_generic(lv_
    for(unsigned int n = 0; n < num_a_vectors; n++)
        {
            in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
-       memcpy(in_a[n], in, sizeof(lv_16sc_t) * num_points);
+            memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points);
        }
    volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(result, local_code, phase_inc[0], phase,(const lv_16sc_t**) in_a, num_a_vectors, num_points);

@@ -88,6 +88,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3(lv_1
            in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
            memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points);
        }
+
    volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points);

    for(unsigned int n = 0; n < num_a_vectors; n++)
@@ -118,6 +119,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_sse3(lv_1
            in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
            memcpy(in_a[n], in, sizeof(lv_16sc_t) * num_points);
        }
+
    volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points);

    for(unsigned int n = 0; n < num_a_vectors; n++)
@@ -129,6 +131,37 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_sse3(lv_1

 #endif // SSE3

+#ifdef LV_HAVE_NEON
+
+static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
+{
+    // Puppet wrapper: hands the NEON rotator + multi-vector dot product kernel num_a_vectors identical copies of "in".
+    // The phase rotator expects normalized complex exponentials, so both phasors are built as cos(x) - j*sin(x).
+    float initial_phase_rad = 0.345;
+    float phase_increment_rad = 0.123;
+    lv_32fc_t rotator_phase = lv_cmake(cos(initial_phase_rad), -sin(initial_phase_rad));
+    lv_32fc_t rotator_step = lv_cmake(cos(phase_increment_rad), -sin(phase_increment_rad));
+
+    int num_a_vectors = 3;
+    size_t copy_bytes = sizeof(lv_16sc_t) * num_points; // size in bytes of one replica of the input
+    lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
+    for(int k = 0; k < num_a_vectors; k++)
+        {
+            in_a[k] = (lv_16sc_t*)volk_gnsssdr_malloc(copy_bytes, volk_gnsssdr_get_alignment());
+            memcpy(in_a[k], in, copy_bytes);
+        }
+
+    volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(result, local_code, rotator_step, &rotator_phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points);
+
+    for(int k = 0; k < num_a_vectors; k++)
+        {
+            volk_gnsssdr_free(in_a[k]);
+        }
+    volk_gnsssdr_free(in_a); // release the table of pointers last
+}
+
+#endif // NEON
+
 #endif  // INCLUDED_volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_H


--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/CMakeLists.txt
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/CMakeLists.txt
@@ -87,7 +87,7 @@ endif(NOT DEFINED _XOPEN_SOURCE)
 ########################################################################
 # detect x86 flavor of CPU
 ########################################################################
-if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(i.86|x86|x86_64|amd64)$")
+if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(i.86|x86|x86_64|amd64|AMD64)$")
    message(STATUS "x86* CPU detected")
    set(CPU_IS_x86 TRUE)
 endif()
@@ -106,9 +106,13 @@ macro(check_arch arch_name)
    set(flags ${ARGN})
    set(have_${arch_name} TRUE)
    foreach(flag ${flags})
+        if ( (${COMPILER_NAME} STREQUAL "MSVC") AND (${flag} STREQUAL "/arch:SSE2" OR ${flag} STREQUAL "/arch:SSE" ))
+            # MSVC has supported SSE/SSE2 since VS 2005, but the /arch:SSE and /arch:SSE2 flags are not available in 64-bit builds, so skip the flag check
+        else()
            include(CheckCXXCompilerFlag)
            set(have_flag have${flag})
-        execute_process( #make the have_flag have nice alphanum chars (just for looks/not necessary)
+            #make the have_flag have nice alphanum chars (just for looks/not necessary)
+            execute_process(
                COMMAND ${PYTHON_EXECUTABLE} -c "import re; print(re.sub('\\W', '_', '${have_flag}'))"
                OUTPUT_VARIABLE have_flag OUTPUT_STRIP_TRAILING_WHITESPACE
            )
@@ -120,6 +124,7 @@ macro(check_arch arch_name)
            if (NOT ${have_flag})
                set(have_${arch_name} FALSE)
            endif()
+        endif()
    endforeach()
    if (have_${arch_name})
        list(APPEND available_archs ${arch_name})