Updated volk_gnsssdr_module for real codes

Added 16i and 32f resamplers and 32fc_32f and 16ic_16i rotator dot product to enable use of real (rather than complex) local code replicas
2025-07-22 20:02:55 +00:00 · 2017-09-11 15:15:27 +01:00 · 2017-09-11 15:15:27 +01:00 · 676c1506da
commit 676c1506da
parent 3f557eeb41
13 changed files with 4257 additions and 29 deletions
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_avx_intrinsics.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_avx_intrinsics.h
@ -61,6 +61,14 @@ _mm256_magnitudesquared_ps(__m256 cplxValue1, __m256 cplxValue2){
  return _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
 }

+static inline __m256 _mm256_complexnormalise_ps( __m256 z ){ /*
+     * Divides every adjacent pair of float lanes (a, b) of z by sqrt(a*a + b*b)
+     * and returns the scaled vector. Presumably the pairs are interleaved
+     * (real, imaginary) samples, as the name suggests - confirm against callers.
+     * NOTE(review): the division is not guarded against a zero divisor.
+     */
+    __m256 tmp1 = _mm256_mul_ps(z, z); // square each lane
+    __m256 tmp2 = _mm256_hadd_ps(tmp1, tmp1); // sum adjacent squares; each 128-bit half becomes [s0, s1, s0, s1]
+    tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8); // 0xD8 picks elements 0,2,1,3 of each half -> [s0, s0, s1, s1]
+    tmp2 = _mm256_sqrt_ps(tmp1); // square root of each duplicated pair sum
+    return _mm256_div_ps(z, tmp2); // both lanes of a pair are divided by the same root
+}
+
 static inline __m256
 _mm256_magnitude_ps(__m256 cplxValue1, __m256 cplxValue2){
  return _mm256_sqrt_ps(_mm256_magnitudesquared_ps(cplxValue1, cplxValue2));
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16i_resamplerxnpuppet_16i.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16i_resamplerxnpuppet_16i.h
@ -0,0 +1,282 @@
+/*!
+ * \file volk_gnsssdr_16i_resamplerxnpuppet_16i.h
+ * \brief VOLK_GNSSSDR puppet for the multiple 16-bit vector resampler kernel.
+ * \authors <ul>
+ *          <li> Cillian O'Driscoll 2017 cillian.odriscoll at gmail dot com
+ *          </ul>
+ *
+ * VOLK_GNSSSDR puppet for integrating the multiple resampler into the test system
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2017  (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ *          Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#ifndef INCLUDED_volk_gnsssdr_16i_resamplerxnpuppet_16i_H
+#define INCLUDED_volk_gnsssdr_16i_resamplerxnpuppet_16i_H
+
+#include "volk_gnsssdr/volk_gnsssdr_16i_xn_resampler_16i_xn.h"
+#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <volk_gnsssdr/volk_gnsssdr.h>
+#include <string.h>
+
+#ifdef LV_HAVE_GENERIC
+static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_generic(int16_t* result, const int16_t* local_code, unsigned int num_points)
+{   /* Puppet: runs the generic xn resampler with fixed parameters into three
+     * scratch vectors and copies only the first one to result.
+     *   result     - destination for num_points samples
+     *   local_code - input code, passed unchanged to the kernel
+     *   num_points - samples per output vector
+     * NOTE(review): neither allocation result is checked before use.
+     */
+    int code_length_chips = 2046; // fixed code length handed to the kernel
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points ); // (2046 + 0.1) chips spread over num_points samples
+    int num_out_vectors = 3; // must equal the number of entries in shifts_chips
+    unsigned int n; // NOTE(review): unsigned counter compared against the signed num_out_vectors
+    float rem_code_phase_chips = -0.234; // fixed remnant code phase [chips]
+    float shifts_chips[3] = { -0.1, 0.0, 0.1  }; // one shift per output vector [chips]
+    int16_t** result_aux =  (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); // table of output pointers
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+       result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment()); // scratch buffer for output vector n
+    }
+
+    volk_gnsssdr_16i_xn_resampler_16i_xn_generic(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+    memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points); // only output vector 0 reaches the caller
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux); // pointer table is released after the buffers it points to
+}
+
+#endif /* LV_HAVE_GENERIC */
+ 
+#ifdef LV_HAVE_SSE3
+static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_a_sse3(int16_t* result, const int16_t* local_code, unsigned int num_points)
+{   /* Puppet: runs the a_sse3 xn resampler with fixed parameters into three
+     * scratch vectors and copies only the first one to result.
+     *   result     - destination for num_points samples
+     *   local_code - input code, passed unchanged to the kernel
+     *   num_points - samples per output vector
+     * NOTE(review): neither allocation result is checked before use.
+     */
+    int code_length_chips = 2046; // fixed code length handed to the kernel
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points ); // (2046 + 0.1) chips spread over num_points samples
+    int num_out_vectors = 3; // must equal the number of entries in shifts_chips
+    float rem_code_phase_chips = -0.234; // fixed remnant code phase [chips]
+    unsigned int n; // NOTE(review): unsigned counter compared against the signed num_out_vectors
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 }; // one shift per output vector [chips]
+    int16_t** result_aux =  (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); // table of output pointers
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+       result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment()); // scratch buffer for output vector n
+    }
+
+    volk_gnsssdr_16i_xn_resampler_16i_xn_a_sse3(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+    memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points); // only output vector 0 reaches the caller
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux); // pointer table is released after the buffers it points to
+}
+
+#endif
+
+#ifdef LV_HAVE_SSE3
+static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_u_sse3(int16_t* result, const int16_t* local_code, unsigned int num_points)
+{   /* Puppet: runs the u_sse3 xn resampler with fixed parameters into three
+     * scratch vectors and copies only the first one to result.
+     *   result     - destination for num_points samples
+     *   local_code - input code, passed unchanged to the kernel
+     *   num_points - samples per output vector
+     * NOTE(review): neither allocation result is checked before use.
+     */
+    int code_length_chips = 2046; // fixed code length handed to the kernel
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points ); // (2046 + 0.1) chips spread over num_points samples
+    int num_out_vectors = 3; // must equal the number of entries in shifts_chips
+    float rem_code_phase_chips = -0.234; // fixed remnant code phase [chips]
+    unsigned int n; // NOTE(review): unsigned counter compared against the signed num_out_vectors
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 }; // one shift per output vector [chips]
+    int16_t** result_aux =  (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); // table of output pointers
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+       result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment()); // scratch buffer for output vector n
+    }
+
+    volk_gnsssdr_16i_xn_resampler_16i_xn_u_sse3(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+    memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points); // only output vector 0 reaches the caller
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux); // pointer table is released after the buffers it points to
+}
+
+#endif
+
+
+#ifdef LV_HAVE_SSE4_1
+static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_u_sse4_1(int16_t* result, const int16_t* local_code, unsigned int num_points)
+{   /* Puppet: runs the u_sse4_1 xn resampler with fixed parameters into three
+     * scratch vectors and copies only the first one to result.
+     *   result     - destination for num_points samples
+     *   local_code - input code, passed unchanged to the kernel
+     *   num_points - samples per output vector
+     * NOTE(review): neither allocation result is checked before use.
+     */
+    int code_length_chips = 2046; // fixed code length handed to the kernel
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points ); // (2046 + 0.1) chips spread over num_points samples
+    int num_out_vectors = 3; // must equal the number of entries in shifts_chips
+    float rem_code_phase_chips = -0.234; // fixed remnant code phase [chips]
+    unsigned int n; // NOTE(review): unsigned counter compared against the signed num_out_vectors
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 }; // one shift per output vector [chips]
+    int16_t** result_aux =  (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); // table of output pointers
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+       result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment()); // scratch buffer for output vector n
+    }
+
+    volk_gnsssdr_16i_xn_resampler_16i_xn_u_sse4_1(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+    memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points); // only output vector 0 reaches the caller
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux); // pointer table is released after the buffers it points to
+}
+
+#endif
+
+
+#ifdef LV_HAVE_SSE4_1
+static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_a_sse4_1(int16_t* result, const int16_t* local_code, unsigned int num_points)
+{   /* Puppet: runs the a_sse4_1 xn resampler with fixed parameters into three
+     * scratch vectors and copies only the first one to result.
+     *   result     - destination for num_points samples
+     *   local_code - input code, passed unchanged to the kernel
+     *   num_points - samples per output vector
+     * NOTE(review): neither allocation result is checked before use.
+     */
+    int code_length_chips = 2046; // fixed code length handed to the kernel
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points ); // (2046 + 0.1) chips spread over num_points samples
+    int num_out_vectors = 3; // must equal the number of entries in shifts_chips
+    float rem_code_phase_chips = -0.234; // fixed remnant code phase [chips]
+    unsigned int n; // NOTE(review): unsigned counter compared against the signed num_out_vectors
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 }; // one shift per output vector [chips]
+    int16_t** result_aux =  (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); // table of output pointers
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+       result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment()); // scratch buffer for output vector n
+    }
+
+    volk_gnsssdr_16i_xn_resampler_16i_xn_a_sse4_1(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+    memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points); // only output vector 0 reaches the caller
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux); // pointer table is released after the buffers it points to
+}
+
+#endif
+
+
+#ifdef LV_HAVE_AVX
+static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_u_avx(int16_t* result, const int16_t* local_code, unsigned int num_points)
+{   /* Puppet: runs the u_avx xn resampler with fixed parameters into three
+     * scratch vectors and copies only the first one to result.
+     *   result     - destination for num_points samples
+     *   local_code - input code, passed unchanged to the kernel
+     *   num_points - samples per output vector
+     * NOTE(review): neither allocation result is checked before use.
+     */
+    int code_length_chips = 2046; // fixed code length handed to the kernel
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points ); // (2046 + 0.1) chips spread over num_points samples
+    int num_out_vectors = 3; // must equal the number of entries in shifts_chips
+    float rem_code_phase_chips = -0.234; // fixed remnant code phase [chips]
+    unsigned int n; // NOTE(review): unsigned counter compared against the signed num_out_vectors
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 }; // one shift per output vector [chips]
+    int16_t** result_aux =  (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); // table of output pointers
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+       result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment()); // scratch buffer for output vector n
+    }
+
+    volk_gnsssdr_16i_xn_resampler_16i_xn_u_avx(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+    memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points); // only output vector 0 reaches the caller
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux); // pointer table is released after the buffers it points to
+}
+
+#endif
+
+
+#ifdef LV_HAVE_AVX
+static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_a_avx(int16_t* result, const int16_t* local_code, unsigned int num_points)
+{   /* Puppet: runs the a_avx xn resampler with fixed parameters into three
+     * scratch vectors and copies only the first one to result.
+     *   result     - destination for num_points samples
+     *   local_code - input code, passed unchanged to the kernel
+     *   num_points - samples per output vector
+     * NOTE(review): neither allocation result is checked before use.
+     */
+    int code_length_chips = 2046; // fixed code length handed to the kernel
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points ); // (2046 + 0.1) chips spread over num_points samples
+    int num_out_vectors = 3; // must equal the number of entries in shifts_chips
+    float rem_code_phase_chips = -0.234; // fixed remnant code phase [chips]
+    unsigned int n; // NOTE(review): unsigned counter compared against the signed num_out_vectors
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 }; // one shift per output vector [chips]
+    int16_t** result_aux =  (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); // table of output pointers
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+       result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment()); // scratch buffer for output vector n
+    }
+
+    volk_gnsssdr_16i_xn_resampler_16i_xn_a_avx(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+    memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points); // only output vector 0 reaches the caller
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux); // pointer table is released after the buffers it points to
+}
+
+#endif
+
+
+#ifdef LV_HAVE_NEON
+static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_neon(int16_t* result, const int16_t* local_code, unsigned int num_points)
+{   /* Puppet: runs the neon xn resampler with fixed parameters into three
+     * scratch vectors and copies only the first one to result.
+     *   result     - destination for num_points samples
+     *   local_code - input code, passed unchanged to the kernel
+     *   num_points - samples per output vector
+     * NOTE(review): neither allocation result is checked before use.
+     */
+    int code_length_chips = 2046; // fixed code length handed to the kernel
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points ); // (2046 + 0.1) chips spread over num_points samples
+    int num_out_vectors = 3; // must equal the number of entries in shifts_chips
+    float rem_code_phase_chips = -0.234; // fixed remnant code phase [chips]
+    unsigned int n; // NOTE(review): unsigned counter compared against the signed num_out_vectors
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 }; // one shift per output vector [chips]
+    int16_t** result_aux =  (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment()); // table of output pointers
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+       result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment()); // scratch buffer for output vector n
+    }
+
+    volk_gnsssdr_16i_xn_resampler_16i_xn_neon(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+    memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points); // only output vector 0 reaches the caller
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux); // pointer table is released after the buffers it points to
+}
+
+#endif
+
+#endif // INCLUDED_volk_gnsssdr_16i_resamplerxnpuppet_16i_H
+
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16i_xn_resampler_16i_xn.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16i_xn_resampler_16i_xn.h
@ -0,0 +1,608 @@
+/*!
+ * \file volk_gnsssdr_16i_xn_resampler_16i_xn.h
+ * \brief VOLK_GNSSSDR kernel: Resamples N 16 bits integer short vectors using zero hold resample algorithm.
+ * \authors <ul>
+ *          <li> Cillian O'Driscoll, 2017. cillian.odriscoll(at)gmail.com
+ *          </ul>
+ *
+ * VOLK_GNSSSDR kernel that resamples a vector of real 16-bit integers into N output vectors using a zero-hold resampling algorithm.
+ * It resamples a single GNSS local code signal replica into N fractionally resampled and fractionally delayed vectors
+ * (i.e. it creates the Early, Prompt, and Late code replicas)
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2017  (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ *          Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+/*!
+ * \page volk_gnsssdr_16i_xn_resampler_16i_xn
+ *
+ * \b Overview
+ *
+ * Resamples a real vector of 16-bit integers, providing \p num_out_vectors outputs.
+ *
+ * <b>Dispatcher Prototype</b>
+ * \code
+ * void volk_gnsssdr_16i_xn_resampler_16i_xn(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+ * \endcode
+ *
+ * \b Inputs
+ * \li local_code:            Vector to be resampled.
+ * \li rem_code_phase_chips:  Remnant code phase [chips].
+ * \li code_phase_step_chips: Phase increment per sample [chips/sample].
+ * \li shifts_chips:          Vector of floats that defines the spacing (in chips) between the replicas of \p local_code
+ * \li code_length_chips:     Code length in chips.
+ * \li num_out_vectors:       Number of output vectors.
+ * \li num_points:            The number of data values to be in the resampled vector.
+ *
+ * \b Outputs
+ * \li result:                Pointer to a vector of pointers where the results will be stored.
+ *
+ */
+
+#ifndef INCLUDED_volk_gnsssdr_16i_xn_resampler_16i_xn_H
+#define INCLUDED_volk_gnsssdr_16i_xn_resampler_16i_xn_H
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+
+#ifdef LV_HAVE_GENERIC
+
+static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_generic(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{   /* Portable resampler: for every output vector (tap) and sample n it copies
+     * local_code[floor(code_phase_step_chips * n + shifts_chips[tap] - rem_code_phase_chips)],
+     * with the index wrapped into the range 0 .. code_length_chips - 1.
+     * result must provide num_out_vectors buffers of num_points samples each.
+     */
+    int local_code_chip_index;
+    int current_correlator_tap;
+    int n; // NOTE(review): signed counter compared against the unsigned num_points
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            for (n = 0; n < num_points; n++)
+                {
+                    // resample code for current tap
+                    local_code_chip_index = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
+                    //Take into account that in multitap correlators, the shifts can be negative!
+                    if (local_code_chip_index < 0) local_code_chip_index += (int)code_length_chips * (abs(local_code_chip_index) / code_length_chips + 1); // adds enough whole code lengths to make the index non-negative
+                    local_code_chip_index = local_code_chip_index % code_length_chips; // wrap; the operands mix int and unsigned int, so the index has to be non-negative here
+                    result[current_correlator_tap][n] = local_code[local_code_chip_index];
+                }
+        }
+}
+
+#endif /*LV_HAVE_GENERIC*/
+
+#ifdef LV_HAVE_SSE4_1
+#include <smmintrin.h>
+static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_a_sse4_1(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{   /* SSE4.1 resampler: for every output vector (tap) and sample n it copies
+     * local_code[floor(code_phase_step_chips * n + shifts_chips[tap] - rem_code_phase_chips)],
+     * with the index wrapped by code_length_chips. Indices are computed four at
+     * a time in vector registers; the copies themselves are scalar.
+     */
+    int16_t** _result = result;
+    const unsigned int quarterPoints = num_points / 4; // number of complete 4-sample groups
+    int current_correlator_tap;
+    unsigned int n;
+    unsigned int k;
+    const __m128 fours = _mm_set1_ps(4.0f); // per-iteration increment of the sample indices
+    const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
+    const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
+
+    __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4]; // the four wrapped indices of the current group
+    int local_code_chip_index_; // index used by the scalar tail loop
+
+    const __m128i zeros = _mm_setzero_si128();
+    const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
+    const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
+    __m128i local_code_chip_index_reg, aux_i, negatives, i;
+    __m128 aux, aux2, shifts_chips_reg, c, cTrunc, base;
+
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
+            aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); // per-tap constant: shift minus remnant phase
+            __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); // sample indices 0..3, lowest lane first
+            for(n = 0; n < quarterPoints; n++)
+                {
+                    aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
+                    aux = _mm_add_ps(aux, aux2);
+                    // floor
+                    aux = _mm_floor_ps(aux);
+
+                    // fmod
+                    c = _mm_div_ps(aux, code_length_chips_reg_f);
+                    i = _mm_cvttps_epi32(c); // quotient truncated toward zero
+                    cTrunc = _mm_cvtepi32_ps(i);
+                    base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
+                    local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base)); // remainder; negative when aux is negative
+
+                    negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros); // all-ones mask in lanes with a negative remainder
+                    aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
+                    local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); // add one code length to those lanes only
+                    _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);
+                    for(k = 0; k < 4; ++k)
+                        {
+                            _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; // table lookup, one sample at a time
+                        }
+                    indexn = _mm_add_ps(indexn, fours);
+                }
+            for(n = quarterPoints * 4; n < num_points; n++) // leftover num_points % 4 samples, same formula in scalar code
+                {
+                    // resample code for current tap
+                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
+                    //Take into account that in multitap correlators, the shifts can be negative!
+                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
+                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
+                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_];
+                }
+        }
+}
+
+#endif 
+
+
+#ifdef LV_HAVE_SSE4_1
+#include <smmintrin.h>
+static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_u_sse4_1(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{   /* SSE4.1 resampler (u_ variant): for every output vector (tap) and sample n
+     * it copies
+     * local_code[floor(code_phase_step_chips * n + shifts_chips[tap] - rem_code_phase_chips)],
+     * with the index wrapped by code_length_chips. Indices are computed four at
+     * a time in vector registers; result and local_code are only accessed with
+     * scalar loads and stores.
+     */
+    int16_t** _result = result;
+    const unsigned int quarterPoints = num_points / 4; // number of complete 4-sample groups
+    int current_correlator_tap;
+    unsigned int n;
+    unsigned int k;
+    const __m128 fours = _mm_set1_ps(4.0f); // per-iteration increment of the sample indices
+    const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
+    const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
+
+    __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4]; // the four wrapped indices of the current group
+    int local_code_chip_index_; // index used by the scalar tail loop
+
+    const __m128i zeros = _mm_setzero_si128();
+    const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
+    const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
+    __m128i local_code_chip_index_reg, aux_i, negatives, i;
+    __m128 aux, aux2, shifts_chips_reg, c, cTrunc, base;
+
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
+            aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); // per-tap constant: shift minus remnant phase
+            __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); // sample indices 0..3, lowest lane first
+            for(n = 0; n < quarterPoints; n++)
+                {
+                    aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
+                    aux = _mm_add_ps(aux, aux2);
+                    // floor
+                    aux = _mm_floor_ps(aux);
+
+                    // fmod
+                    c = _mm_div_ps(aux, code_length_chips_reg_f);
+                    i = _mm_cvttps_epi32(c); // quotient truncated toward zero
+                    cTrunc = _mm_cvtepi32_ps(i);
+                    base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
+                    local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base)); // remainder; negative when aux is negative
+
+                    negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros); // all-ones mask in lanes with a negative remainder
+                    aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
+                    local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); // add one code length to those lanes only
+                    _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); // the only vector store; its target is the local array declared with __VOLK_ATTR_ALIGNED(16)
+                    for(k = 0; k < 4; ++k)
+                        {
+                            _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; // table lookup, one sample at a time
+                        }
+                    indexn = _mm_add_ps(indexn, fours);
+                }
+            for(n = quarterPoints * 4; n < num_points; n++) // leftover num_points % 4 samples, same formula in scalar code
+                {
+                    // resample code for current tap
+                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
+                    //Take into account that in multitap correlators, the shifts can be negative!
+                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
+                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
+                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_];
+                }
+        }
+}
+
+#endif
+
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_a_sse3(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{   /* SSE3 resampler: for every output vector (tap) and sample n it copies
+     * local_code[floor(code_phase_step_chips * n + shifts_chips[tap] - rem_code_phase_chips)],
+     * with the index wrapped by code_length_chips. Indices are computed four at
+     * a time in vector registers; the copies themselves are scalar. The floor
+     * is built from a truncation plus a correction instead of a floor intrinsic.
+     */
+    int16_t** _result = result;
+    const unsigned int quarterPoints = num_points / 4; // number of complete 4-sample groups
+    int current_correlator_tap;
+    unsigned int n;
+    unsigned int k;
+    const __m128 ones = _mm_set1_ps(1.0f); // correction term of the floor emulation
+    const __m128 fours = _mm_set1_ps(4.0f); // per-iteration increment of the sample indices
+    const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
+    const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
+
+    __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4]; // the four wrapped indices of the current group
+    int local_code_chip_index_; // index used by the scalar tail loop
+
+    const __m128i zeros = _mm_setzero_si128();
+    const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
+    const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
+    __m128i local_code_chip_index_reg, aux_i, negatives, i;
+    __m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base;
+
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
+            aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); // per-tap constant: shift minus remnant phase
+            __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); // sample indices 0..3, lowest lane first
+            for(n = 0; n < quarterPoints; n++)
+                {
+                    aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
+                    aux = _mm_add_ps(aux, aux2);
+                    // floor
+                    i = _mm_cvttps_epi32(aux); // truncate toward zero
+                    fi = _mm_cvtepi32_ps(i);
+                    igx = _mm_cmpgt_ps(fi, aux); // lanes where truncation rounded up
+                    j = _mm_and_ps(igx, ones); // 1.0 in those lanes, 0.0 elsewhere
+                    aux = _mm_sub_ps(fi, j);
+                    // fmod
+                    c = _mm_div_ps(aux, code_length_chips_reg_f);
+                    i = _mm_cvttps_epi32(c); // quotient truncated toward zero
+                    cTrunc = _mm_cvtepi32_ps(i);
+                    base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
+                    local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base)); // remainder; negative when aux is negative
+
+                    negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros); // all-ones mask in lanes with a negative remainder
+                    aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
+                    local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); // add one code length to those lanes only
+                    _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);
+                    for(k = 0; k < 4; ++k)
+                        {
+                            _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; // table lookup, one sample at a time
+                        }
+                    indexn = _mm_add_ps(indexn, fours);
+                }
+            for(n = quarterPoints * 4; n < num_points; n++) // leftover num_points % 4 samples, same formula in scalar code
+                {
+                    // resample code for current tap
+                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
+                    //Take into account that in multitap correlators, the shifts can be negative!
+                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
+                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
+                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_];
+                }
+        }
+}
+
+#endif
+
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_u_sse3(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{   /* SSE3 resampler (u_ variant): for every output vector (tap) and sample n
+     * it copies
+     * local_code[floor(code_phase_step_chips * n + shifts_chips[tap] - rem_code_phase_chips)],
+     * with the index wrapped by code_length_chips. Indices are computed four at
+     * a time in vector registers; result and local_code are only accessed with
+     * scalar loads and stores. The floor is built from a truncation plus a
+     * correction instead of a floor intrinsic.
+     */
+    int16_t** _result = result;
+    const unsigned int quarterPoints = num_points / 4; // number of complete 4-sample groups
+    int current_correlator_tap;
+    unsigned int n;
+    unsigned int k;
+    const __m128 ones = _mm_set1_ps(1.0f); // correction term of the floor emulation
+    const __m128 fours = _mm_set1_ps(4.0f); // per-iteration increment of the sample indices
+    const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
+    const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
+
+    __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4]; // the four wrapped indices of the current group
+    int local_code_chip_index_; // index used by the scalar tail loop
+
+    const __m128i zeros = _mm_setzero_si128();
+    const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
+    const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
+    __m128i local_code_chip_index_reg, aux_i, negatives, i;
+    __m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base;
+
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
+            aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg); // per-tap constant: shift minus remnant phase
+            __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f); // sample indices 0..3, lowest lane first
+            for(n = 0; n < quarterPoints; n++)
+                {
+                    aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
+                    aux = _mm_add_ps(aux, aux2);
+                    // floor
+                    i = _mm_cvttps_epi32(aux); // truncate toward zero
+                    fi = _mm_cvtepi32_ps(i);
+                    igx = _mm_cmpgt_ps(fi, aux); // lanes where truncation rounded up
+                    j = _mm_and_ps(igx, ones); // 1.0 in those lanes, 0.0 elsewhere
+                    aux = _mm_sub_ps(fi, j);
+                    // fmod
+                    c = _mm_div_ps(aux, code_length_chips_reg_f);
+                    i = _mm_cvttps_epi32(c); // quotient truncated toward zero
+                    cTrunc = _mm_cvtepi32_ps(i);
+                    base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
+                    local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base)); // remainder; negative when aux is negative
+
+                    negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros); // all-ones mask in lanes with a negative remainder
+                    aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
+                    local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i); // add one code length to those lanes only
+                    _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg); // the only vector store; its target is the local array declared with __VOLK_ATTR_ALIGNED(16)
+                    for(k = 0; k < 4; ++k)
+                        {
+                            _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]]; // table lookup, one sample at a time
+                        }
+                    indexn = _mm_add_ps(indexn, fours);
+                }
+            for(n = quarterPoints * 4; n < num_points; n++) // leftover num_points % 4 samples, same formula in scalar code
+                {
+                    // resample code for current tap
+                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
+                    //Take into account that in multitap correlators, the shifts can be negative!
+                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
+                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
+                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_];
+                }
+        }
+}
+
+#endif
+
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h> /*
+ * AVX multi-tap code resampler ("_a_" variant).
+ *
+ * For every tap t in [0, num_out_vectors) and every n in [0, num_points) this writes
+ *   result[t][n] = local_code[idx]
+ * where idx = floor(code_phase_step_chips * n + shifts_chips[t] - rem_code_phase_chips),
+ * reduced modulo code_length_chips and made non-negative.
+ * Groups of 8 output samples are indexed with AVX arithmetic; the remaining
+ * num_points % 8 samples of each tap are produced by a scalar loop at the end.
+ * The reads from local_code and the writes to result are scalar, and the only vector
+ * store targets a local scratch array, so no alignment requirement on the caller's
+ * buffers is visible inside this function.
+ *
+ * \param[out] result                 num_out_vectors output buffers, each receiving num_points samples
+ * \param[in]  local_code             code samples that are looked up by the wrapped index
+ * \param[in]  rem_code_phase_chips   value subtracted from every computed code phase
+ * \param[in]  code_phase_step_chips  code phase increment per output sample
+ * \param[in]  shifts_chips           per-tap offset added to the code phase (one entry per tap)
+ * \param[in]  code_length_chips      period used to wrap the index (presumably the length of local_code; confirm with the caller)
+ * \param[in]  num_out_vectors        number of taps / output buffers
+ * \param[in]  num_points             number of output samples per tap
+ */
+static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_a_avx(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{
+    int16_t** _result = result;
+    const unsigned int avx_iters = num_points / 8;  // number of complete 8-sample groups
+    int current_correlator_tap;
+    unsigned int n;
+    unsigned int k;
+    const __m256 eights = _mm256_set1_ps(8.0f);  // per-iteration increment of the sample numbers
+    const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
+    const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
+
+    __VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];  // scratch: the 8 indices of the current group
+    int local_code_chip_index_;  // index used by the scalar tail loop
+
+    const __m256 zeros = _mm256_setzero_ps();
+    const __m256 code_length_chips_reg_f = _mm256_set1_ps((float)code_length_chips);
+    const __m256 n0 = _mm256_set_ps(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);  // lane k holds k (_mm256_set_ps lists the highest lane first)
+
+    __m256i local_code_chip_index_reg, i;
+    __m256 aux, aux2, aux3, shifts_chips_reg, c, cTrunc, base, negatives, indexn;
+
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            shifts_chips_reg = _mm256_set1_ps((float)shifts_chips[current_correlator_tap]);
+            aux2 = _mm256_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);  // per-tap constant: shift - rem_code_phase
+            indexn = n0;  // sample numbers 0..7 for the first group of this tap
+            for(n = 0; n < avx_iters; n++)
+                {
+                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&_result[current_correlator_tap][8 * n + 7], 1, 0);  // hint for the last output slot of this group
+                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&local_code_chip_index[8], 1, 3);  // NOTE(review): address is one past the end of the 8-entry scratch array; confirm what was meant to be prefetched
+                    aux = _mm256_mul_ps(code_phase_step_chips_reg, indexn);
+                    aux = _mm256_add_ps(aux, aux2);  // code phase of each of the 8 samples
+                    // floor: round every code phase down to an integral value (still stored as float)
+                    aux = _mm256_floor_ps(aux);
+
+                    // fmod: remainder of aux / code_length_chips using a quotient truncated toward zero, so the remainder keeps the sign of aux
+                    c = _mm256_div_ps(aux, code_length_chips_reg_f);
+                    i = _mm256_cvttps_epi32(c);
+                    cTrunc = _mm256_cvtepi32_ps(i);
+                    base = _mm256_mul_ps(cTrunc, code_length_chips_reg_f);
+                    local_code_chip_index_reg = _mm256_cvttps_epi32(_mm256_sub_ps(aux, base));
+
+                    // no negatives: lanes holding a negative remainder get code_length_chips added (done in float, then converted back)
+                    c = _mm256_cvtepi32_ps(local_code_chip_index_reg);
+                    negatives = _mm256_cmp_ps(c, zeros, 0x01 );  // predicate 0x01 is "less than": all-ones mask where c < 0
+                    aux3 = _mm256_and_ps(code_length_chips_reg_f, negatives);  // code_length_chips in negative lanes, 0 elsewhere
+                    aux = _mm256_add_ps(c, aux3);
+                    local_code_chip_index_reg = _mm256_cvttps_epi32(aux);
+
+                    _mm256_store_si256((__m256i*)local_code_chip_index, local_code_chip_index_reg);  // spill the indices so the lookup below can be done element by element
+                    for(k = 0; k < 8; ++k)
+                        {
+                            _result[current_correlator_tap][n * 8 + k] = local_code[local_code_chip_index[k]];
+                        }
+                    indexn = _mm256_add_ps(indexn, eights);  // advance to the next 8 sample numbers
+                }
+        }
+    _mm256_zeroupper();  // zeroes the upper 128 bits of the YMM registers; presumably to avoid AVX/SSE transition penalties in the scalar code below
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            for(n = avx_iters * 8; n < num_points; n++)  // scalar tail: the num_points % 8 samples not covered above
+                {
+                    // resample code for current tap: same index formula as the vector loop, evaluated with scalar floor()
+                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
+                    // Shifts can be negative in multitap correlators: add enough whole code periods to make the index non-negative before the modulo.
+                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
+                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
+                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_];
+                }
+        }
+}
+
+#endif
+
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h> /*
+ * AVX multi-tap code resampler ("_u_" variant).
+ *
+ * For every tap t in [0, num_out_vectors) and every n in [0, num_points) this writes
+ *   result[t][n] = local_code[idx]
+ * where idx = floor(code_phase_step_chips * n + shifts_chips[t] - rem_code_phase_chips),
+ * reduced modulo code_length_chips and made non-negative.
+ * Groups of 8 output samples are indexed with AVX arithmetic; the remaining
+ * num_points % 8 samples of each tap are produced by a scalar loop at the end.
+ * The aligned vector store used below writes to a local, 32-byte-attributed scratch array;
+ * the caller's buffers are only accessed element by element.
+ *
+ * \param[out] result                 num_out_vectors output buffers, each receiving num_points samples
+ * \param[in]  local_code             code samples that are looked up by the wrapped index
+ * \param[in]  rem_code_phase_chips   value subtracted from every computed code phase
+ * \param[in]  code_phase_step_chips  code phase increment per output sample
+ * \param[in]  shifts_chips           per-tap offset added to the code phase (one entry per tap)
+ * \param[in]  code_length_chips      period used to wrap the index (presumably the length of local_code; confirm with the caller)
+ * \param[in]  num_out_vectors        number of taps / output buffers
+ * \param[in]  num_points             number of output samples per tap
+ */
+static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_u_avx(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{
+    int16_t** _result = result;
+    const unsigned int avx_iters = num_points / 8;  // number of complete 8-sample groups
+    int current_correlator_tap;
+    unsigned int n;
+    unsigned int k;
+    const __m256 eights = _mm256_set1_ps(8.0f);  // per-iteration increment of the sample numbers
+    const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
+    const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
+
+    __VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];  // scratch: the 8 indices of the current group
+    int local_code_chip_index_;  // index used by the scalar tail loop
+
+    const __m256 zeros = _mm256_setzero_ps();
+    const __m256 code_length_chips_reg_f = _mm256_set1_ps((float)code_length_chips);
+    const __m256 n0 = _mm256_set_ps(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);  // lane k holds k (_mm256_set_ps lists the highest lane first)
+
+    __m256i local_code_chip_index_reg, i;
+    __m256 aux, aux2, aux3, shifts_chips_reg, c, cTrunc, base, negatives, indexn;
+
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            shifts_chips_reg = _mm256_set1_ps((float)shifts_chips[current_correlator_tap]);
+            aux2 = _mm256_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);  // per-tap constant: shift - rem_code_phase
+            indexn = n0;  // sample numbers 0..7 for the first group of this tap
+            for(n = 0; n < avx_iters; n++)
+                {
+                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&_result[current_correlator_tap][8 * n + 7], 1, 0);  // hint for the last output slot of this group
+                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&local_code_chip_index[8], 1, 3);  // NOTE(review): address is one past the end of the 8-entry scratch array; confirm what was meant to be prefetched
+                    aux = _mm256_mul_ps(code_phase_step_chips_reg, indexn);
+                    aux = _mm256_add_ps(aux, aux2);  // code phase of each of the 8 samples
+                    // floor: round every code phase down to an integral value (still stored as float)
+                    aux = _mm256_floor_ps(aux);
+
+                    // fmod: remainder of aux / code_length_chips using a quotient truncated toward zero, so the remainder keeps the sign of aux
+                    c = _mm256_div_ps(aux, code_length_chips_reg_f);
+                    i = _mm256_cvttps_epi32(c);
+                    cTrunc = _mm256_cvtepi32_ps(i);
+                    base = _mm256_mul_ps(cTrunc, code_length_chips_reg_f);
+                    local_code_chip_index_reg = _mm256_cvttps_epi32(_mm256_sub_ps(aux, base));
+
+                    // no negatives: lanes holding a negative remainder get code_length_chips added (done in float, then converted back)
+                    c = _mm256_cvtepi32_ps(local_code_chip_index_reg);
+                    negatives = _mm256_cmp_ps(c, zeros, 0x01 );  // predicate 0x01 is "less than": all-ones mask where c < 0
+                    aux3 = _mm256_and_ps(code_length_chips_reg_f, negatives);  // code_length_chips in negative lanes, 0 elsewhere
+                    aux = _mm256_add_ps(c, aux3);
+                    local_code_chip_index_reg = _mm256_cvttps_epi32(aux);
+
+                    _mm256_store_si256((__m256i*)local_code_chip_index, local_code_chip_index_reg);  // aligned store into the local scratch array, not into a caller buffer
+                    for(k = 0; k < 8; ++k)
+                        {
+                            _result[current_correlator_tap][n * 8 + k] = local_code[local_code_chip_index[k]];
+                        }
+                    indexn = _mm256_add_ps(indexn, eights);  // advance to the next 8 sample numbers
+                }
+        }
+    _mm256_zeroupper();  // zeroes the upper 128 bits of the YMM registers; presumably to avoid AVX/SSE transition penalties in the scalar code below
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            for(n = avx_iters * 8; n < num_points; n++)  // scalar tail: the num_points % 8 samples not covered above
+                {
+                    // resample code for current tap: same index formula as the vector loop, evaluated with scalar floor()
+                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
+                    // Shifts can be negative in multitap correlators: add enough whole code periods to make the index non-negative before the modulo.
+                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
+                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
+                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_];
+                }
+        }
+}
+
+#endif
+
+
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h> /*
+ * NEON multi-tap code resampler.
+ *
+ * For every tap t in [0, num_out_vectors) and every n in [0, num_points) this writes
+ *   result[t][n] = local_code[idx]
+ * where idx = floor(code_phase_step_chips * n + shifts_chips[t] - rem_code_phase_chips),
+ * reduced modulo code_length_chips and made non-negative.
+ * Groups of 4 output samples are indexed with NEON arithmetic; the remaining
+ * num_points % 4 samples of each tap are produced by a scalar loop.
+ * The division by code_length_chips is replaced by a multiplication with a refined
+ * reciprocal estimate that is computed once before the tap loop.
+ *
+ * \param[out] result                 num_out_vectors output buffers, each receiving num_points samples
+ * \param[in]  local_code             code samples that are looked up by the wrapped index
+ * \param[in]  rem_code_phase_chips   value subtracted from every computed code phase
+ * \param[in]  code_phase_step_chips  code phase increment per output sample
+ * \param[in]  shifts_chips           per-tap offset added to the code phase (one entry per tap)
+ * \param[in]  code_length_chips      period used to wrap the index (presumably the length of local_code; confirm with the caller)
+ * \param[in]  num_out_vectors        number of taps / output buffers
+ * \param[in]  num_points             number of output samples per tap
+ */
+static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_neon(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{
+    int16_t** _result = result;
+    const unsigned int neon_iters = num_points / 4;  // number of complete 4-sample groups
+    const int32x4_t ones = vdupq_n_s32(1);  // used to turn a comparison mask into a 0/1 correction in the floor step
+    const float32x4_t fours = vdupq_n_f32(4.0f);  // per-iteration increment of the sample numbers
+    const float32x4_t rem_code_phase_chips_reg = vdupq_n_f32(rem_code_phase_chips);
+    const float32x4_t code_phase_step_chips_reg = vdupq_n_f32(code_phase_step_chips);
+
+    __VOLK_ATTR_ALIGNED(16) int32_t local_code_chip_index[4];  // scratch: the 4 indices of the current group
+    int32_t local_code_chip_index_;  // index used by the scalar tail loop
+
+    const int32x4_t zeros = vdupq_n_s32(0);
+    const float32x4_t code_length_chips_reg_f = vdupq_n_f32((float)code_length_chips);
+    const int32x4_t code_length_chips_reg_i = vdupq_n_s32((int32_t)code_length_chips);
+    int32x4_t local_code_chip_index_reg, aux_i, negatives, i;
+    float32x4_t aux, aux2, shifts_chips_reg, fi, c, j, cTrunc, base, indexn, reciprocal;
+    __VOLK_ATTR_ALIGNED(16) const float vec[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
+    uint32x4_t igx;
+    reciprocal = vrecpeq_f32(code_length_chips_reg_f);  // initial estimate of 1 / code_length_chips
+    reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal);  // first Newton-Raphson refinement step
+    reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal); // this refinement is required! (second Newton-Raphson step on the estimate)
+    float32x4_t n0 = vld1q_f32((float*)vec);  // lane k holds k
+    int current_correlator_tap;
+    unsigned int n;
+    unsigned int k;
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            shifts_chips_reg = vdupq_n_f32((float)shifts_chips[current_correlator_tap]);
+            aux2 = vsubq_f32(shifts_chips_reg, rem_code_phase_chips_reg);  // per-tap constant: shift - rem_code_phase
+            indexn = n0;  // sample numbers 0..3 for the first group of this tap
+            for(n = 0; n < neon_iters; n++)
+                {
+                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&_result[current_correlator_tap][4 * n + 3], 1, 0);  // hint for the last output slot of this group
+                    __VOLK_GNSSSDR_PREFETCH(&local_code_chip_index[4]);  // NOTE(review): address is one past the end of the 4-entry scratch array; confirm what was meant to be prefetched
+                    aux = vmulq_f32(code_phase_step_chips_reg, indexn);
+                    aux = vaddq_f32(aux, aux2);  // code phase of each of the 4 samples
+
+                    // floor: truncate toward zero, then subtract 1 in the lanes where truncation rounded up (truncated value > original)
+                    i = vcvtq_s32_f32(aux);
+                    fi = vcvtq_f32_s32(i);
+                    igx = vcgtq_f32(fi, aux);
+                    j = vcvtq_f32_s32(vandq_s32(vreinterpretq_s32_u32(igx), ones));
+                    aux = vsubq_f32(fi, j);
+
+                    // fmod: remainder of aux / code_length_chips using a truncated quotient obtained via the reciprocal.
+                    // NOTE(review): the reciprocal is an approximation; if the quotient of an exact multiple lands just below
+                    // an integer the remainder would equal code_length_chips, and only negative results are corrected below - confirm.
+                    c = vmulq_f32(aux, reciprocal);
+                    i =  vcvtq_s32_f32(c);
+                    cTrunc = vcvtq_f32_s32(i);
+                    base = vmulq_f32(cTrunc, code_length_chips_reg_f);
+                    aux = vsubq_f32(aux, base);
+                    local_code_chip_index_reg = vcvtq_s32_f32(aux);
+
+                    negatives = vreinterpretq_s32_u32(vcltq_s32(local_code_chip_index_reg, zeros));  // all-ones mask in lanes with a negative index
+                    aux_i = vandq_s32(code_length_chips_reg_i, negatives);  // code_length_chips in negative lanes, 0 elsewhere
+                    local_code_chip_index_reg = vaddq_s32(local_code_chip_index_reg, aux_i);
+
+                    vst1q_s32((int32_t*)local_code_chip_index, local_code_chip_index_reg);  // spill the indices so the lookup below can be done element by element
+
+                    for(k = 0; k < 4; ++k)
+                        {
+                            _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
+                        }
+                    indexn = vaddq_f32(indexn, fours);  // advance to the next 4 sample numbers
+                }
+            for(n = neon_iters * 4; n < num_points; n++)  // scalar tail: the num_points % 4 samples not covered above
+                {
+                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&_result[current_correlator_tap][n], 1, 0);
+                    // resample code for current tap: same index formula as the vector loop, evaluated with scalar floor()
+                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
+                    // Shifts can be negative in multitap correlators: add enough whole code periods to make the index non-negative before the modulo.
+                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
+                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
+                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_];
+                }
+        }
+}
+
+
+#endif
+
+
+#endif /*INCLUDED_volk_gnsssdr_16i_xn_resampler_16i_xn_H*/
+
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn.h
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic.h
@ -0,0 +1,384 @@
+/*!
+ * \file volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic.h
+ * \brief VOLK_GNSSSDR puppet for the multiple 16-bit complex rotator dot product kernel with real (16-bit integer) local codes.
+ * \authors <ul>
+ *          <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
+ *          </ul>
+ *
+ * VOLK_GNSSSDR puppet for integrating the multiple rotator dot product kernel into the test system
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2015  (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ *          Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#ifndef INCLUDED_volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_H
+#define INCLUDED_volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_H
+
+#include "volk_gnsssdr/volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn.h"
+#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
+#include <volk_gnsssdr/volk_gnsssdr.h>
+#include <string.h>
+
+#ifdef LV_HAVE_GENERIC
+/*
+ * Test puppet for volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_generic.
+ * Builds the start phasor and the per-sample phase increment from fixed angles, copies the
+ * first num_points int16_t values of `in` into three separately allocated tap buffers,
+ * runs the kernel on them and finally releases the buffers.
+ */
+static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_generic(lv_16sc_t* result, const lv_16sc_t* local_code,  const lv_16sc_t* in, unsigned int num_points)
+{
+    // The phase rotator expects complex exponentials, so both phasors are built from cos/sin of an angle.
+    const float rem_carrier_phase_in_rad = 0.345;
+    const float phase_step_rad = 0.1;
+    lv_32fc_t phase = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
+    const lv_32fc_t phase_inc = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
+
+    const int num_a_vectors = 3;
+    const size_t tap_bytes = sizeof(int16_t) * num_points;
+    int tap;
+
+    // One pointer per tap, then one buffer per tap filled with the same input data.
+    int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
+    for(tap = 0; tap < num_a_vectors; ++tap)
+        {
+            in_a[tap] = (int16_t*)volk_gnsssdr_malloc(tap_bytes, volk_gnsssdr_get_alignment());
+            memcpy(in_a[tap], in, tap_bytes);
+        }
+
+    volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_generic(result, local_code, phase_inc, &phase, (const int16_t**)in_a, num_a_vectors, num_points);
+
+    // Release the tap buffers first, then the pointer table.
+    for(tap = 0; tap < num_a_vectors; ++tap)
+        {
+            volk_gnsssdr_free(in_a[tap]);
+        }
+    volk_gnsssdr_free(in_a);
+}
+
+#endif  // Generic
+
+
+#ifdef LV_HAVE_GENERIC
+/*
+ * Test puppet for volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_generic_reload.
+ * Builds the start phasor and the per-sample phase increment from fixed angles, copies the
+ * first num_points int16_t values of `in` into three separately allocated tap buffers,
+ * runs the kernel on them and finally releases the buffers.
+ */
+static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_generic_reload(lv_16sc_t* result, const lv_16sc_t* local_code,  const lv_16sc_t* in, unsigned int num_points)
+{
+    // The phase rotator expects complex exponentials, so both phasors are built from cos/sin of an angle.
+    const float rem_carrier_phase_in_rad = 0.345;
+    const float phase_step_rad = 0.1;
+    lv_32fc_t phase = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
+    const lv_32fc_t phase_inc = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
+
+    const int num_a_vectors = 3;
+    const size_t tap_bytes = sizeof(int16_t) * num_points;
+    int tap;
+
+    // One pointer per tap, then one buffer per tap filled with the same input data.
+    int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
+    for(tap = 0; tap < num_a_vectors; ++tap)
+        {
+            in_a[tap] = (int16_t*)volk_gnsssdr_malloc(tap_bytes, volk_gnsssdr_get_alignment());
+            memcpy(in_a[tap], in, tap_bytes);
+        }
+
+    volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_generic_reload(result, local_code, phase_inc, &phase, (const int16_t**)in_a, num_a_vectors, num_points);
+
+    // Release the tap buffers first, then the pointer table.
+    for(tap = 0; tap < num_a_vectors; ++tap)
+        {
+            volk_gnsssdr_free(in_a[tap]);
+        }
+    volk_gnsssdr_free(in_a);
+}
+
+#endif  // Generic
+
+
+#ifdef LV_HAVE_SSE3
+/*
+ * Test puppet for volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_sse3.
+ * Builds the start phasor and the per-sample phase increment from fixed angles, copies the
+ * first num_points int16_t values of `in` into three separately allocated tap buffers
+ * (allocated with the library alignment), runs the kernel and finally releases the buffers.
+ */
+static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_a_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
+{
+    // The phase rotator expects complex exponentials, so both phasors are built from cos/sin of an angle.
+    const float rem_carrier_phase_in_rad = 0.345;
+    const float phase_step_rad = 0.1;
+    lv_32fc_t phase = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
+    const lv_32fc_t phase_inc = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
+
+    const int num_a_vectors = 3;
+    const size_t tap_bytes = sizeof(int16_t) * num_points;
+    int tap;
+
+    // One pointer per tap, then one buffer per tap filled with the same input data.
+    int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
+    for(tap = 0; tap < num_a_vectors; ++tap)
+        {
+            in_a[tap] = (int16_t*)volk_gnsssdr_malloc(tap_bytes, volk_gnsssdr_get_alignment());
+            memcpy(in_a[tap], in, tap_bytes);
+        }
+
+    volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_sse3(result, local_code, phase_inc, &phase, (const int16_t**)in_a, num_a_vectors, num_points);
+
+    // Release the tap buffers first, then the pointer table.
+    for(tap = 0; tap < num_a_vectors; ++tap)
+        {
+            volk_gnsssdr_free(in_a[tap]);
+        }
+    volk_gnsssdr_free(in_a);
+}
+
+#endif // SSE3
+
+
+//#ifdef LV_HAVE_SSE3
+//static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_a_sse3_reload(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
+//{
+    //// phases must be normalized. Phase rotator expects a complex exponential input!
+    //float rem_carrier_phase_in_rad = 0.345;
+    //float phase_step_rad = 0.1;
+    //lv_32fc_t phase[1];
+    //phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
+    //lv_32fc_t phase_inc[1];
+    //phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
+    //unsigned int n;
+    //int num_a_vectors = 3;
+    //int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
+    //for(n = 0; n < num_a_vectors; n++)
+        //{
+            //in_a[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
+            //memcpy((int16_t*)in_a[n], (int16_t*)in, sizeof(int16_t) * num_points);
+        //}
+
+    //volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_sse3_reload(result, local_code, phase_inc[0], phase, (const int16_t**) in_a, num_a_vectors, num_points);
+
+    //for(n = 0; n < num_a_vectors; n++)
+        //{
+            //volk_gnsssdr_free(in_a[n]);
+        //}
+    //volk_gnsssdr_free(in_a);
+//}
+
+//#endif // SSE3
+
+
+#ifdef LV_HAVE_SSE3
+/*
+ * Test puppet for volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_sse3.
+ * Builds the start phasor and the per-sample phase increment from fixed angles, copies the
+ * first num_points int16_t values of `in` into three separately allocated tap buffers,
+ * runs the kernel on them and finally releases the buffers.
+ */
+static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_u_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
+{
+    // The phase rotator expects complex exponentials, so both phasors are built from cos/sin of an angle.
+    const float rem_carrier_phase_in_rad = 0.345;
+    const float phase_step_rad = 0.1;
+    lv_32fc_t phase = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
+    const lv_32fc_t phase_inc = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
+
+    const int num_a_vectors = 3;
+    const size_t tap_bytes = sizeof(int16_t) * num_points;
+    int tap;
+
+    // One pointer per tap, then one buffer per tap filled with the same input data.
+    int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
+    for(tap = 0; tap < num_a_vectors; ++tap)
+        {
+            in_a[tap] = (int16_t*)volk_gnsssdr_malloc(tap_bytes, volk_gnsssdr_get_alignment());
+            memcpy(in_a[tap], in, tap_bytes);
+        }
+
+    volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_sse3(result, local_code, phase_inc, &phase, (const int16_t**)in_a, num_a_vectors, num_points);
+
+    // Release the tap buffers first, then the pointer table.
+    for(tap = 0; tap < num_a_vectors; ++tap)
+        {
+            volk_gnsssdr_free(in_a[tap]);
+        }
+    volk_gnsssdr_free(in_a);
+}
+
+#endif // SSE3
+
+
+#ifdef LV_HAVE_AVX2
+/*
+ * Test puppet for volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_avx2.
+ * Builds the start phasor and the per-sample phase increment from fixed angles, copies the
+ * first num_points int16_t values of `in` into three separately allocated tap buffers
+ * (allocated with the library alignment), runs the kernel and finally releases the buffers.
+ */
+static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_a_avx2(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
+{
+    // The phase rotator expects complex exponentials, so both phasors are built from cos/sin of an angle.
+    const float rem_carrier_phase_in_rad = 0.345;
+    const float phase_step_rad = 0.1;
+    lv_32fc_t phase = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
+    const lv_32fc_t phase_inc = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
+
+    const int num_a_vectors = 3;
+    const size_t tap_bytes = sizeof(int16_t) * num_points;
+    int tap;
+
+    // One pointer per tap, then one buffer per tap filled with the same input data.
+    int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
+    for(tap = 0; tap < num_a_vectors; ++tap)
+        {
+            in_a[tap] = (int16_t*)volk_gnsssdr_malloc(tap_bytes, volk_gnsssdr_get_alignment());
+            memcpy(in_a[tap], in, tap_bytes);
+        }
+
+    volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_avx2(result, local_code, phase_inc, &phase, (const int16_t**)in_a, num_a_vectors, num_points);
+
+    // Release the tap buffers first, then the pointer table.
+    for(tap = 0; tap < num_a_vectors; ++tap)
+        {
+            volk_gnsssdr_free(in_a[tap]);
+        }
+    volk_gnsssdr_free(in_a);
+}
+
+#endif // AVX2
+
+
+//#ifdef LV_HAVE_AVX2
+//static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_a_avx2_reload(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
+//{
+    //// phases must be normalized. Phase rotator expects a complex exponential input!
+    //float rem_carrier_phase_in_rad = 0.345;
+    //float phase_step_rad = 0.1;
+    //lv_32fc_t phase[1];
+    //phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
+    //lv_32fc_t phase_inc[1];
+    //phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
+    //unsigned int n;
+    //int num_a_vectors = 3;
+    //int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
+    //for(n = 0; n < num_a_vectors; n++)
+        //{
+            //in_a[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
+            //memcpy((int16_t*)in_a[n], (int16_t*)in, sizeof(int16_t) * num_points);
+        //}
+
+    //volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_avx2_reload(result, local_code, phase_inc[0], phase, (const int16_t**) in_a, num_a_vectors, num_points);
+
+    //for(n = 0; n < num_a_vectors; n++)
+        //{
+            //volk_gnsssdr_free(in_a[n]);
+        //}
+    //volk_gnsssdr_free(in_a);
+//}
+
+//#endif // AVX2
+
+
+#ifdef LV_HAVE_AVX2
+/*
+ * Test puppet for volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2.
+ * Builds the start phasor and the per-sample phase increment from fixed angles, copies the
+ * first num_points int16_t values of `in` into three separately allocated tap buffers,
+ * runs the kernel on them and finally releases the buffers.
+ */
+static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_u_avx2(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
+{
+    // The phase rotator expects complex exponentials, so both phasors are built from cos/sin of an angle.
+    const float rem_carrier_phase_in_rad = 0.345;
+    const float phase_step_rad = 0.1;
+    lv_32fc_t phase = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
+    const lv_32fc_t phase_inc = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
+
+    const int num_a_vectors = 3;
+    const size_t tap_bytes = sizeof(int16_t) * num_points;
+    int tap;
+
+    // One pointer per tap, then one buffer per tap filled with the same input data.
+    int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
+    for(tap = 0; tap < num_a_vectors; ++tap)
+        {
+            in_a[tap] = (int16_t*)volk_gnsssdr_malloc(tap_bytes, volk_gnsssdr_get_alignment());
+            memcpy(in_a[tap], in, tap_bytes);
+        }
+
+    volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2(result, local_code, phase_inc, &phase, (const int16_t**)in_a, num_a_vectors, num_points);
+
+    // Release the tap buffers first, then the pointer table.
+    for(tap = 0; tap < num_a_vectors; ++tap)
+        {
+            volk_gnsssdr_free(in_a[tap]);
+        }
+    volk_gnsssdr_free(in_a);
+}
+
+#endif // AVX2
+
+
+//#ifdef LV_HAVE_AVX2
+//static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_u_avx2_reload(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
+//{
+    //// phases must be normalized. Phase rotator expects a complex exponential input!
+    //float rem_carrier_phase_in_rad = 0.345;
+    //float phase_step_rad = 0.1;
+    //lv_32fc_t phase[1];
+    //phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
+    //lv_32fc_t phase_inc[1];
+    //phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
+    //unsigned int n;
+    //int num_a_vectors = 3;
+    //int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
+    //for(n = 0; n < num_a_vectors; n++)
+        //{
+            //in_a[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
+            //memcpy((int16_t*)in_a[n], (int16_t*)in, sizeof(int16_t) * num_points);
+        //}
+
+    //volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2_reload(result, local_code, phase_inc[0], phase, (const int16_t**) in_a, num_a_vectors, num_points);
+
+    //for(n = 0; n < num_a_vectors; n++)
+        //{
+            //volk_gnsssdr_free(in_a[n]);
+        //}
+    //volk_gnsssdr_free(in_a);
+//}
+
+//#endif // AVX2
+
+
+//#ifdef LV_HAVE_NEON
+//static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
+//{
+    //// phases must be normalized. Phase rotator expects a complex exponential input!
+    //float rem_carrier_phase_in_rad = 0.345;
+    //float phase_step_rad = 0.1;
+    //lv_32fc_t phase[1];
+    //phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
+    //lv_32fc_t phase_inc[1];
+    //phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
+    //unsigned int n;
+    //int num_a_vectors = 3;
+    //int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
+    //for(n = 0; n < num_a_vectors; n++)
+        //{
+            //in_a[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
+            //memcpy((int16_t*)in_a[n], (int16_t*)in, sizeof(int16_t) * num_points);
+        //}
+
+    //volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_neon(result, local_code, phase_inc[0], phase, (const int16_t**) in_a, num_a_vectors, num_points);
+
+    //for(n = 0; n < num_a_vectors; n++)
+        //{
+            //volk_gnsssdr_free(in_a[n]);
+        //}
+    //volk_gnsssdr_free(in_a);
+//}
+
+//#endif // NEON
+
+
+//#ifdef LV_HAVE_NEON
+//static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_neon_vma(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
+//{
+    //// phases must be normalized. Phase rotator expects a complex exponential input!
+    //float rem_carrier_phase_in_rad = 0.345;
+    //float phase_step_rad = 0.1;
+    //lv_32fc_t phase[1];
+    //phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
+    //lv_32fc_t phase_inc[1];
+    //phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
+    //unsigned int n;
+    //int num_a_vectors = 3;
+    //int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
+    //for(n = 0; n < num_a_vectors; n++)
+        //{
+            //in_a[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
+            //memcpy((int16_t*)in_a[n], (int16_t*)in, sizeof(int16_t) * num_points);
+        //}
+
+    //volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_neon_vma(result, local_code, phase_inc[0], phase, (const int16_t**) in_a, num_a_vectors, num_points);
+
+    //for(n = 0; n < num_a_vectors; n++)
+        //{
+            //volk_gnsssdr_free(in_a[n]);
+        //}
+    //volk_gnsssdr_free(in_a);
+//}
+
+//#endif // NEON
+
+#endif  // INCLUDED_volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_H
+
+
+
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resamplerxnpuppet_16ic.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_resamplerxnpuppet_16ic.h
@ -44,8 +44,8 @@
 #ifdef LV_HAVE_GENERIC
 static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_generic(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    unsigned int n;
    float rem_code_phase_chips = -0.234;
@ -74,8 +74,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_generic(lv_16sc_t* r
 #ifdef LV_HAVE_SSE3
 static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -103,8 +103,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse3(lv_16sc_t* re
 #ifdef LV_HAVE_SSE3
 static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -133,8 +133,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse3(lv_16sc_t* re
 #ifdef LV_HAVE_SSE4_1
 static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse4_1(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -163,8 +163,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse4_1(lv_16sc_t*
 #ifdef LV_HAVE_SSE4_1
 static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse4_1(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -193,8 +193,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse4_1(lv_16sc_t*
 #ifdef LV_HAVE_AVX
 static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_avx(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -223,8 +223,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_avx(lv_16sc_t* res
 #ifdef LV_HAVE_AVX
 static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_avx(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -253,8 +253,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_avx(lv_16sc_t* res
 #ifdef LV_HAVE_NEON
 static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_resamplerxnpuppet_32f.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_resamplerxnpuppet_32f.h
@ -0,0 +1,279 @@
+/*!
+ * \file volk_gnsssdr_32f_resamplerxnpuppet_32f.h
+ * \brief VOLK_GNSSSDR puppet for the multiple 32-bit float vector resampler kernel.
+ * \authors <ul>
+ *          <li> Cillian O'Driscoll 2017 cillian.odriscoll at gmail dot com
+ *          </ul>
+ *
+ * VOLK_GNSSSDR puppet for integrating the multiple resampler into the test system
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2017  (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ *          Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#ifndef INCLUDED_volk_gnsssdr_32f_resamplerxnpuppet_32f_H
+#define INCLUDED_volk_gnsssdr_32f_resamplerxnpuppet_32f_H
+
+#include "volk_gnsssdr/volk_gnsssdr_32f_xn_resampler_32f_xn.h"
+#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <volk_gnsssdr/volk_gnsssdr.h>
+#include <string.h>
+
+
+
+#ifdef LV_HAVE_GENERIC
+static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_generic(float* result, const float* local_code, unsigned int num_points)
+{
+    /*
+     * Test-system puppet for the generic multi-output resampler: runs the
+     * kernel with fixed parameters into three scratch vectors and copies only
+     * the first one into result (num_points floats).
+     * If a scratch allocation fails, result is left untouched.
+     */
+    int code_length_chips = 2046;
+    // num_points samples advance the code phase by code_length_chips + 0.1 chips in total
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
+    int num_out_vectors = 3;
+    float rem_code_phase_chips = -0.234;
+    int n;
+    int num_allocated = 0;  // how many output vectors were successfully allocated
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 };
+
+    float** result_aux = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment());
+    if (result_aux == NULL) return;
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
+        if (result_aux[n] == NULL) break;
+        num_allocated++;
+    }
+
+    // Only run the kernel when every output vector is available.
+    if (num_allocated == num_out_vectors)
+    {
+        volk_gnsssdr_32f_xn_resampler_32f_xn_generic(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+        memcpy(result, result_aux[0], sizeof(float) * num_points);
+    }
+
+    // Release exactly the vectors that were obtained, then the pointer table.
+    for(n = 0; n < num_allocated; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux);
+}
+
+
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_SSE3
+static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_a_sse3(float* result, const float* local_code, unsigned int num_points)
+{
+    /*
+     * Test-system puppet for the a_sse3 multi-output resampler: runs the
+     * kernel with fixed parameters into three scratch vectors and copies only
+     * the first one into result (num_points floats).
+     * If a scratch allocation fails, result is left untouched.
+     */
+    int code_length_chips = 2046;
+    // num_points samples advance the code phase by code_length_chips + 0.1 chips in total
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
+    int num_out_vectors = 3;
+    float rem_code_phase_chips = -0.234;
+    int n;
+    int num_allocated = 0;  // how many output vectors were successfully allocated
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 };
+
+    float** result_aux = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment());
+    if (result_aux == NULL) return;
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
+        if (result_aux[n] == NULL) break;
+        num_allocated++;
+    }
+
+    // Only run the kernel when every output vector is available.
+    if (num_allocated == num_out_vectors)
+    {
+        volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse3(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+        memcpy(result, result_aux[0], sizeof(float) * num_points);
+    }
+
+    // Release exactly the vectors that were obtained, then the pointer table.
+    for(n = 0; n < num_allocated; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux);
+}
+
+#endif
+
+#ifdef LV_HAVE_SSE3
+static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_u_sse3(float* result, const float* local_code, unsigned int num_points)
+{
+    /*
+     * Test-system puppet for the u_sse3 multi-output resampler: runs the
+     * kernel with fixed parameters into three scratch vectors and copies only
+     * the first one into result (num_points floats).
+     * If a scratch allocation fails, result is left untouched.
+     */
+    int code_length_chips = 2046;
+    // num_points samples advance the code phase by code_length_chips + 0.1 chips in total
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
+    int num_out_vectors = 3;
+    float rem_code_phase_chips = -0.234;
+    int n;
+    int num_allocated = 0;  // how many output vectors were successfully allocated
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 };
+
+    float** result_aux = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment());
+    if (result_aux == NULL) return;
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
+        if (result_aux[n] == NULL) break;
+        num_allocated++;
+    }
+
+    // Only run the kernel when every output vector is available.
+    if (num_allocated == num_out_vectors)
+    {
+        volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse3(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+        memcpy(result, result_aux[0], sizeof(float) * num_points);
+    }
+
+    // Release exactly the vectors that were obtained, then the pointer table.
+    for(n = 0; n < num_allocated; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux);
+}
+
+#endif
+
+
+#ifdef LV_HAVE_SSE4_1
+static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_u_sse4_1(float* result, const float* local_code, unsigned int num_points)
+{
+    /*
+     * Test-system puppet for the u_sse4_1 multi-output resampler: runs the
+     * kernel with fixed parameters into three scratch vectors and copies only
+     * the first one into result (num_points floats).
+     * If a scratch allocation fails, result is left untouched.
+     */
+    int code_length_chips = 2046;
+    // num_points samples advance the code phase by code_length_chips + 0.1 chips in total
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
+    int num_out_vectors = 3;
+    float rem_code_phase_chips = -0.234;
+    int n;
+    int num_allocated = 0;  // how many output vectors were successfully allocated
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 };
+
+    float** result_aux = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment());
+    if (result_aux == NULL) return;
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
+        if (result_aux[n] == NULL) break;
+        num_allocated++;
+    }
+
+    // Only run the kernel when every output vector is available.
+    if (num_allocated == num_out_vectors)
+    {
+        volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse4_1(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+        memcpy(result, result_aux[0], sizeof(float) * num_points);
+    }
+
+    // Release exactly the vectors that were obtained, then the pointer table.
+    for(n = 0; n < num_allocated; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux);
+}
+
+#endif
+
+#ifdef LV_HAVE_SSE4_1
+static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_a_sse4_1(float* result, const float* local_code, unsigned int num_points)
+{
+    /*
+     * Test-system puppet for the a_sse4_1 multi-output resampler: runs the
+     * kernel with fixed parameters into three scratch vectors and copies only
+     * the first one into result (num_points floats).
+     * If a scratch allocation fails, result is left untouched.
+     */
+    int code_length_chips = 2046;
+    // num_points samples advance the code phase by code_length_chips + 0.1 chips in total
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
+    int num_out_vectors = 3;
+    float rem_code_phase_chips = -0.234;
+    int n;
+    int num_allocated = 0;  // how many output vectors were successfully allocated
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 };
+
+    float** result_aux = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment());
+    if (result_aux == NULL) return;
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
+        if (result_aux[n] == NULL) break;
+        num_allocated++;
+    }
+
+    // Only run the kernel when every output vector is available.
+    if (num_allocated == num_out_vectors)
+    {
+        volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse4_1(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+        memcpy(result, result_aux[0], sizeof(float) * num_points);
+    }
+
+    // Release exactly the vectors that were obtained, then the pointer table.
+    for(n = 0; n < num_allocated; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux);
+}
+
+#endif
+
+#ifdef LV_HAVE_AVX
+static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_a_avx(float* result, const float* local_code, unsigned int num_points)
+{
+    /*
+     * Test-system puppet for the a_avx multi-output resampler: runs the
+     * kernel with fixed parameters into three scratch vectors and copies only
+     * the first one into result (num_points floats).
+     * If a scratch allocation fails, result is left untouched.
+     */
+    int code_length_chips = 2046;
+    // num_points samples advance the code phase by code_length_chips + 0.1 chips in total
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
+    int num_out_vectors = 3;
+    float rem_code_phase_chips = -0.234;
+    int n;
+    int num_allocated = 0;  // how many output vectors were successfully allocated
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 };
+
+    float** result_aux = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment());
+    if (result_aux == NULL) return;
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
+        if (result_aux[n] == NULL) break;
+        num_allocated++;
+    }
+
+    // Only run the kernel when every output vector is available.
+    if (num_allocated == num_out_vectors)
+    {
+        volk_gnsssdr_32f_xn_resampler_32f_xn_a_avx(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+        memcpy(result, result_aux[0], sizeof(float) * num_points);
+    }
+
+    // Release exactly the vectors that were obtained, then the pointer table.
+    for(n = 0; n < num_allocated; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux);
+}
+#endif
+
+
+#ifdef LV_HAVE_AVX
+static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_u_avx(float* result, const float* local_code, unsigned int num_points)
+{
+    /*
+     * Test-system puppet for the u_avx multi-output resampler: runs the
+     * kernel with fixed parameters into three scratch vectors and copies only
+     * the first one into result (num_points floats).
+     * If a scratch allocation fails, result is left untouched.
+     */
+    int code_length_chips = 2046;
+    // num_points samples advance the code phase by code_length_chips + 0.1 chips in total
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
+    int num_out_vectors = 3;
+    float rem_code_phase_chips = -0.234;
+    int n;
+    int num_allocated = 0;  // how many output vectors were successfully allocated
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 };
+
+    float** result_aux = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment());
+    if (result_aux == NULL) return;
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
+        if (result_aux[n] == NULL) break;
+        num_allocated++;
+    }
+
+    // Only run the kernel when every output vector is available.
+    if (num_allocated == num_out_vectors)
+    {
+        volk_gnsssdr_32f_xn_resampler_32f_xn_u_avx(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+        memcpy(result, result_aux[0], sizeof(float) * num_points);
+    }
+
+    // Release exactly the vectors that were obtained, then the pointer table.
+    for(n = 0; n < num_allocated; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux);
+}
+#endif
+
+#ifdef LV_HAVE_NEON
+static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_neon(float* result, const float* local_code, unsigned int num_points)
+{
+    /*
+     * Test-system puppet for the NEON multi-output resampler: runs the
+     * kernel with fixed parameters into three scratch vectors and copies only
+     * the first one into result (num_points floats).
+     * If a scratch allocation fails, result is left untouched.
+     */
+    int code_length_chips = 2046;
+    // num_points samples advance the code phase by code_length_chips + 0.1 chips in total
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
+    int num_out_vectors = 3;
+    float rem_code_phase_chips = -0.234;
+    int n;
+    int num_allocated = 0;  // how many output vectors were successfully allocated
+    float shifts_chips[3] = { -0.1, 0.0, 0.1 };
+
+    float** result_aux = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_out_vectors, volk_gnsssdr_get_alignment());
+    if (result_aux == NULL) return;
+
+    for(n = 0; n < num_out_vectors; n++)
+    {
+        result_aux[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
+        if (result_aux[n] == NULL) break;
+        num_allocated++;
+    }
+
+    // Only run the kernel when every output vector is available.
+    if (num_allocated == num_out_vectors)
+    {
+        volk_gnsssdr_32f_xn_resampler_32f_xn_neon(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
+
+        memcpy(result, result_aux[0], sizeof(float) * num_points);
+    }
+
+    // Release exactly the vectors that were obtained, then the pointer table.
+    for(n = 0; n < num_allocated; n++)
+    {
+        volk_gnsssdr_free(result_aux[n]);
+    }
+    volk_gnsssdr_free(result_aux);
+}
+#endif
+
+#endif // INCLUDED_volk_gnsssdr_32f_resamplerxnpuppet_32f_H
+
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_xn_resampler_32f_xn.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_xn_resampler_32f_xn.h
@ -0,0 +1,610 @@
+/*!
+ * \file volk_gnsssdr_32f_xn_resampler_32f_xn.h
+ * \brief VOLK_GNSSSDR kernel: Resamples N 32-bit float vectors using zero hold resample algorithm.
+ * \authors <ul>
+ *          <li> Cillian O'Driscoll, 2017. cillian.odriscoll(at)gmail.com
+ *          </ul>
+ *
+ * VOLK_GNSSSDR kernel that resamples N 32-bit float vectors using zero hold resample algorithm.
+ * It is optimized to resample a single GNSS local code signal replica into N vectors fractional-resampled and fractional-delayed
+ * (i.e. it creates the Early, Prompt, and Late code replicas)
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2017  (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ *          Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+/*!
+ * \page volk_gnsssdr_32f_xn_resampler_32f_xn
+ *
+ * \b Overview
+ *
+ * Resamples a 32-bit floating point vector, providing \p num_out_vectors outputs.
+ *
+ * <b>Dispatcher Prototype</b>
+ * \code
+ * void volk_gnsssdr_32f_xn_resampler_32f_xn(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+ * \endcode
+ *
+ * \b Inputs
+ * \li local_code:            Vector to be resampled.
+ * \li rem_code_phase_chips:  Remnant code phase [chips].
+ * \li code_phase_step_chips: Phase increment per sample [chips/sample].
+ * \li shifts_chips:          Vector of floats that defines the spacing (in chips) between the replicas of \p local_code
+ * \li code_length_chips:     Code length in chips.
+ * \li num_out_vectors:       Number of output vectors.
+ * \li num_points:            The number of data values to be in the resampled vector.
+ *
+ * \b Outputs
+ * \li result:                Pointer to a vector of pointers where the results will be stored.
+ *
+ */
+
+#ifndef INCLUDED_volk_gnsssdr_32f_xn_resampler_32f_xn_H
+#define INCLUDED_volk_gnsssdr_32f_xn_resampler_32f_xn_H
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h> /* abs */
+#include <stdint.h> /* int64_t */
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+
+#ifdef LV_HAVE_GENERIC
+
+static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_generic(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{
+    /*
+     * Portable implementation of the multi-output resampler.
+     * For every tap t in [0, num_out_vectors) and sample n in [0, num_points):
+     *   result[t][n] = local_code[idx], where idx is
+     *   floor(code_phase_step_chips * n + shifts_chips[t] - rem_code_phase_chips)
+     *   wrapped into [0, code_length_chips).
+     * Nothing is written when code_length_chips is zero or does not fit in an int.
+     */
+    const int code_length = (int)code_length_chips;
+    int local_code_chip_index;
+    int current_correlator_tap;
+    unsigned int n;
+
+    // An empty code has no chip to copy and would make the modulo below undefined.
+    if (code_length <= 0) return;
+
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            for (n = 0; n < num_points; n++)
+                {
+                    // resample code for current tap
+                    local_code_chip_index = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
+                    // Wrap into [0, code_length): in multitap correlators the shifts can be
+                    // negative, and the C remainder keeps the sign of the dividend.
+                    local_code_chip_index %= code_length;
+                    if (local_code_chip_index < 0) local_code_chip_index += code_length;
+                    result[current_correlator_tap][n] = local_code[local_code_chip_index];
+                }
+        }
+}
+
+#endif /*LV_HAVE_GENERIC*/
+
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+/*
+ * SSE3 implementation of the multi-output resampler.
+ * For each of the num_out_vectors taps, fills result[tap][0 .. num_points-1] with
+ * local_code[idx], where idx is derived from
+ * code_phase_step_chips * n + (shifts_chips[tap] - rem_code_phase_chips),
+ * floored and reduced modulo code_length_chips.
+ * Four samples are computed per vector iteration; the remaining num_points % 4
+ * samples are handled by a scalar loop.
+ * NOTE(review): the "_a_" suffix presumably marks the aligned-buffer variant; in
+ * this body the only aligned access is the store into the local index buffer.
+ */
+static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse3(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{
+    float** _result = result;
+    // number of full 4-sample vector iterations
+    const unsigned int quarterPoints = num_points / 4;
+    int current_correlator_tap;
+    unsigned int n;
+    unsigned int k;
+    const __m128 ones = _mm_set1_ps(1.0f);
+    const __m128 fours = _mm_set1_ps(4.0f);
+    const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
+    const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
+
+    // scratch buffer receiving the four chip indices of one vector iteration
+    __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
+    // chip index used by the scalar tail loop
+    int local_code_chip_index_;
+
+    const __m128i zeros = _mm_setzero_si128();
+    const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
+    const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
+    __m128i local_code_chip_index_reg, aux_i, negatives, i;
+    __m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base;
+
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
+            // per-tap constant offset: shift - remnant code phase
+            aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
+            // sample indices n = 0, 1, 2, 3 (lane 0 holds 0.0f)
+            __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
+            for(n = 0; n < quarterPoints; n++)
+                {
+                    // code phase of the four samples: step * n + (shift - rem)
+                    aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
+                    aux = _mm_add_ps(aux, aux2);
+                    // floor: truncate toward zero, then subtract 1 in the lanes where
+                    // the truncated value is greater than the original value
+                    i = _mm_cvttps_epi32(aux);
+                    fi = _mm_cvtepi32_ps(i);
+                    igx = _mm_cmpgt_ps(fi, aux);
+                    j = _mm_and_ps(igx, ones);
+                    aux = _mm_sub_ps(fi, j);
+                    // fmod: remainder = aux - trunc(aux / length) * length
+                    // (takes the sign of aux, so it can be negative)
+                    c = _mm_div_ps(aux, code_length_chips_reg_f);
+                    i = _mm_cvttps_epi32(c);
+                    cTrunc = _mm_cvtepi32_ps(i);
+                    base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
+                    local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base));
+
+                    // add the code length to the lanes holding a negative remainder
+                    negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros);
+                    aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
+                    local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i);
+                    // spill the indices and gather the chips with scalar loads
+                    _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);
+                    for(k = 0; k < 4; ++k)
+                        {
+                            _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
+                        }
+                    // advance to the next four sample indices
+                    indexn = _mm_add_ps(indexn, fours);
+                }
+            // scalar tail for the samples left over after the vector iterations
+            for(n = quarterPoints * 4; n < num_points; n++)
+                {
+                    // resample code for current tap
+                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
+                    //Take into account that in multitap correlators, the shifts can be negative!
+                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1) ;
+                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
+                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_];
+                }
+        }
+}
+
+#endif 
+
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+/*
+ * SSE3 implementation of the multi-output resampler.
+ * For each of the num_out_vectors taps, fills result[tap][0 .. num_points-1] with
+ * local_code[idx], where idx is derived from
+ * code_phase_step_chips * n + (shifts_chips[tap] - rem_code_phase_chips),
+ * floored and reduced modulo code_length_chips.
+ * Four samples are computed per vector iteration; the remaining num_points % 4
+ * samples are handled by a scalar loop.
+ * NOTE(review): the "_u_" suffix presumably marks the unaligned-buffer variant; in
+ * this body result and local_code are only accessed through scalar indexing.
+ */
+static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse3(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{
+    float** _result = result;
+    // number of full 4-sample vector iterations
+    const unsigned int quarterPoints = num_points / 4;
+    int current_correlator_tap;
+    unsigned int n;
+    unsigned int k;
+    const __m128 ones = _mm_set1_ps(1.0f);
+    const __m128 fours = _mm_set1_ps(4.0f);
+    const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
+    const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
+
+    // scratch buffer receiving the four chip indices of one vector iteration
+    __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
+    // chip index used by the scalar tail loop
+    int local_code_chip_index_;
+
+    const __m128i zeros = _mm_setzero_si128();
+    const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
+    const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
+    __m128i local_code_chip_index_reg, aux_i, negatives, i;
+    __m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base;
+
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
+            // per-tap constant offset: shift - remnant code phase
+            aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
+            // sample indices n = 0, 1, 2, 3 (lane 0 holds 0.0f)
+            __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
+            for(n = 0; n < quarterPoints; n++)
+                {
+                    // code phase of the four samples: step * n + (shift - rem)
+                    aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
+                    aux = _mm_add_ps(aux, aux2);
+                    // floor: truncate toward zero, then subtract 1 in the lanes where
+                    // the truncated value is greater than the original value
+                    i = _mm_cvttps_epi32(aux);
+                    fi = _mm_cvtepi32_ps(i);
+                    igx = _mm_cmpgt_ps(fi, aux);
+                    j = _mm_and_ps(igx, ones);
+                    aux = _mm_sub_ps(fi, j);
+                    // fmod: remainder = aux - trunc(aux / length) * length
+                    // (takes the sign of aux, so it can be negative)
+                    c = _mm_div_ps(aux, code_length_chips_reg_f);
+                    i = _mm_cvttps_epi32(c);
+                    cTrunc = _mm_cvtepi32_ps(i);
+                    base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
+                    local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base));
+
+                    // add the code length to the lanes holding a negative remainder
+                    negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros);
+                    aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
+                    local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i);
+                    // spill the indices and gather the chips with scalar loads
+                    _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);
+                    for(k = 0; k < 4; ++k)
+                        {
+                            _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
+                        }
+                    // advance to the next four sample indices
+                    indexn = _mm_add_ps(indexn, fours);
+                }
+            // scalar tail for the samples left over after the vector iterations
+            for(n = quarterPoints * 4; n < num_points; n++)
+                {
+                    // resample code for current tap
+                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
+                    //Take into account that in multitap correlators, the shifts can be negative!
+                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1) ;
+                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
+                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_];
+                }
+        }
+}
+#endif
+
+
+#ifdef LV_HAVE_SSE4_1
+#include <smmintrin.h>
+/*
+ * SSE4.1 implementation of the multi-output resampler.
+ * For each of the num_out_vectors taps, fills result[tap][0 .. num_points-1] with
+ * local_code[idx], where idx is derived from
+ * code_phase_step_chips * n + (shifts_chips[tap] - rem_code_phase_chips),
+ * floored with _mm_floor_ps and reduced modulo code_length_chips.
+ * Four samples are computed per vector iteration; the remaining num_points % 4
+ * samples are handled by a scalar loop.
+ * NOTE(review): the "_a_" suffix presumably marks the aligned-buffer variant; in
+ * this body the only aligned access is the store into the local index buffer.
+ */
+static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse4_1(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{
+    float** _result = result;
+    // number of full 4-sample vector iterations
+    const unsigned int quarterPoints = num_points / 4;
+    int current_correlator_tap;
+    unsigned int n;
+    unsigned int k;
+    const __m128 fours = _mm_set1_ps(4.0f);
+    const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
+    const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
+
+    // scratch buffer receiving the four chip indices of one vector iteration
+    __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
+    // chip index used by the scalar tail loop
+    int local_code_chip_index_;
+
+    const __m128i zeros = _mm_setzero_si128();
+    const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
+    const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
+    __m128i local_code_chip_index_reg, aux_i, negatives, i;
+    __m128 aux, aux2, shifts_chips_reg, c, cTrunc, base;
+
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
+            // per-tap constant offset: shift - remnant code phase
+            aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
+            // sample indices n = 0, 1, 2, 3 (lane 0 holds 0.0f)
+            __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
+            for(n = 0; n < quarterPoints; n++)
+                {
+                    // code phase of the four samples: step * n + (shift - rem)
+                    aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
+                    aux = _mm_add_ps(aux, aux2);
+                    // floor
+                    aux = _mm_floor_ps(aux);
+
+                    // fmod: remainder = aux - trunc(aux / length) * length
+                    // (takes the sign of aux, so it can be negative)
+                    c = _mm_div_ps(aux, code_length_chips_reg_f);
+                    i = _mm_cvttps_epi32(c);
+                    cTrunc = _mm_cvtepi32_ps(i);
+                    base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
+                    local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base));
+
+                    // add the code length to the lanes holding a negative remainder
+                    negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros);
+                    aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
+                    local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i);
+                    // spill the indices and gather the chips with scalar loads
+                    _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);
+                    for(k = 0; k < 4; ++k)
+                        {
+                            _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
+                        }
+                    // advance to the next four sample indices
+                    indexn = _mm_add_ps(indexn, fours);
+                }
+            // scalar tail for the samples left over after the vector iterations
+            for(n = quarterPoints * 4; n < num_points; n++)
+                {
+                    // resample code for current tap
+                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
+                    //Take into account that in multitap correlators, the shifts can be negative!
+                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1) ;
+                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
+                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_];
+                }
+        }
+}
+
+#endif 
+
+
+#ifdef LV_HAVE_SSE4_1
+#include <smmintrin.h>
+/*
+ * SSE4.1 implementation of the multi-output resampler.
+ * For each of the num_out_vectors taps, fills result[tap][0 .. num_points-1] with
+ * local_code[idx], where idx is derived from
+ * code_phase_step_chips * n + (shifts_chips[tap] - rem_code_phase_chips),
+ * floored with _mm_floor_ps and reduced modulo code_length_chips.
+ * Four samples are computed per vector iteration; the remaining num_points % 4
+ * samples are handled by a scalar loop.
+ * NOTE(review): the "_u_" suffix presumably marks the unaligned-buffer variant; in
+ * this body result and local_code are only accessed through scalar indexing.
+ */
+static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse4_1(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{
+    float** _result = result;
+    // number of full 4-sample vector iterations
+    const unsigned int quarterPoints = num_points / 4;
+    int current_correlator_tap;
+    unsigned int n;
+    unsigned int k;
+    const __m128 fours = _mm_set1_ps(4.0f);
+    const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
+    const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
+
+    // scratch buffer receiving the four chip indices of one vector iteration
+    __VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
+    // chip index used by the scalar tail loop
+    int local_code_chip_index_;
+
+    const __m128i zeros = _mm_setzero_si128();
+    const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
+    const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
+    __m128i local_code_chip_index_reg, aux_i, negatives, i;
+    __m128 aux, aux2, shifts_chips_reg, c, cTrunc, base;
+
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
+            // per-tap constant offset: shift - remnant code phase
+            aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
+            // sample indices n = 0, 1, 2, 3 (lane 0 holds 0.0f)
+            __m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
+            for(n = 0; n < quarterPoints; n++)
+                {
+                    // code phase of the four samples: step * n + (shift - rem)
+                    aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
+                    aux = _mm_add_ps(aux, aux2);
+                    // floor
+                    aux = _mm_floor_ps(aux);
+
+                    // fmod: remainder = aux - trunc(aux / length) * length
+                    // (takes the sign of aux, so it can be negative)
+                    c = _mm_div_ps(aux, code_length_chips_reg_f);
+                    i = _mm_cvttps_epi32(c);
+                    cTrunc = _mm_cvtepi32_ps(i);
+                    base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
+                    local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base));
+
+                    // add the code length to the lanes holding a negative remainder
+                    negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros);
+                    aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
+                    local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i);
+                    // spill the indices and gather the chips with scalar loads
+                    _mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);
+                    for(k = 0; k < 4; ++k)
+                        {
+                            _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
+                        }
+                    // advance to the next four sample indices
+                    indexn = _mm_add_ps(indexn, fours);
+                }
+            // scalar tail for the samples left over after the vector iterations
+            for(n = quarterPoints * 4; n < num_points; n++)
+                {
+                    // resample code for current tap
+                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
+                    //Take into account that in multitap correlators, the shifts can be negative!
+                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1) ;
+                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
+                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_];
+                }
+        }
+}
+
+#endif
+
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+ * AVX implementation ("a" variant) of the multi-tap real code resampler.
+ *
+ * For every tap t in [0, num_out_vectors) and sample n in [0, num_points):
+ *   result[t][n] = local_code[idx], where idx is
+ *   floor(code_phase_step_chips * n + shifts_chips[t] - rem_code_phase_chips)
+ *   wrapped into [0, code_length_chips).
+ *
+ * Chip indices are computed eight at a time in AVX registers and spilled to a
+ * small aligned buffer; the reads from local_code are done in scalar code.
+ * The last (num_points % 8) samples of each tap are processed in scalar code.
+ */
+static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_a_avx(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{
+    const unsigned int full_blocks = num_points / 8;
+    const unsigned int tail_start = full_blocks * 8;
+
+    const __m256 lane_offsets = _mm256_set_ps(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);
+    const __m256 block_stride = _mm256_set1_ps(8.0f);
+    const __m256 zero_ps = _mm256_setzero_ps();
+    const __m256 step_ps = _mm256_set1_ps(code_phase_step_chips);
+    const __m256 rem_phase_ps = _mm256_set1_ps(rem_code_phase_chips);
+    const __m256 code_len_ps = _mm256_set1_ps((float)code_length_chips);
+
+    __VOLK_ATTR_ALIGNED(32) int chip_idx[8];
+    int tap;
+    int idx;
+    unsigned int blk;
+    unsigned int lane;
+    unsigned int n;
+
+    // Vectorised part: eight output samples per iteration
+    for (tap = 0; tap < num_out_vectors; tap++)
+        {
+            // Per-tap constant term of the code phase: shift - remnant phase
+            const __m256 tap_offset = _mm256_sub_ps(_mm256_set1_ps((float)shifts_chips[tap]), rem_phase_ps);
+            __m256 sample_idx = lane_offsets;
+
+            for (blk = 0; blk < full_blocks; blk++)
+                {
+                    __m256 chips, quotient, wrapped, is_negative;
+                    __m256i wrapped_i;
+
+                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&result[tap][8 * blk + 7], 1, 0);
+                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&chip_idx[8], 1, 3);
+
+                    // floor(step * n + shift - remnant)
+                    chips = _mm256_floor_ps(_mm256_add_ps(_mm256_mul_ps(step_ps, sample_idx), tap_offset));
+
+                    // Remainder of the division by the code length, using a truncated quotient
+                    quotient = _mm256_cvtepi32_ps(_mm256_cvttps_epi32(_mm256_div_ps(chips, code_len_ps)));
+                    wrapped_i = _mm256_cvttps_epi32(_mm256_sub_ps(chips, _mm256_mul_ps(quotient, code_len_ps)));
+
+                    // A truncated quotient leaves negative remainders for negative
+                    // phases: add one code length to those lanes only
+                    wrapped = _mm256_cvtepi32_ps(wrapped_i);
+                    is_negative = _mm256_cmp_ps(wrapped, zero_ps, _CMP_LT_OS);
+                    wrapped = _mm256_add_ps(wrapped, _mm256_and_ps(code_len_ps, is_negative));
+
+                    _mm256_store_si256((__m256i*)chip_idx, _mm256_cvttps_epi32(wrapped));
+                    for (lane = 0; lane < 8; ++lane)
+                        {
+                            result[tap][blk * 8 + lane] = local_code[chip_idx[lane]];
+                        }
+
+                    sample_idx = _mm256_add_ps(sample_idx, block_stride);
+                }
+        }
+    _mm256_zeroupper();
+
+    // Scalar part: samples that do not fill a complete group of eight
+    for (tap = 0; tap < num_out_vectors; tap++)
+        {
+            for (n = tail_start; n < num_points; n++)
+                {
+                    idx = (int)floor(code_phase_step_chips * (float)n + shifts_chips[tap] - rem_code_phase_chips);
+                    // Shifts can be negative in multitap correlators: bring the index back to a non-negative value
+                    if (idx < 0)
+                        {
+                            idx += (int)code_length_chips * (abs(idx) / code_length_chips + 1);
+                        }
+                    idx %= code_length_chips;
+                    result[tap][n] = local_code[idx];
+                }
+        }
+}
+
+#endif
+
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+ * AVX implementation ("u" variant) of the multi-tap real code resampler.
+ *
+ * Writes, for each of the num_out_vectors taps, num_points samples of
+ * local_code picked at chip index
+ *   floor(code_phase_step_chips * n + shifts_chips[tap] - rem_code_phase_chips)
+ * folded into the range [0, code_length_chips).
+ *
+ * The only vector store in this function targets a local 32-byte aligned
+ * scratch array; result and local_code are accessed element by element.
+ */
+static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_u_avx(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{
+    const unsigned int groups_of_eight = num_points / 8;
+    const unsigned int first_leftover = groups_of_eight * 8;
+
+    const __m256 v_zero = _mm256_setzero_ps();
+    const __m256 v_eight = _mm256_set1_ps(8.0f);
+    const __m256 v_ramp = _mm256_set_ps(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);
+    const __m256 v_step = _mm256_set1_ps(code_phase_step_chips);
+    const __m256 v_rem_phase = _mm256_set1_ps(rem_code_phase_chips);
+    const __m256 v_code_len = _mm256_set1_ps((float)code_length_chips);
+
+    __VOLK_ATTR_ALIGNED(32) int scratch[8];
+    int tap;
+    int chip;
+    unsigned int group;
+    unsigned int slot;
+    unsigned int n;
+
+    for (tap = 0; tap < num_out_vectors; tap++)
+        {
+            // shift of this tap minus the remnant code phase, replicated on all lanes
+            const __m256 v_bias = _mm256_sub_ps(_mm256_set1_ps((float)shifts_chips[tap]), v_rem_phase);
+            __m256 v_n = v_ramp;
+
+            for (group = 0; group < groups_of_eight; group++)
+                {
+                    __m256 v_phase, v_whole, v_rem, v_neg_mask;
+
+                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&result[tap][8 * group + 7], 1, 0);
+                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&scratch[8], 1, 3);
+
+                    // Integer chip position of each of the eight samples
+                    v_phase = _mm256_mul_ps(v_step, v_n);
+                    v_phase = _mm256_add_ps(v_phase, v_bias);
+                    v_phase = _mm256_floor_ps(v_phase);
+
+                    // fmod(v_phase, code length): subtract the truncated quotient times the length
+                    v_whole = _mm256_cvtepi32_ps(_mm256_cvttps_epi32(_mm256_div_ps(v_phase, v_code_len)));
+                    v_whole = _mm256_mul_ps(v_whole, v_code_len);
+                    v_rem = _mm256_cvtepi32_ps(_mm256_cvttps_epi32(_mm256_sub_ps(v_phase, v_whole)));
+
+                    // Lanes that ended up below zero are moved up by one code length
+                    v_neg_mask = _mm256_cmp_ps(v_rem, v_zero, _CMP_LT_OS);
+                    v_rem = _mm256_add_ps(v_rem, _mm256_and_ps(v_code_len, v_neg_mask));
+
+                    _mm256_store_si256((__m256i*)scratch, _mm256_cvttps_epi32(v_rem));
+                    for (slot = 0; slot < 8; ++slot)
+                        {
+                            result[tap][group * 8 + slot] = local_code[scratch[slot]];
+                        }
+
+                    v_n = _mm256_add_ps(v_n, v_eight);
+                }
+        }
+    _mm256_zeroupper();
+
+    // Remaining samples (fewer than eight per tap), computed one by one
+    for (tap = 0; tap < num_out_vectors; tap++)
+        {
+            for (n = first_leftover; n < num_points; n++)
+                {
+                    chip = (int)floor(code_phase_step_chips * (float)n + shifts_chips[tap] - rem_code_phase_chips);
+                    // Negative shifts are possible in multitap correlators
+                    if (chip < 0)
+                        {
+                            chip += (int)code_length_chips * (abs(chip) / code_length_chips + 1);
+                        }
+                    chip %= code_length_chips;
+                    result[tap][n] = local_code[chip];
+                }
+        }
+}
+
+#endif
+
+
+#ifdef LV_HAVE_NEON
+#include <arm_neon.h>
+
+/*!
+ * NEON implementation of the multi-tap real code resampler.
+ *
+ * For each tap t in [0, num_out_vectors) and each sample n in [0, num_points)
+ * it writes result[t][n] = local_code[idx], where idx is derived from
+ *   code_phase_step_chips * n + shifts_chips[t] - rem_code_phase_chips
+ * by taking the floor and wrapping with respect to code_length_chips.
+ *
+ * Four chip indices are computed per iteration in NEON registers; the reads
+ * from local_code and the last (num_points % 4) samples are done in scalar code.
+ */
+static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_neon(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
+{
+    float** _result = result;
+    const unsigned int neon_iters = num_points / 4;  // number of complete groups of four samples
+    int current_correlator_tap;
+    unsigned int n;
+    unsigned int k;
+    const int32x4_t ones = vdupq_n_s32(1);
+    const float32x4_t fours = vdupq_n_f32(4.0f);
+    const float32x4_t rem_code_phase_chips_reg = vdupq_n_f32(rem_code_phase_chips);
+    const float32x4_t code_phase_step_chips_reg = vdupq_n_f32(code_phase_step_chips);
+
+    // Scratch buffer used to move the four computed indices from a register to scalar code
+    __VOLK_ATTR_ALIGNED(16) int32_t local_code_chip_index[4];
+    int32_t local_code_chip_index_;
+
+    const int32x4_t zeros = vdupq_n_s32(0);
+    const float32x4_t code_length_chips_reg_f = vdupq_n_f32((float)code_length_chips);
+    const int32x4_t code_length_chips_reg_i = vdupq_n_s32((int32_t)code_length_chips);
+    int32x4_t local_code_chip_index_reg, aux_i,  negatives, i;
+    float32x4_t aux, aux2, shifts_chips_reg, fi, c, j, cTrunc, base, indexn, reciprocal;
+    __VOLK_ATTR_ALIGNED(16) const float vec[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
+    uint32x4_t igx;
+    // 1 / code_length_chips: initial estimate followed by two vrecpsq_f32-based
+    // refinement steps. The division in the fmod step below is replaced by a
+    // multiplication with this value.
+    reciprocal = vrecpeq_f32(code_length_chips_reg_f);
+    reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal);
+    reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal); // this refinement is required!
+    float32x4_t n0 = vld1q_f32((float*)vec);  // sample offsets {0, 1, 2, 3} within a group
+
+    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
+        {
+            shifts_chips_reg = vdupq_n_f32((float)shifts_chips[current_correlator_tap]);
+            // Constant part of the code phase for this tap: shift - remnant code phase
+            aux2 = vsubq_f32(shifts_chips_reg, rem_code_phase_chips_reg);
+            indexn = n0;
+            for(n = 0; n < neon_iters; n++)
+                {
+                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&_result[current_correlator_tap][4 * n + 3], 1, 0);
+                    __VOLK_GNSSSDR_PREFETCH(&local_code_chip_index[4]);
+                    // code phase of the four samples: step * n + (shift - remnant)
+                    aux = vmulq_f32(code_phase_step_chips_reg, indexn);
+                    aux = vaddq_f32(aux, aux2);
+
+                    //floor
+                    // Convert to integer and back, then subtract one in every lane
+                    // where the round-trip value is greater than the input
+                    i = vcvtq_s32_f32(aux);
+                    fi = vcvtq_f32_s32(i);
+                    igx = vcgtq_f32(fi, aux);
+                    j = vcvtq_f32_s32(vandq_s32(vreinterpretq_s32_u32(igx), ones));
+                    aux = vsubq_f32(fi, j);
+
+                    // fmod
+                    // quotient via the reciprocal, converted to integer and back,
+                    // multiplied by the code length and subtracted from the phase
+                    c = vmulq_f32(aux, reciprocal);
+                    i =  vcvtq_s32_f32(c);
+                    cTrunc = vcvtq_f32_s32(i);
+                    base = vmulq_f32(cTrunc, code_length_chips_reg_f);
+                    aux = vsubq_f32(aux, base);
+                    local_code_chip_index_reg = vcvtq_s32_f32(aux);
+
+                    // Add one code length to the lanes whose index is negative
+                    negatives = vreinterpretq_s32_u32(vcltq_s32(local_code_chip_index_reg, zeros));
+                    aux_i = vandq_s32(code_length_chips_reg_i, negatives);
+                    local_code_chip_index_reg = vaddq_s32(local_code_chip_index_reg, aux_i);
+
+                    vst1q_s32((int32_t*)local_code_chip_index, local_code_chip_index_reg);
+
+                    // Pick the four code samples in scalar code
+                    for(k = 0; k < 4; ++k)
+                        {
+                            _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
+                        }
+                    indexn = vaddq_f32(indexn, fours);
+                }
+            // Scalar processing of the samples that do not fill a group of four
+            for(n = neon_iters * 4; n < num_points; n++)
+                {
+                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&_result[current_correlator_tap][n], 1, 0);
+                    // resample code for current tap
+                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
+                    //Take into account that in multitap correlators, the shifts can be negative!
+                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
+                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
+                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_];
+                }
+        }
+}
+
+#endif
+
+#endif /*INCLUDED_volk_gnsssdr_32f_xn_resampler_32f_xn_H*/
+
+
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn.h
@ -0,0 +1,320 @@
+/*!
+ * \file volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn.h
+ * \brief VOLK_GNSSSDR kernel: multiplies N real (32-bit float) vectors by a common
+ * complex vector, which is phase-rotated, and accumulates the results in N float complex outputs.
+ * \authors <ul>
+ *          <li> Cillian O'Driscoll 2016. cillian.odriscoll(at)gmail.com
+ *          </ul>
+ *
+ * VOLK_GNSSSDR kernel that multiplies N 32-bit float (real) vectors by a common 32-bit float
+ * complex vector, which is phase-rotated by a phase offset and a phase increment, and
+ * accumulates the results in N 32-bit float complex outputs.
+ * It is optimized to perform the N tap correlation process in GNSS receivers.
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2016  (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ *          Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+/*!
+ * \page volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn
+ *
+ * \b Overview
+ *
+ * Rotates and multiplies the reference complex vector with an arbitrary number of other real vectors,
+ * accumulates the results and stores them in the output vector.
+ * The rotation is done at a fixed rate per sample, from an initial \p phase offset.
+ * This function can be used for Doppler wipe-off and multiple correlator.
+ *
+ * <b>Dispatcher Prototype</b>
+ * \code
+ * void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const float** in_a, int num_a_vectors, unsigned int num_points);
+ * \endcode
+ *
+ * \b Inputs
+ * \li in_common:     Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector).
+ * \li phase_inc:     Phase increment = lv_cmake(cos(phase_step_rad), sin(phase_step_rad))
+ * \li phase:         Initial phase = lv_cmake(cos(initial_phase_rad), sin(initial_phase_rad))
+ * \li in_a:          Pointer to an array of pointers to multiple vectors to be multiplied and accumulated.
+ * \li num_a_vectors: Number of vectors to be multiplied by the reference vector and accumulated.
+ * \li num_points:    Number of complex values to be multiplied together, accumulated and stored into \p result.
+ *
+ * \b Outputs
+ * \li phase:         Final phase.
+ * \li result:        Vector of \p num_a_vectors components with the multiple vectors of \p in_a rotated, multiplied by \p in_common and accumulated.
+ *
+ */
+
+#ifndef INCLUDED_volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_H
+#define INCLUDED_volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_H
+
+
+#include <volk_gnsssdr/volk_gnsssdr.h>
+#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <volk_gnsssdr/saturation_arithmetic.h>
+#include <math.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_GENERIC
+
+/*!
+ * Generic implementation.
+ *
+ * Multiplies each sample of in_common by a rotating phasor and correlates the
+ * rotated samples with each of the num_a_vectors real vectors in in_a.
+ * result[v] receives the accumulated sum for vector v.
+ * The phasor starts at *phase, is multiplied by phase_inc after every sample
+ * and is written back to *phase, so the caller gets the final phase.
+ */
+static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const float** in_a, int num_a_vectors, unsigned int num_points)
+{
+    unsigned int sample;
+    int tap;
+    lv_32fc_t rotated;
+    lv_32fc_t product;
+
+    // Start every accumulator from zero
+    for (tap = 0; tap < num_a_vectors; ++tap)
+        {
+            result[tap] = lv_cmake(0,0);
+        }
+
+    for (sample = 0; sample < num_points; ++sample)
+        {
+            // Rotate the input sample with the phase that applies to it
+            rotated = in_common[sample] * (*phase);
+
+            // Every 256 samples (including the first one) rescale the phasor
+            // to unit modulus so that rounding errors do not build up
+            if ((sample % 256) == 0)
+                {
+#ifdef __cplusplus
+                    (*phase) /= std::abs((*phase));
+#else
+                    (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
+#endif
+                }
+
+            // Phase for the next sample
+            (*phase) *= phase_inc;
+
+            // Accumulate the rotated sample weighted by every real vector
+            for (tap = 0; tap < num_a_vectors; ++tap)
+                {
+                    product = rotated * in_a[tap][sample];
+                    result[tap] += product;
+                }
+        }
+}
+
+#endif /*LV_HAVE_GENERIC*/
+
+
+#ifdef LV_HAVE_GENERIC
+
+/*!
+ * Generic implementation that renormalises the phasor once per block.
+ *
+ * Same computation as the plain generic version, but the samples are walked
+ * in blocks of ROTATOR_RELOAD; the phasor is rescaled to unit modulus after
+ * each complete block instead of being tested on every sample. The samples
+ * left after the last complete block are processed without a final rescale.
+ * On return *phase holds the phase that follows the last processed sample.
+ */
+static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const float** in_a, int num_a_vectors, unsigned int num_points)
+{
+    const unsigned int ROTATOR_RELOAD = 256;
+    const unsigned int full_blocks = num_points / ROTATOR_RELOAD;
+    const unsigned int leftover = num_points % ROTATOR_RELOAD;
+    unsigned int block;
+    unsigned int offset;
+    unsigned int idx = 0;  // absolute sample index, shared by both loops below
+    int tap;
+    lv_32fc_t rotated;
+    lv_32fc_t product;
+
+    for (tap = 0; tap < num_a_vectors; ++tap)
+        {
+            result[tap] = lv_cmake(0,0);
+        }
+
+    // Complete blocks of ROTATOR_RELOAD samples
+    for (block = 0; block < full_blocks; ++block)
+        {
+            for (offset = 0; offset < ROTATOR_RELOAD; ++offset, ++idx)
+                {
+                    rotated = in_common[idx] * (*phase);
+                    (*phase) *= phase_inc;
+                    for (tap = 0; tap < num_a_vectors; ++tap)
+                        {
+                            product = rotated * in_a[tap][idx];
+                            result[tap] += product;
+                        }
+                }
+            // Bring the phasor back onto the unit circle
+#ifdef __cplusplus
+            (*phase) /= std::abs((*phase));
+#else
+            (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
+#endif
+        }
+
+    // Samples after the last complete block
+    for (offset = 0; offset < leftover; ++offset, ++idx)
+        {
+            rotated = in_common[idx] * (*phase);
+            (*phase) *= phase_inc;
+            for (tap = 0; tap < num_a_vectors; ++tap)
+                {
+                    product = rotated * in_a[tap][idx];
+                    result[tap] += product;
+                }
+        }
+}
+
+#endif /*LV_HAVE_GENERIC*/
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+#include <volk_gnsssdr/volk_gnsssdr_avx_intrinsics.h>
+/*!
+ * AVX implementation (unaligned loads from in_common and in_a).
+ *
+ * \param result         Output: num_a_vectors complex accumulators.
+ * \param in_common      Complex input samples (num_points of them).
+ * \param phase_inc      Per-sample phase increment (complex exponential).
+ * \param phase          In: initial phase. Out: phase following the last sample.
+ * \param in_a           num_a_vectors pointers to real vectors of num_points floats.
+ * \param num_a_vectors  Number of real vectors / outputs.
+ * \param num_points     Number of samples to process.
+ *
+ * Sixteen samples are processed per loop iteration: four registers z0..z3 hold
+ * the rotator for sixteen consecutive samples and all are advanced by
+ * phase_inc^16 after each iteration. The num_points % 16 samples that remain
+ * are processed in scalar code.
+ */
+static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const float** in_a, int num_a_vectors, unsigned int num_points)
+{
+    unsigned int number = 0;
+    unsigned int vec_ind = 0;
+    unsigned int i = 0;
+    const unsigned int sixteenthPoints = num_points / 16;
+
+    // Float view of the complex input, used only for the 8-float vector loads
+    const float* aPtr = (const float*)in_common;
+    const float* bPtr[ num_a_vectors];
+    for( vec_ind = 0; vec_ind < num_a_vectors; ++vec_ind ){
+        bPtr[vec_ind] = in_a[vec_ind];
+    }
+
+    lv_32fc_t _phase = (*phase);
+    lv_32fc_t wo;
+
+    __m256 a0Val, a1Val, a2Val, a3Val;
+    __m256 b0Val[num_a_vectors], b1Val[num_a_vectors], b2Val[num_a_vectors], b3Val[num_a_vectors];
+    __m256 x0Val[num_a_vectors], x1Val[num_a_vectors], x0loVal[num_a_vectors], x0hiVal[num_a_vectors], x1loVal[num_a_vectors], x1hiVal[num_a_vectors];
+    __m256 c0Val[num_a_vectors], c1Val[num_a_vectors], c2Val[num_a_vectors], c3Val[num_a_vectors];
+
+    // Four partial accumulators (four complex values each) per output
+    __m256 dotProdVal0[num_a_vectors];
+    __m256 dotProdVal1[num_a_vectors];
+    __m256 dotProdVal2[num_a_vectors];
+    __m256 dotProdVal3[num_a_vectors];
+
+    for( vec_ind = 0; vec_ind < num_a_vectors; vec_ind++ ){
+        dotProdVal0[vec_ind] = _mm256_setzero_ps();
+        dotProdVal1[vec_ind] = _mm256_setzero_ps();
+        dotProdVal2[vec_ind] = _mm256_setzero_ps();
+        dotProdVal3[vec_ind] = _mm256_setzero_ps();
+    }
+
+    // Set up the complex rotator: phases of the first sixteen samples
+    __m256 z0, z1, z2, z3;
+    __VOLK_ATTR_ALIGNED(32) lv_32fc_t phase_vec[16];
+    for( vec_ind = 0; vec_ind < 16; ++vec_ind ){
+        phase_vec[vec_ind] = _phase;
+        _phase *= phase_inc;
+    }
+
+    z0 = _mm256_load_ps( (float *)phase_vec );
+    z1 = _mm256_load_ps( (float *)(phase_vec + 4) );
+    z2 = _mm256_load_ps( (float *)(phase_vec + 8) );
+    z3 = _mm256_load_ps( (float *)(phase_vec + 12) );
+
+    lv_32fc_t dz = phase_inc; dz *= dz; dz *= dz; dz *= dz; dz *= dz; // dz = phase_inc^16;
+
+    for( vec_ind = 0; vec_ind < 4; ++vec_ind ){
+        phase_vec[vec_ind] = dz;
+    }
+
+    __m256 dz_reg = _mm256_load_ps( (float *)phase_vec );
+    dz_reg = _mm256_complexnormalise_ps( dz_reg );
+
+    for(;number < sixteenthPoints; number++){
+
+        // 16 complex input samples = 32 floats
+        a0Val = _mm256_loadu_ps(aPtr);
+        a1Val = _mm256_loadu_ps(aPtr+8);
+        a2Val = _mm256_loadu_ps(aPtr+16);
+        a3Val = _mm256_loadu_ps(aPtr+24);
+
+        // Rotate the input samples
+        a0Val = _mm256_complexmul_ps( a0Val, z0 );
+        a1Val = _mm256_complexmul_ps( a1Val, z1 );
+        a2Val = _mm256_complexmul_ps( a2Val, z2 );
+        a3Val = _mm256_complexmul_ps( a3Val, z3 );
+
+        // Advance every rotator by sixteen samples
+        z0 = _mm256_complexmul_ps( z0, dz_reg );
+        z1 = _mm256_complexmul_ps( z1, dz_reg );
+        z2 = _mm256_complexmul_ps( z2, dz_reg );
+        z3 = _mm256_complexmul_ps( z3, dz_reg );
+
+
+        for( vec_ind = 0; vec_ind < num_a_vectors; ++vec_ind ){
+            x0Val[vec_ind] = _mm256_loadu_ps(bPtr[vec_ind]); // t0|t1|t2|t3|t4|t5|t6|t7
+            x1Val[vec_ind] = _mm256_loadu_ps(bPtr[vec_ind]+8);
+            // Duplicate each real value so that it scales both the real and the
+            // imaginary part of the matching complex sample
+            x0loVal[vec_ind] = _mm256_unpacklo_ps(x0Val[vec_ind], x0Val[vec_ind]); // t0|t0|t1|t1|t4|t4|t5|t5
+            x0hiVal[vec_ind] = _mm256_unpackhi_ps(x0Val[vec_ind], x0Val[vec_ind]); // t2|t2|t3|t3|t6|t6|t7|t7
+            x1loVal[vec_ind] = _mm256_unpacklo_ps(x1Val[vec_ind], x1Val[vec_ind]);
+            x1hiVal[vec_ind] = _mm256_unpackhi_ps(x1Val[vec_ind], x1Val[vec_ind]);
+
+            // TODO: it may be possible to rearrange swizzling to better pipeline data
+            b0Val[vec_ind] = _mm256_permute2f128_ps(x0loVal[vec_ind], x0hiVal[vec_ind], 0x20); // t0|t0|t1|t1|t2|t2|t3|t3
+            b1Val[vec_ind] = _mm256_permute2f128_ps(x0loVal[vec_ind], x0hiVal[vec_ind], 0x31); // t4|t4|t5|t5|t6|t6|t7|t7
+            b2Val[vec_ind] = _mm256_permute2f128_ps(x1loVal[vec_ind], x1hiVal[vec_ind], 0x20);
+            b3Val[vec_ind] = _mm256_permute2f128_ps(x1loVal[vec_ind], x1hiVal[vec_ind], 0x31);
+
+            c0Val[vec_ind] = _mm256_mul_ps(a0Val, b0Val[vec_ind]);
+            c1Val[vec_ind] = _mm256_mul_ps(a1Val, b1Val[vec_ind]);
+            c2Val[vec_ind] = _mm256_mul_ps(a2Val, b2Val[vec_ind]);
+            c3Val[vec_ind] = _mm256_mul_ps(a3Val, b3Val[vec_ind]);
+
+            dotProdVal0[vec_ind] = _mm256_add_ps(c0Val[vec_ind], dotProdVal0[vec_ind]);
+            dotProdVal1[vec_ind] = _mm256_add_ps(c1Val[vec_ind], dotProdVal1[vec_ind]);
+            dotProdVal2[vec_ind] = _mm256_add_ps(c2Val[vec_ind], dotProdVal2[vec_ind]);
+            dotProdVal3[vec_ind] = _mm256_add_ps(c3Val[vec_ind], dotProdVal3[vec_ind]);
+
+            bPtr[vec_ind] += 16;
+        }
+
+        // Force the rotators back onto the unit circle (every 64 iterations)
+        if ((number % 64) == 0)
+        {
+            z0 = _mm256_complexnormalise_ps( z0 );
+            z1 = _mm256_complexnormalise_ps( z1 );
+            z2 = _mm256_complexnormalise_ps( z2 );
+            z3 = _mm256_complexnormalise_ps( z3 );
+        }
+
+        aPtr += 32;
+    }
+    __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector[4];
+
+    // Reduce the partial accumulators of each output to a single complex value
+    for( vec_ind = 0; vec_ind < num_a_vectors; ++vec_ind ){
+        dotProdVal0[vec_ind] = _mm256_add_ps(dotProdVal0[vec_ind], dotProdVal1[vec_ind]);
+        dotProdVal0[vec_ind] = _mm256_add_ps(dotProdVal0[vec_ind], dotProdVal2[vec_ind]);
+        dotProdVal0[vec_ind] = _mm256_add_ps(dotProdVal0[vec_ind], dotProdVal3[vec_ind]);
+
+        _mm256_store_ps((float *)dotProductVector,dotProdVal0[vec_ind]); // Store the results back into the dot product vector
+
+        result[ vec_ind ] = lv_cmake( 0, 0 );
+        for( i = 0; i < 4; ++i ){
+            result[vec_ind] += dotProductVector[i];
+        }
+    }
+
+    // The first lane of z0 is the phase of the first sample not yet processed
+    z0 = _mm256_complexnormalise_ps( z0 );
+    _mm256_store_ps((float*)phase_vec, z0);
+    _phase  = phase_vec[0];
+    _mm256_zeroupper();
+
+
+    // Scalar processing of the last num_points % 16 samples.
+    // The input is indexed through in_common so that a whole complex sample is
+    // read; aPtr is a float pointer and is only suitable for the vector loads.
+    number = sixteenthPoints*16;
+    for(;number < num_points; number++){
+        wo = in_common[number] * _phase;
+        _phase *= phase_inc;
+
+        for( vec_ind = 0; vec_ind < num_a_vectors; ++vec_ind ){
+            result[vec_ind] += wo * in_a[vec_ind][number];
+        }
+    }
+
+    *phase = _phase;
+
+}
+
+#endif /* LV_HAVE_AVX */
+
+#endif /* INCLUDED_volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_H */
+
+
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc.h
@ -0,0 +1,132 @@
+/*!
+ * \file volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc.h
+ * \brief VOLK_GNSSSDR puppet for the 32-bit float complex rotator dot product kernel with multiple real (32-bit float) vectors.
+ * \authors <ul>
+ *          <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
+ *          </ul>
+ *
+ * VOLK_GNSSSDR puppet for integrating the rotator dot product kernel into the test system
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2015  (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ *          Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#ifndef INCLUDED_volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_H
+#define INCLUDED_volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_H
+
+#include "volk_gnsssdr/volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn.h"
+#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
+#include <volk_gnsssdr/volk_gnsssdr.h>
+#include <string.h>
+
+#ifdef LV_HAVE_GENERIC
+
+/*!
+ * Puppet around the generic rotator dot product kernel.
+ * Copies the real input vector into three separate buffers, runs the kernel
+ * with a fixed initial phase and phase step, and releases the buffers.
+ */
+static inline void volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_generic(lv_32fc_t* result, const lv_32fc_t* local_code,  const float* in, unsigned int num_points)
+{
+    // The rotator expects complex exponentials (unit modulus) for both the
+    // initial phase and the phase increment.
+    float rem_carrier_phase_in_rad = 0.25;
+    float phase_step_rad = 0.1;
+    int num_a_vectors = 3;
+    int v;
+    lv_32fc_t phase = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
+    lv_32fc_t phase_inc = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
+    float** in_a = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_a_vectors, volk_gnsssdr_get_alignment());
+
+    // Every tap gets its own copy of the input
+    for (v = 0; v < num_a_vectors; ++v)
+        {
+            in_a[v] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
+            memcpy(in_a[v], in, sizeof(float) * num_points);
+        }
+
+    volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic(result, local_code, phase_inc, &phase, (const float**) in_a, num_a_vectors, num_points);
+
+    for (v = 0; v < num_a_vectors; ++v)
+        {
+            volk_gnsssdr_free(in_a[v]);
+        }
+    volk_gnsssdr_free(in_a);
+}
+#endif  // Generic
+
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ * Puppet around the generic_reload rotator dot product kernel.
+ * Builds three copies of the real input vector, calls the kernel with a fixed
+ * starting phase and phase step, then frees the temporary copies.
+ */
+static inline void volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_generic_reload(lv_32fc_t* result, const lv_32fc_t* local_code,  const float* in, unsigned int num_points)
+{
+    // Phases handed to the rotator must be normalized complex exponentials
+    float rem_carrier_phase_in_rad = 0.25;
+    float phase_step_rad = 0.1;
+    int num_a_vectors = 3;
+    int tap;
+    lv_32fc_t phase = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
+    lv_32fc_t phase_inc = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
+    float** in_a = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_a_vectors, volk_gnsssdr_get_alignment());
+
+    for (tap = 0; tap < num_a_vectors; ++tap)
+        {
+            in_a[tap] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
+            memcpy(in_a[tap], in, sizeof(float) * num_points);
+        }
+
+    volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload(result, local_code, phase_inc, &phase, (const float**) in_a, num_a_vectors, num_points);
+
+    // Release the per-tap copies, then the pointer table
+    for (tap = 0; tap < num_a_vectors; ++tap)
+        {
+            volk_gnsssdr_free(in_a[tap]);
+        }
+    volk_gnsssdr_free(in_a);
+}
+
+#endif  // Generic
+
+#ifdef LV_HAVE_AVX
+/*!
+ * Puppet around the u_avx rotator dot product kernel.
+ * Replicates the real input vector three times, invokes the AVX kernel with
+ * a fixed initial phase and phase step, and frees the replicas afterwards.
+ */
+static inline void volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_u_avx(lv_32fc_t* result, const lv_32fc_t* local_code,  const float* in, unsigned int num_points)
+{
+    // Initial phase and increment are given as normalized complex exponentials,
+    // as the phase rotator requires
+    float rem_carrier_phase_in_rad = 0.25;
+    float phase_step_rad = 0.1;
+    int num_a_vectors = 3;
+    int copy;
+    lv_32fc_t phase = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
+    lv_32fc_t phase_inc = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
+    float** in_a = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_a_vectors, volk_gnsssdr_get_alignment());
+
+    for (copy = 0; copy < num_a_vectors; ++copy)
+        {
+            in_a[copy] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
+            memcpy(in_a[copy], in, sizeof(float) * num_points);
+        }
+
+    volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(result, local_code, phase_inc, &phase, (const float**) in_a, num_a_vectors, num_points);
+
+    for (copy = 0; copy < num_a_vectors; ++copy)
+        {
+            volk_gnsssdr_free(in_a[copy]);
+        }
+    volk_gnsssdr_free(in_a);
+}
+
+#endif  // AVX
+
+#endif  // INCLUDED_volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_H
+
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_resamplerxnpuppet_32fc.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_resamplerxnpuppet_32fc.h
@ -46,8 +46,8 @@
 #ifdef LV_HAVE_GENERIC
 static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_generic(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
-    int code_length_chips = 1023;
+    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -70,14 +70,15 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_generic(lv_32fc_t* r
    volk_gnsssdr_free(result_aux);
 }

+
 #endif /* LV_HAVE_GENERIC */


 #ifdef LV_HAVE_SSE3
 static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_sse3(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
-    int code_length_chips = 1023;
+    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -105,8 +106,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_sse3(lv_32fc_t* re
 #ifdef LV_HAVE_SSE3
 static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
-    int code_length_chips = 1023;
+    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -135,8 +136,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_sse3(lv_32fc_t* re
 #ifdef LV_HAVE_SSE4_1
 static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_sse4_1(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
-    int code_length_chips = 1023;
+    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -164,8 +165,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_sse4_1(lv_32fc_t*
 #ifdef LV_HAVE_SSE4_1
 static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_sse4_1(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
-    int code_length_chips = 1023;
+    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -193,8 +194,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_sse4_1(lv_32fc_t*
 #ifdef LV_HAVE_AVX
 static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_avx(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
-    int code_length_chips = 1023;
+    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -222,8 +223,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_avx(lv_32fc_t* res
 #ifdef LV_HAVE_AVX
 static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_avx(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
-    int code_length_chips = 1023;
+    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -251,8 +252,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_avx(lv_32fc_t* res
 #ifdef LV_HAVE_AVX2
 static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_avx2(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
-    int code_length_chips = 1023;
+    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -280,8 +281,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_avx2(lv_32fc_t* re
 #ifdef LV_HAVE_AVX2
 static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_avx2(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
-    int code_length_chips = 1023;
+    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
@ -309,8 +310,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_avx2(lv_32fc_t* re
 #ifdef LV_HAVE_NEON
 static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_neon(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
 {
-    float code_phase_step_chips = -0.6;
-    int code_length_chips = 1023;
+    int code_length_chips = 2046;
+    float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
    int num_out_vectors = 3;
    float rem_code_phase_chips = -0.234;
    unsigned int n;
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/kernel_tests.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/kernel_tests.h
@ -89,10 +89,14 @@ std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t
        (VOLK_INIT_PUPP(volk_gnsssdr_16ic_resamplerfastpuppet_16ic, volk_gnsssdr_16ic_resampler_fast_16ic, test_params))
        (VOLK_INIT_PUPP(volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic, volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn, test_params))
        (VOLK_INIT_PUPP(volk_gnsssdr_16ic_resamplerxnpuppet_16ic, volk_gnsssdr_16ic_xn_resampler_16ic_xn, test_params))
+        (VOLK_INIT_PUPP(volk_gnsssdr_16i_resamplerxnpuppet_16i, volk_gnsssdr_16i_xn_resampler_16i_xn, test_params))
        (VOLK_INIT_PUPP(volk_gnsssdr_32fc_resamplerxnpuppet_32fc, volk_gnsssdr_32fc_xn_resampler_32fc_xn, test_params))
+        (VOLK_INIT_PUPP(volk_gnsssdr_32f_resamplerxnpuppet_32f, volk_gnsssdr_32f_xn_resampler_32f_xn, test_params))
        (VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_dot_prod_16ic_xn, test_params))
        (VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn, test_params_int16))
+        (VOLK_INIT_PUPP(volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn, test_params_int16))
        (VOLK_INIT_PUPP(volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc, volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn, test_params_int1))
+        (VOLK_INIT_PUPP(volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc, volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn, test_params_int1))
        ;

    return test_cases;
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/qa_utils.cc
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/qa_utils.cc
@ -717,7 +717,7 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
                                            {
                                                if(both_sigs[j].is_signed)
                                                    {
-                                                        fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
+                                                        fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
                                                    }
                                                else
                                                    {