Created macros for 8ic_cw_epl_corr_32fc

Created macros for 8ic_cw_epl_corr_32fc proto-kernels
2025-10-09 21:07:38 +00:00 · 2014-09-22 18:18:20 +02:00
parent d71ae159dc
commit c55be47565
5 changed files with 182 additions and 448 deletions
--- a/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/CommonMacros.h
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/CommonMacros.h
@@ -54,6 +54,16 @@
        output_ps = _mm_cvtepi32_ps(output_i32);
        #endif /* CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1 */

+        #ifndef CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1 /* Widens the low 8 signed bytes of `input` to 32-bit, adds the two groups of four lane-wise and stores the sums as floats in `output_ps`. Side effect: `input` ends up shifted right by 8 bytes, so a second invocation processes its next 8 bytes. Expands to several plain statements. */
+        #define CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1(input, input_i_1, input_i_2, output_i32, output_ps)\
+        input_i_1 = _mm_cvtepi8_epi32(input); /* sign-extend bytes 0..3 into four 32-bit lanes */\
+        input = _mm_srli_si128 (input, 4); /* discard the 4 bytes just consumed; this overwrites the caller's `input` */\
+        input_i_2 = _mm_cvtepi8_epi32(input); /* sign-extend the following 4 bytes */\
+        input = _mm_srli_si128 (input, 4); /* advance once more so the next invocation starts 8 bytes further on */\
+        output_i32 = _mm_add_epi32 (input_i_1, input_i_2); /* add the two groups as integers before the float conversion */\
+        output_ps = _mm_cvtepi32_ps(output_i32);
+        #endif /* CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1 */
+
    #endif /* LV_HAVE_SSE4_1 */

    #ifdef LV_HAVE_SSE2
@@ -71,6 +81,32 @@
        imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
        #endif /* CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2 */

+        #ifndef CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2 /* Splits `input` into `real` (input AND mult1) and `imag` (input moved down by one byte, then AND mult1). NOTE(review): presumably `mult1` keeps the low byte of every 16-bit lane so that interleaved 8-bit real/imag pairs become separate 16-bit lanes - confirm at the call site. */
+        #define CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(input, mult1, real, imag)\
+        imag = _mm_srli_si128 (input, 1); /* byte shift of the whole register by one position; `input` itself is left untouched */\
+        imag = _mm_and_si128 (imag, mult1); /* keep only the bytes selected by the mask */\
+        real = _mm_and_si128 (input, mult1);
+        #endif /* CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2 */
+
+        #ifndef CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2 /* SSE2 counterpart of the SSE4.1 conversion: sign-extends all 16 bytes of `input` to 32-bit without pmovsx, sums bytes 0..3 with 4..7 into `output_ps_1` and bytes 8..11 with 12..15 into `output_ps_2` (as floats). Unlike the SSE4.1 macro it does not modify `input`. */
+        #define CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2(input, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)\
+        input_i_1 = _mm_unpacklo_epi8(_mm_setzero_si128(), input); /* low 8 bytes, each placed in the upper byte of a 16-bit lane */\
+        input_i_2 = _mm_unpacklo_epi16(_mm_setzero_si128(), input_i_1); /* bytes 0..3, each now in the top byte of a 32-bit lane */\
+        input_i_1 = _mm_unpackhi_epi16(_mm_setzero_si128(), input_i_1); /* bytes 4..7, same layout */\
+        input_i_1 = _mm_srai_epi32(input_i_1, 24); /* arithmetic shift brings the byte down while keeping its sign */\
+        input_i_2 = _mm_srai_epi32(input_i_2, 24);\
+        output_i32 = _mm_add_epi32(input_i_1, input_i_2);\
+        output_ps_1 = _mm_cvtepi32_ps(output_i32);\
+        \
+        input_i_1 = _mm_unpackhi_epi8(_mm_setzero_si128(), input); /* same procedure for the high 8 bytes of `input` */\
+        input_i_2 = _mm_unpacklo_epi16(_mm_setzero_si128(), input_i_1); /* bytes 8..11 */\
+        input_i_1 = _mm_unpackhi_epi16(_mm_setzero_si128(), input_i_1); /* bytes 12..15 */\
+        input_i_1 = _mm_srai_epi32(input_i_1, 24);\
+        input_i_2 = _mm_srai_epi32(input_i_2, 24);\
+        output_i32 = _mm_add_epi32(input_i_1, input_i_2);\
+        output_ps_2 = _mm_cvtepi32_ps(output_i32);
+        #endif /* CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2 */
+
    #endif /* LV_HAVE_AVX */

    #ifdef LV_HAVE_GENERIC
--- a/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h
@@ -0,0 +1,100 @@
+/*!
+ * \file CommonMacros_8ic_cw_epl_corr_32fc.h
+ * \brief Common macros used inside the 8ic_cw_epl_corr_32fc volk protokernels.
+ * \authors <ul>
+ *          <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
+ *          </ul>
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2014  (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ *          Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+#ifndef INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_u_H
+#define INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_u_H
+#include "CommonMacros/CommonMacros.h"
+
+    #ifdef LV_HAVE_SSE4_1
+      /*!
+        \brief Macros for U_SSE4_1
+      */
+
+        #ifndef CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1 /* Multiplies the samples in real_bb_signal_sample/imag_bb_signal_sample by the complex vector `y`, re-interleaves the products as bytes and returns their widened, pairwise-summed float values in output_ps_1 and output_ps_2. NOTE: writes to a variable named `output` that is NOT a macro parameter; an __m128i `output` must be in scope where the macro is expanded. */
+        #define CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)\
+        CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy) /* split y into realy / imagy */\
+        CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output) /* product goes to real_output / imag_output */\
+        \
+        imag_output = _mm_slli_si128 (imag_output, 1); /* move every imaginary byte one position up */\
+        output = _mm_blendv_epi8 (imag_output, real_output, mult1); /* real_output bytes where the top bit of the mult1 byte is set, imag_output bytes elsewhere; `output` comes from the enclosing scope */\
+        \
+        CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1(output, input_i_1, input_i_2, output_i32, output_ps_1) /* consumes the low 8 bytes and shifts `output` right by 8 bytes */\
+        CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1(output, input_i_1, input_i_2, output_i32, output_ps_2) /* therefore this call handles the former high 8 bytes */
+        #endif /* CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1 */
+
+    #endif /* LV_HAVE_SSE4_1 */
+
+    #ifdef LV_HAVE_SSE2
+    /*!
+     \brief Macros for U_SSE2
+     */
+
+        #ifndef CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2 /* SSE2 variant of the correlation step: multiplies the samples in real_bb_signal_sample/imag_bb_signal_sample by the complex vector `y`, re-interleaves the products with AND/shift/OR (no blendv) and returns the widened, pairwise-summed floats in output_ps_1 and output_ps_2. NOTE: writes to a variable named `output` that is NOT a macro parameter; an __m128i `output` must be in scope where the macro is expanded. */
+        #define CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)\
+        CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy) /* split y into realy / imagy */\
+        CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output) /* product goes to real_output / imag_output */\
+        \
+        real_output = _mm_and_si128 (real_output, mult1); /* keep only the bytes selected by the mask */\
+        imag_output = _mm_and_si128 (imag_output, mult1);\
+        imag_output = _mm_slli_si128 (imag_output, 1); /* move every kept imaginary byte one position up */\
+        output = _mm_or_si128 (real_output, imag_output); /* merge both halves; `output` comes from the enclosing scope */\
+        \
+        CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2(output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2) /* one call covers all 16 bytes */
+        #endif /* CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2 */
+
+    #endif /* LV_HAVE_SSE2 */
+
+    #ifdef LV_HAVE_GENERIC
+    /*!
+     \brief Macros for U_GENERIC
+     */
+
+    #endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_u_H */
+
+
+#ifndef INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_a_H
+#define INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_a_H
+
+    #ifdef LV_HAVE_SSE4_1
+    /*!
+     \brief Macros for A_SSE4_1
+     */
+
+    #endif /* LV_HAVE_SSE4_1 */
+
+    #ifdef LV_HAVE_GENERIC
+    /*!
+     \brief Macros for A_GENERIC
+     */
+
+    #endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_a_H */
--- a/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/README.txt
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/README.txt
@@ -24,8 +24,8 @@ The variables that the macro needs are specified when calling it in order to avo
 Workflow
 ####################################################################

-In order to use the macros easily, I usually test the code without macros inside a test proto-kernel, where you are able to test it, debug it and use breakpoints.
-When it works I place it inside a macro an test it again.
+In order to use the macros easily, I usually test the code without macros inside a testing proto-kernel, where you are able to test it, debug it and use breakpoints.
+When it works, I place the code inside a macro and test it again.

 ####################################################################
 Why macros
--- a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3.h
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3.h
@@ -55,6 +55,8 @@

 #ifdef LV_HAVE_SSE4_1
 #include "smmintrin.h"
+#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
+#include "CommonMacros/CommonMacros.h"
 /*!
 \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
 \param input The input signal input
@@ -75,8 +77,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse4_1(lv_32fc_t* E
    __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
    
    __m128 E_code_acc, P_code_acc, L_code_acc;
-    __m128i output_i_1, output_i_2, output_i_3, output_i_4;
-    __m128 output_ps_1, output_ps_2, output_ps_3, output_ps_4;
+    __m128i input_i_1, input_i_2, output_i32;
+    __m128 output_ps_1, output_ps_2;
    
    const lv_8sc_t* input_ptr = input;
    const lv_8sc_t* carrier_ptr = carrier;
@@ -106,126 +108,34 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse4_1(lv_32fc_t* E
            x = _mm_lddqu_si128((__m128i*)input_ptr);
            y = _mm_lddqu_si128((__m128i*)carrier_ptr);
            
-            imagx = _mm_srli_si128 (x, 1);
-            imagx = _mm_and_si128 (imagx, mult1);
-            realx = _mm_and_si128 (x, mult1);
+            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
+            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (realx, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
-            
-            real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+            CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
            
            //Get early values
            y = _mm_lddqu_si128((__m128i*)E_code_ptr);
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-            
-            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-            
-            imag_output = _mm_slli_si128 (imag_output, 1);
-            output = _mm_blendv_epi8 (imag_output, real_output, mult1);
-            
-            output_i_1 = _mm_cvtepi8_epi32(output);
-            output_ps_1 = _mm_cvtepi32_ps(output_i_1);
-            output = _mm_srli_si128 (output, 4);
-            output_i_2 = _mm_cvtepi8_epi32(output);
-            output_ps_2 = _mm_cvtepi32_ps(output_i_2);
-            output = _mm_srli_si128 (output, 4);
-            output_i_3 = _mm_cvtepi8_epi32(output);
-            output_ps_3 = _mm_cvtepi32_ps(output_i_3);
-            output = _mm_srli_si128 (output, 4);
-            output_i_4 = _mm_cvtepi8_epi32(output);
-            output_ps_4 = _mm_cvtepi32_ps(output_i_4);
+            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            
            E_code_acc = _mm_add_ps (E_code_acc, output_ps_1);
            E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
-            E_code_acc = _mm_add_ps (E_code_acc, output_ps_3);
-            E_code_acc = _mm_add_ps (E_code_acc, output_ps_4);
            
            //Get prompt values
            y = _mm_lddqu_si128((__m128i*)P_code_ptr);
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-            
-            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-            
-            imag_output = _mm_slli_si128 (imag_output, 1);
-            output = _mm_blendv_epi8 (imag_output, real_output, mult1);
-            
-            output_i_1 = _mm_cvtepi8_epi32(output);
-            output_ps_1 = _mm_cvtepi32_ps(output_i_1);
-            output = _mm_srli_si128 (output, 4);
-            output_i_2 = _mm_cvtepi8_epi32(output);
-            output_ps_2 = _mm_cvtepi32_ps(output_i_2);
-            output = _mm_srli_si128 (output, 4);
-            output_i_3 = _mm_cvtepi8_epi32(output);
-            output_ps_3 = _mm_cvtepi32_ps(output_i_3);
-            output = _mm_srli_si128 (output, 4);
-            output_i_4 = _mm_cvtepi8_epi32(output);
-            output_ps_4 = _mm_cvtepi32_ps(output_i_4);
+            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            
            P_code_acc = _mm_add_ps (P_code_acc, output_ps_1);
            P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
-            P_code_acc = _mm_add_ps (P_code_acc, output_ps_3);
-            P_code_acc = _mm_add_ps (P_code_acc, output_ps_4);
            
            //Get late values
            y = _mm_lddqu_si128((__m128i*)L_code_ptr);
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-            
-            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-            
-            imag_output = _mm_slli_si128 (imag_output, 1);
-            output = _mm_blendv_epi8 (imag_output, real_output, mult1);
-            
-            output_i_1 = _mm_cvtepi8_epi32(output);
-            output_ps_1 = _mm_cvtepi32_ps(output_i_1);
-            output = _mm_srli_si128 (output, 4);
-            output_i_2 = _mm_cvtepi8_epi32(output);
-            output_ps_2 = _mm_cvtepi32_ps(output_i_2);
-            output = _mm_srli_si128 (output, 4);
-            output_i_3 = _mm_cvtepi8_epi32(output);
-            output_ps_3 = _mm_cvtepi32_ps(output_i_3);
-            output = _mm_srli_si128 (output, 4);
-            output_i_4 = _mm_cvtepi8_epi32(output);
-            output_ps_4 = _mm_cvtepi32_ps(output_i_4);
+            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            
            L_code_acc = _mm_add_ps (L_code_acc, output_ps_1);
            L_code_acc = _mm_add_ps (L_code_acc, output_ps_2);
-            L_code_acc = _mm_add_ps (L_code_acc, output_ps_3);
-            L_code_acc = _mm_add_ps (L_code_acc, output_ps_4);
            
            input_ptr += 8;
            carrier_ptr += 8;
@@ -265,6 +175,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse4_1(lv_32fc_t* E

 #ifdef LV_HAVE_SSE2
 #include "emmintrin.h"
+#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
+#include "CommonMacros/CommonMacros.h"
 /*!
 \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
 \param input The input signal input
@@ -285,8 +197,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o
    __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
    
    __m128 E_code_acc, P_code_acc, L_code_acc;
-    __m128i output_i_aux_1, output_i_aux_2, output_i_1, output_i_2, output_i_3, output_i_4;
-    __m128 output_ps_1, output_ps_2, output_ps_3, output_ps_4;
+    __m128i input_i_1, input_i_2, output_i32;
+    __m128 output_ps_1, output_ps_2;
    
    const lv_8sc_t* input_ptr = input;
    const lv_8sc_t* carrier_ptr = carrier;
@@ -316,148 +228,34 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_o
            x = _mm_lddqu_si128((__m128i*)input_ptr);
            y = _mm_lddqu_si128((__m128i*)carrier_ptr);
            
-            imagx = _mm_srli_si128 (x, 1);
-            imagx = _mm_and_si128 (imagx, mult1);
-            realx = _mm_and_si128 (x, mult1);
+            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
+            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (realx, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
-            
-            real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+            CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
            
            //Get early values
            y = _mm_lddqu_si128((__m128i*)E_code_ptr);
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-            
-            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-            
-            real_output = _mm_and_si128 (real_output, mult1);
-            imag_output = _mm_and_si128 (imag_output, mult1);
-            imag_output = _mm_slli_si128 (imag_output, 1);
-            output = _mm_or_si128 (real_output, imag_output);
-            
-            output_i_aux_1 = _mm_unpacklo_epi8(_mm_setzero_si128(), output);
-            output_i_aux_2 = _mm_unpackhi_epi8(_mm_setzero_si128(), output);
-            output_i_1 = _mm_unpacklo_epi16(_mm_setzero_si128(), output_i_aux_1);
-            output_i_2 = _mm_unpackhi_epi16(_mm_setzero_si128(), output_i_aux_1);
-            output_i_3 = _mm_unpacklo_epi16(_mm_setzero_si128(), output_i_aux_2);
-            output_i_4 = _mm_unpackhi_epi16(_mm_setzero_si128(), output_i_aux_2);
-            
-            //Shift, MAINTAINING THE SIGN!!!
-            output_i_1 = _mm_srai_epi32(output_i_1, 24);
-            output_i_2 = _mm_srai_epi32(output_i_2, 24);
-            output_i_3 = _mm_srai_epi32(output_i_3, 24);
-            output_i_4 = _mm_srai_epi32(output_i_4, 24);
-            
-            output_ps_1 = _mm_cvtepi32_ps(output_i_1);
-            output_ps_2 = _mm_cvtepi32_ps(output_i_2);
-            output_ps_3 = _mm_cvtepi32_ps(output_i_3);
-            output_ps_4 = _mm_cvtepi32_ps(output_i_4);
+            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            
            E_code_acc = _mm_add_ps (E_code_acc, output_ps_1);
            E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
-            E_code_acc = _mm_add_ps (E_code_acc, output_ps_3);
-            E_code_acc = _mm_add_ps (E_code_acc, output_ps_4);
            
            //Get prompt values
            y = _mm_lddqu_si128((__m128i*)P_code_ptr);
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-            
-            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-            
-            real_output = _mm_and_si128 (real_output, mult1);
-            imag_output = _mm_and_si128 (imag_output, mult1);
-            imag_output = _mm_slli_si128 (imag_output, 1);
-            output = _mm_or_si128 (real_output, imag_output);
-            
-            output_i_aux_1 = _mm_unpacklo_epi8(_mm_setzero_si128(), output);
-            output_i_aux_2 = _mm_unpackhi_epi8(_mm_setzero_si128(), output);
-            output_i_1 = _mm_unpacklo_epi16(_mm_setzero_si128(), output_i_aux_1);
-            output_i_2 = _mm_unpackhi_epi16(_mm_setzero_si128(), output_i_aux_1);
-            output_i_3 = _mm_unpacklo_epi16(_mm_setzero_si128(), output_i_aux_2);
-            output_i_4 = _mm_unpackhi_epi16(_mm_setzero_si128(), output_i_aux_2);
-            
-            output_i_1 = _mm_srai_epi32(output_i_1, 24);
-            output_i_2 = _mm_srai_epi32(output_i_2, 24);
-            output_i_3 = _mm_srai_epi32(output_i_3, 24);
-            output_i_4 = _mm_srai_epi32(output_i_4, 24);
-            
-            output_ps_1 = _mm_cvtepi32_ps(output_i_1);
-            output_ps_2 = _mm_cvtepi32_ps(output_i_2);
-            output_ps_3 = _mm_cvtepi32_ps(output_i_3);
-            output_ps_4 = _mm_cvtepi32_ps(output_i_4);
+            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            
            P_code_acc = _mm_add_ps (P_code_acc, output_ps_1);
            P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
-            P_code_acc = _mm_add_ps (P_code_acc, output_ps_3);
-            P_code_acc = _mm_add_ps (P_code_acc, output_ps_4);
            
            //Get late values
            y = _mm_lddqu_si128((__m128i*)L_code_ptr);
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-            
-            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-            
-            real_output = _mm_and_si128 (real_output, mult1);
-            imag_output = _mm_and_si128 (imag_output, mult1);
-            imag_output = _mm_slli_si128 (imag_output, 1);
-            output = _mm_or_si128 (real_output, imag_output);
-            
-            output_i_aux_1 = _mm_unpacklo_epi8(_mm_setzero_si128(), output);
-            output_i_aux_2 = _mm_unpackhi_epi8(_mm_setzero_si128(), output);
-            output_i_1 = _mm_unpacklo_epi16(_mm_setzero_si128(), output_i_aux_1);
-            output_i_2 = _mm_unpackhi_epi16(_mm_setzero_si128(), output_i_aux_1);
-            output_i_3 = _mm_unpacklo_epi16(_mm_setzero_si128(), output_i_aux_2);
-            output_i_4 = _mm_unpackhi_epi16(_mm_setzero_si128(), output_i_aux_2);
-            
-            output_i_1 = _mm_srai_epi32(output_i_1, 24);
-            output_i_2 = _mm_srai_epi32(output_i_2, 24);
-            output_i_3 = _mm_srai_epi32(output_i_3, 24);
-            output_i_4 = _mm_srai_epi32(output_i_4, 24);
-            
-            output_ps_1 = _mm_cvtepi32_ps(output_i_1);
-            output_ps_2 = _mm_cvtepi32_ps(output_i_2);
-            output_ps_3 = _mm_cvtepi32_ps(output_i_3);
-            output_ps_4 = _mm_cvtepi32_ps(output_i_4);
+            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            
            L_code_acc = _mm_add_ps (L_code_acc, output_ps_1);
            L_code_acc = _mm_add_ps (L_code_acc, output_ps_2);
-            L_code_acc = _mm_add_ps (L_code_acc, output_ps_3);
-            L_code_acc = _mm_add_ps (L_code_acc, output_ps_4);

            input_ptr += 8;
            carrier_ptr += 8;
@@ -545,6 +343,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_generic(lv_32fc_t* E_

 #ifdef LV_HAVE_SSE4_1
 #include "smmintrin.h"
+#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
+#include "CommonMacros/CommonMacros.h"
 /*!
 \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
 \param input The input signal input
@@ -565,8 +365,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_sse4_1(lv_32fc_t* E
    __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
    
    __m128 E_code_acc, P_code_acc, L_code_acc;
-    __m128i output_i_1, output_i_2, output_i_3, output_i_4;
-    __m128 output_ps_1, output_ps_2, output_ps_3, output_ps_4;
+    __m128i input_i_1, input_i_2, output_i32;
+    __m128 output_ps_1, output_ps_2;
    
    const lv_8sc_t* input_ptr = input;
    const lv_8sc_t* carrier_ptr = carrier;
@@ -596,126 +396,34 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_sse4_1(lv_32fc_t* E
            x = _mm_load_si128((__m128i*)input_ptr);
            y = _mm_load_si128((__m128i*)carrier_ptr);
            
-            imagx = _mm_srli_si128 (x, 1);
-            imagx = _mm_and_si128 (imagx, mult1);
-            realx = _mm_and_si128 (x, mult1);
+            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
+            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (realx, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
-            
-            real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+            CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
            
            //Get early values
            y = _mm_load_si128((__m128i*)E_code_ptr);
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-            
-            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-            
-            imag_output = _mm_slli_si128 (imag_output, 1);
-            output = _mm_blendv_epi8 (imag_output, real_output, mult1);
-            
-            output_i_1 = _mm_cvtepi8_epi32(output);
-            output_ps_1 = _mm_cvtepi32_ps(output_i_1);
-            output = _mm_srli_si128 (output, 4);
-            output_i_2 = _mm_cvtepi8_epi32(output);
-            output_ps_2 = _mm_cvtepi32_ps(output_i_2);
-            output = _mm_srli_si128 (output, 4);
-            output_i_3 = _mm_cvtepi8_epi32(output);
-            output_ps_3 = _mm_cvtepi32_ps(output_i_3);
-            output = _mm_srli_si128 (output, 4);
-            output_i_4 = _mm_cvtepi8_epi32(output);
-            output_ps_4 = _mm_cvtepi32_ps(output_i_4);
+            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            
            E_code_acc = _mm_add_ps (E_code_acc, output_ps_1);
            E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
-            E_code_acc = _mm_add_ps (E_code_acc, output_ps_3);
-            E_code_acc = _mm_add_ps (E_code_acc, output_ps_4);
            
            //Get prompt values
            y = _mm_load_si128((__m128i*)P_code_ptr);
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-            
-            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-            
-            imag_output = _mm_slli_si128 (imag_output, 1);
-            output = _mm_blendv_epi8 (imag_output, real_output, mult1);
-            
-            output_i_1 = _mm_cvtepi8_epi32(output);
-            output_ps_1 = _mm_cvtepi32_ps(output_i_1);
-            output = _mm_srli_si128 (output, 4);
-            output_i_2 = _mm_cvtepi8_epi32(output);
-            output_ps_2 = _mm_cvtepi32_ps(output_i_2);
-            output = _mm_srli_si128 (output, 4);
-            output_i_3 = _mm_cvtepi8_epi32(output);
-            output_ps_3 = _mm_cvtepi32_ps(output_i_3);
-            output = _mm_srli_si128 (output, 4);
-            output_i_4 = _mm_cvtepi8_epi32(output);
-            output_ps_4 = _mm_cvtepi32_ps(output_i_4);
+            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            
            P_code_acc = _mm_add_ps (P_code_acc, output_ps_1);
            P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
-            P_code_acc = _mm_add_ps (P_code_acc, output_ps_3);
-            P_code_acc = _mm_add_ps (P_code_acc, output_ps_4);
            
            //Get late values
            y = _mm_load_si128((__m128i*)L_code_ptr);
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-            
-            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-            
-            imag_output = _mm_slli_si128 (imag_output, 1);
-            output = _mm_blendv_epi8 (imag_output, real_output, mult1);
-            
-            output_i_1 = _mm_cvtepi8_epi32(output);
-            output_ps_1 = _mm_cvtepi32_ps(output_i_1);
-            output = _mm_srli_si128 (output, 4);
-            output_i_2 = _mm_cvtepi8_epi32(output);
-            output_ps_2 = _mm_cvtepi32_ps(output_i_2);
-            output = _mm_srli_si128 (output, 4);
-            output_i_3 = _mm_cvtepi8_epi32(output);
-            output_ps_3 = _mm_cvtepi32_ps(output_i_3);
-            output = _mm_srli_si128 (output, 4);
-            output_i_4 = _mm_cvtepi8_epi32(output);
-            output_ps_4 = _mm_cvtepi32_ps(output_i_4);
+            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            
            L_code_acc = _mm_add_ps (L_code_acc, output_ps_1);
            L_code_acc = _mm_add_ps (L_code_acc, output_ps_2);
-            L_code_acc = _mm_add_ps (L_code_acc, output_ps_3);
-            L_code_acc = _mm_add_ps (L_code_acc, output_ps_4);
            
            input_ptr += 8;
            carrier_ptr += 8;
@@ -755,6 +463,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_sse4_1(lv_32fc_t* E

 #ifdef LV_HAVE_SSE2
 #include "emmintrin.h"
+#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
+#include "CommonMacros/CommonMacros.h"
 /*!
 \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
 \param input The input signal input
@@ -775,8 +485,8 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_sse2(lv_32fc_t* E_o
    __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
    
    __m128 E_code_acc, P_code_acc, L_code_acc;
-    __m128i output_i_aux_1, output_i_aux_2, output_i_1, output_i_2, output_i_3, output_i_4;
-    __m128 output_ps_1, output_ps_2, output_ps_3, output_ps_4;
+    __m128i input_i_1, input_i_2, output_i32;
+    __m128 output_ps_1, output_ps_2;
    
    const lv_8sc_t* input_ptr = input;
    const lv_8sc_t* carrier_ptr = carrier;
@@ -806,153 +516,40 @@ static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_sse2(lv_32fc_t* E_o
            x = _mm_load_si128((__m128i*)input_ptr);
            y = _mm_load_si128((__m128i*)carrier_ptr);
            
-            imagx = _mm_srli_si128 (x, 1);
-            imagx = _mm_and_si128 (imagx, mult1);
-            realx = _mm_and_si128 (x, mult1);
+            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
+            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (realx, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
-            
-            real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+            CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
            
            //Get early values
            y = _mm_load_si128((__m128i*)E_code_ptr);
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-            
-            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-            
-            real_output = _mm_and_si128 (real_output, mult1);
-            imag_output = _mm_and_si128 (imag_output, mult1);
-            imag_output = _mm_slli_si128 (imag_output, 1);
-            output = _mm_or_si128 (real_output, imag_output);
-            
-            output_i_aux_1 = _mm_unpacklo_epi8(_mm_setzero_si128(), output);
-            output_i_aux_2 = _mm_unpackhi_epi8(_mm_setzero_si128(), output);
-            output_i_1 = _mm_unpacklo_epi16(_mm_setzero_si128(), output_i_aux_1);
-            output_i_2 = _mm_unpackhi_epi16(_mm_setzero_si128(), output_i_aux_1);
-            output_i_3 = _mm_unpacklo_epi16(_mm_setzero_si128(), output_i_aux_2);
-            output_i_4 = _mm_unpackhi_epi16(_mm_setzero_si128(), output_i_aux_2);
-            
-            output_i_1 = _mm_srai_epi32(output_i_1, 24);
-            output_i_2 = _mm_srai_epi32(output_i_2, 24);
-            output_i_3 = _mm_srai_epi32(output_i_3, 24);
-            output_i_4 = _mm_srai_epi32(output_i_4, 24);
-            
-            output_ps_1 = _mm_cvtepi32_ps(output_i_1);
-            output_ps_2 = _mm_cvtepi32_ps(output_i_2);
-            output_ps_3 = _mm_cvtepi32_ps(output_i_3);
-            output_ps_4 = _mm_cvtepi32_ps(output_i_4);
+            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            
            E_code_acc = _mm_add_ps (E_code_acc, output_ps_1);
            E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
-            E_code_acc = _mm_add_ps (E_code_acc, output_ps_3);
-            E_code_acc = _mm_add_ps (E_code_acc, output_ps_4);
            
            //Get prompt values
            y = _mm_load_si128((__m128i*)P_code_ptr);
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-            
-            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-            
-            real_output = _mm_and_si128 (real_output, mult1);
-            imag_output = _mm_and_si128 (imag_output, mult1);
-            imag_output = _mm_slli_si128 (imag_output, 1);
-            output = _mm_or_si128 (real_output, imag_output);
-            
-            output_i_aux_1 = _mm_unpacklo_epi8(_mm_setzero_si128(), output);
-            output_i_aux_2 = _mm_unpackhi_epi8(_mm_setzero_si128(), output);
-            output_i_1 = _mm_unpacklo_epi16(_mm_setzero_si128(), output_i_aux_1);
-            output_i_2 = _mm_unpackhi_epi16(_mm_setzero_si128(), output_i_aux_1);
-            output_i_3 = _mm_unpacklo_epi16(_mm_setzero_si128(), output_i_aux_2);
-            output_i_4 = _mm_unpackhi_epi16(_mm_setzero_si128(), output_i_aux_2);
-            
-            output_i_1 = _mm_srai_epi32(output_i_1, 24);
-            output_i_2 = _mm_srai_epi32(output_i_2, 24);
-            output_i_3 = _mm_srai_epi32(output_i_3, 24);
-            output_i_4 = _mm_srai_epi32(output_i_4, 24);
-            
-            output_ps_1 = _mm_cvtepi32_ps(output_i_1);
-            output_ps_2 = _mm_cvtepi32_ps(output_i_2);
-            output_ps_3 = _mm_cvtepi32_ps(output_i_3);
-            output_ps_4 = _mm_cvtepi32_ps(output_i_4);
+            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            
            P_code_acc = _mm_add_ps (P_code_acc, output_ps_1);
            P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
-            P_code_acc = _mm_add_ps (P_code_acc, output_ps_3);
-            P_code_acc = _mm_add_ps (P_code_acc, output_ps_4);
            
            //Get late values
            y = _mm_load_si128((__m128i*)L_code_ptr);
            
-            imagy = _mm_srli_si128 (y, 1);
-            imagy = _mm_and_si128 (imagy, mult1);
-            realy = _mm_and_si128 (y, mult1);
-            
-            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-            
-            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-            
-            real_output = _mm_and_si128 (real_output, mult1);
-            imag_output = _mm_and_si128 (imag_output, mult1);
-            imag_output = _mm_slli_si128 (imag_output, 1);
-            output = _mm_or_si128 (real_output, imag_output);
-            
-            output_i_aux_1 = _mm_unpacklo_epi8(_mm_setzero_si128(), output);
-            output_i_aux_2 = _mm_unpackhi_epi8(_mm_setzero_si128(), output);
-            output_i_1 = _mm_unpacklo_epi16(_mm_setzero_si128(), output_i_aux_1);
-            output_i_2 = _mm_unpackhi_epi16(_mm_setzero_si128(), output_i_aux_1);
-            output_i_3 = _mm_unpacklo_epi16(_mm_setzero_si128(), output_i_aux_2);
-            output_i_4 = _mm_unpackhi_epi16(_mm_setzero_si128(), output_i_aux_2);
-            
-            output_i_1 = _mm_srai_epi32(output_i_1, 24);
-            output_i_2 = _mm_srai_epi32(output_i_2, 24);
-            output_i_3 = _mm_srai_epi32(output_i_3, 24);
-            output_i_4 = _mm_srai_epi32(output_i_4, 24);
-            
-            output_ps_1 = _mm_cvtepi32_ps(output_i_1);
-            output_ps_2 = _mm_cvtepi32_ps(output_i_2);
-            output_ps_3 = _mm_cvtepi32_ps(output_i_3);
-            output_ps_4 = _mm_cvtepi32_ps(output_i_4);
+            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            
            L_code_acc = _mm_add_ps (L_code_acc, output_ps_1);
            L_code_acc = _mm_add_ps (L_code_acc, output_ps_2);
-            L_code_acc = _mm_add_ps (L_code_acc, output_ps_3);
-            L_code_acc = _mm_add_ps (L_code_acc, output_ps_4);
-
+            
            input_ptr += 8;
            carrier_ptr += 8;
            E_code_ptr += 8;
-            L_code_ptr += 8;
            P_code_ptr += 8;
+            L_code_ptr += 8;
        }
        
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t E_dotProductVector[2];
--- a/src/algorithms/tracking/gnuradio_blocks/galileo_e1_dll_pll_veml_tracking_cc.cc
+++ b/src/algorithms/tracking/gnuradio_blocks/galileo_e1_dll_pll_veml_tracking_cc.cc
@@ -376,6 +376,8 @@ int galileo_e1_dll_pll_veml_tracking_cc::general_work (int noutput_items,gr_vect
                    d_Very_Late,
                    is_unaligned());
            
+            volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5(d_Very_Early, d_Early, d_Prompt, d_Late, d_Very_Late, in, d_carr_sign, d_very_early_code, d_early_code, d_prompt_code, d_late_code, d_very_late_code, d_current_prn_length_samples);
+            
            volk_gnsssdr_32fc_convert_16ic(d_very_early_code16, d_very_early_code, d_current_prn_length_samples);
            volk_gnsssdr_32fc_convert_16ic(d_early_code16, d_early_code, d_current_prn_length_samples);
            volk_gnsssdr_32fc_convert_16ic(d_prompt_code16, d_prompt_code, d_current_prn_length_samples);
@@ -386,7 +388,6 @@ int galileo_e1_dll_pll_veml_tracking_cc::general_work (int noutput_items,gr_vect
            
            volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5(d_Very_Early, d_Early, d_Prompt, d_Late, d_Very_Late, in16, d_carr_sign16, d_very_early_code16, d_early_code16, d_prompt_code16, d_late_code16, d_very_late_code16, d_current_prn_length_samples);
            
-            volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5(d_Very_Early, d_Early, d_Prompt, d_Late, d_Very_Late, in, d_carr_sign, d_very_early_code, d_early_code, d_prompt_code, d_late_code, d_very_late_code, d_current_prn_length_samples);
            
            // ################## PLL ##########################################################
            // PLL discriminator