diff --git a/CMakeLists.txt b/CMakeLists.txt
index def234f87..5263bf35a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -353,6 +353,11 @@ endif()
################################################################################
# volk_gnsssdr module - GNSS-SDR's own VOLK library
################################################################################
+find_package(VolkGnssSdr)
+
+# No installed volk_gnsssdr was found: build the copy bundled in the source tree.
+if(NOT VOLK_GNSSSDR_FOUND)
+message(STATUS "volk_gnsssdr not found; it will be built from the bundled sources")
ExternalProject_Add(volk_gnsssdr_module
PREFIX ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr_module
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr
@@ -377,7 +382,7 @@ set(VOLK_GNSSSDR_LIBRARIES volk_gnsssdr)
add_custom_command(TARGET volk_gnsssdr_module POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr_module/build/apps/volk_gnsssdr_profile
${CMAKE_SOURCE_DIR}/install/volk_gnsssdr_profile)
-
+endif() # NOT VOLK_GNSSSDR_FOUND
################################################################################
diff --git a/cmake/Modules/FindVolkGnssSdr.cmake b/cmake/Modules/FindVolkGnssSdr.cmake
new file mode 100644
index 000000000..b5890966b
--- /dev/null
+++ b/cmake/Modules/FindVolkGnssSdr.cmake
@@ -0,0 +1,51 @@
+########################################################################
+# Find VOLK (Vector-Optimized Library of Kernels) GNSS-SDR library
+#
+# The VOLK_GNSSSDR_DIR environment variable (read at configure time) and
+# the directories reported by pkg-config are tried as hints.
+#
+# Result variables:
+#   VOLK_GNSSSDR_FOUND        - true when header and library were located
+#   VOLK_GNSSSDR_INCLUDE_DIRS - directory holding volk_gnsssdr/volk_gnsssdr.h
+#   VOLK_GNSSSDR_LIBRARIES    - path of the volk_gnsssdr library
+########################################################################
+
+# Load pkg-config support, which provides pkg_check_modules().
+find_package(PkgConfig)
+pkg_check_modules(PC_VOLK_GNSSSDR volk_gnsssdr)
+
+find_path(
+  VOLK_GNSSSDR_INCLUDE_DIRS
+  NAMES volk_gnsssdr/volk_gnsssdr.h
+  HINTS $ENV{VOLK_GNSSSDR_DIR}/include
+        ${PC_VOLK_GNSSSDR_INCLUDEDIR}
+  PATHS /usr/local/include
+        /usr/include
+        ${GNURADIO_INSTALL_PREFIX}/include
+)
+
+find_library(
+  VOLK_GNSSSDR_LIBRARIES
+  NAMES volk_gnsssdr
+  HINTS $ENV{VOLK_GNSSSDR_DIR}/lib
+        ${PC_VOLK_GNSSSDR_LIBDIR}
+  PATHS /usr/local/lib
+        /usr/local/lib64
+        /usr/lib
+        /usr/lib64
+        ${GNURADIO_INSTALL_PREFIX}/lib
+)
+
+include(FindPackageHandleStandardArgs)
+# Use the same package name as find_package(VolkGnssSdr) so that its
+# REQUIRED and QUIET options are honoured.
+find_package_handle_standard_args(VolkGnssSdr DEFAULT_MSG VOLK_GNSSSDR_LIBRARIES VOLK_GNSSSDR_INCLUDE_DIRS)
+
+# Callers test VOLK_GNSSSDR_FOUND, so publish the result under that name.
+if(VolkGnssSdr_FOUND OR VOLKGNSSSDR_FOUND)
+  set(VOLK_GNSSSDR_FOUND TRUE)
+else()
+  set(VOLK_GNSSSDR_FOUND FALSE)
+endif()
+
+mark_as_advanced(VOLK_GNSSSDR_LIBRARIES VOLK_GNSSSDR_INCLUDE_DIRS)
diff --git a/src/algorithms/libs/volk_gnsssdr_module/CMakeLists.txt b/src/algorithms/libs/volk_gnsssdr_module/CMakeLists.txt
deleted file mode 100644
index 0ea86676c..000000000
--- a/src/algorithms/libs/volk_gnsssdr_module/CMakeLists.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright (C) 2012-2014 (see AUTHORS file for a list of contributors)
-#
-# This file is part of GNSS-SDR.
-#
-# GNSS-SDR is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# GNSS-SDR is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNSS-SDR. If not, see .
-#
-
-###############################################################################
-# Volk_gnsssdr module
-#In order to use volk_gnsssr module it is necessary to add:
-# 1) include_directories(..${VOLK_GNSSSDR_INCLUDE_DIRS}..)
-# 2) target_link_libraries(..${VOLK_GNSSSDR_LIBRARIES}..)
-###############################################################################
-
-#message(STATUS " START OF: Setup volk_gnsssdr as a subproject.")
-
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11")
-#set (CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr/install" CACHE PATH "Install prefix for volk_gnsssdr" FORCE )
-
-add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr)
-
-set(VOLK_GNSSSDR_INCLUDE_DIRS
- ${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr/include
- ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr/include
- CACHE INTERNAL ""
-)
-
-set(VOLK_GNSSSDR_LIBRARIES
- volk_gnsssdr
- CACHE INTERNAL ""
-)
-
-#message(STATUS " * INCLUDES: ${VOLK_GNSSSDR_INCLUDE_DIRS} ")
-#message(STATUS " * LIBS: ${VOLK_GNSSSDR_LIBRARIES} ")
-#message(STATUS " END OF: Setup volk_gnsssdr as a subproject.")
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/apps/volk_gnsssdr_profile.cc b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/apps/volk_gnsssdr_profile.cc
index c433acc19..9d810498d 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/apps/volk_gnsssdr_profile.cc
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/apps/volk_gnsssdr_profile.cc
@@ -32,7 +32,8 @@
namespace fs = boost::filesystem;
-void write_json(std::ofstream &json_file, std::vector results) {
+void write_json(std::ofstream &json_file, std::vector results)
+{
json_file << "{" << std::endl;
json_file << " \"volk_tests\": [" << std::endl;
size_t len = results.size();
@@ -43,31 +44,34 @@ void write_json(std::ofstream &json_file, std::vector tpair;
- BOOST_FOREACH(tpair pair, result.results) {
+ BOOST_FOREACH(tpair pair, result.results)
+ {
volk_gnsssdr_test_time_t time = pair.second;
json_file << " \"" << time.name << "\": {" << std::endl;
json_file << " \"name\": \"" << time.name << "\"," << std::endl;
json_file << " \"time\": " << time.time << "," << std::endl;
json_file << " \"units\": \"" << time.units << "\"" << std::endl;
json_file << " }" ;
- if(ri+1 != results_len) {
- json_file << ",";
- }
+ if(ri+1 != results_len)
+ {
+ json_file << ",";
+ }
json_file << std::endl;
ri++;
}
json_file << " }" << std::endl;
json_file << " }";
- if(i+1 != len) {
- json_file << ",";
- }
+ if(i+1 != len)
+ {
+ json_file << ",";
+ }
json_file << std::endl;
i++;
}
@@ -75,63 +79,67 @@ void write_json(std::ofstream &json_file, std::vector()->default_value( false )
- ->implicit_value( true ),
- "Run all kernels (benchmark mode)")
- ("tests-regex,R",
- boost::program_options::value(),
- "Run tests matching regular expression.")
- ("json,j",
- boost::program_options::value(),
- "JSON output file")
- ;
-
+ ("help,h", "Print help messages")
+ ("benchmark,b",
+ boost::program_options::value()->default_value( false )
+ ->implicit_value( true ),
+ "Run all kernels (benchmark mode)")
+ ("tests-regex,R",
+ boost::program_options::value(),
+ "Run tests matching regular expression.")
+ ("json,j",
+ boost::program_options::value(),
+ "JSON output file")
+ ;
+
// Handle the options that were given
boost::program_options::variables_map vm;
bool benchmark_mode;
std::string kernel_regex;
bool store_results = true;
std::ofstream json_file;
-
+
try {
- boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
- boost::program_options::notify(vm);
- benchmark_mode = vm.count("benchmark")?vm["benchmark"].as():false;
- if ( vm.count("tests-regex" ) ) {
- kernel_regex = vm["tests-regex"].as();
- store_results = false;
- std::cout << "Warning: using a regexp will not save results to a config" << std::endl;
- }
- else {
- kernel_regex = ".*";
- store_results = true;
- }
- } catch (boost::program_options::error& error) {
- std::cerr << "Error: " << error.what() << std::endl << std::endl;
- std::cerr << desc << std::endl;
- return 1;
+ boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
+ boost::program_options::notify(vm);
+ benchmark_mode = vm.count("benchmark")?vm["benchmark"].as():false;
+ if ( vm.count("tests-regex" ) )
+ {
+ kernel_regex = vm["tests-regex"].as();
+ store_results = false;
+ std::cout << "Warning: using a regexp will not save results to a config" << std::endl;
+ }
+ else
+ {
+ kernel_regex = ".*";
+ store_results = true;
+ }
+ } catch (boost::program_options::error& error)
+ {
+ std::cerr << "Error: " << error.what() << std::endl << std::endl;
+ std::cerr << desc << std::endl;
+ return 1;
}
/** --help option
*/
if ( vm.count("help") )
- {
- std::cout << "The GNSS-SDR VOLK profiler." << std::endl
- << desc << std::endl;
- return 0;
- }
-
+ {
+ std::cout << "The GNSS-SDR VOLK profiler." << std::endl
+ << desc << std::endl;
+ return 0;
+ }
+
if ( vm.count("json") )
- {
- json_file.open( vm["json"].as().c_str() );
- }
-
-
+ {
+ json_file.open( vm["json"].as().c_str() );
+ }
+
+
// Run tests
std::vector results;
@@ -147,78 +155,82 @@ int main(int argc, char *argv[]) {
//VOLK_PROFILE(volk_gnsssdr_32u_popcnt, 0, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_64u_popcnt, 0, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, lv_32fc_t(1.0, 0.5), 204602, 1000, &results, benchmark_mode, kernel_regex);
-
+
//GNSS-SDR PROTO-KERNELS
//lv_32fc_t sfv = lv_cmake((float)1, (float)2);
//example: VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, sfv, 204602, 1000, &results, benchmark_mode, kernel_regex);
-
- //CAN NOT BE TESTED YET BECAUSE VOLK MODULE DOES NOT SUPPORT IT:
- //VOLK_PROFILE(volk_gnsssdr_s32f_x2_update_local_carrier_32fc, 1e-4, 0, 16007, 1, &results, benchmark_mode, kernel_regex);
- //VOLK_PROFILE(volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc, 1e-4, 0, 7, 1, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_TEST_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-
- VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-
- VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-
- VOLK_PROFILE(volk_gnsssdr_32fc_convert_16ic, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_32fc_convert_8ic, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_32fc_s32f_convert_8ic, 1e-4, 5, 16000, 250, &results, benchmark_mode, kernel_regex);
-
- VOLK_PROFILE(volk_gnsssdr_8i_accumulator_s8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_8i_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_8i_max_s8i, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_8i_x2_add_8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_8ic_conjugate_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_8ic_magnitude_squared_8i, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_8ic_x2_dot_prod_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_8ic_x2_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_8u_x2_multiply_8u, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
- VOLK_PROFILE(volk_gnsssdr_64f_accumulator_64f, 1e-4, 0, 16000, 1000, &results, benchmark_mode, kernel_regex);
-
+ //CAN NOT BE TESTED YET BECAUSE VOLK MODULE DOES NOT SUPPORT IT:
+ //VOLK_PROFILE(volk_gnsssdr_s32f_x2_update_local_carrier_32fc, 1e-4, 0, 16007, 1, &results, benchmark_mode, kernel_regex);
+ //VOLK_PROFILE(volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc, 1e-4, 0, 7, 1, &results, benchmark_mode, kernel_regex);
+
+ VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_TEST_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+
+ VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+
+ VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+
+ VOLK_PROFILE(volk_gnsssdr_32fc_convert_16ic, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32fc_convert_8ic, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32fc_s32f_convert_8ic, 1e-4, 5, 16000, 250, &results, benchmark_mode, kernel_regex);
+
+ VOLK_PROFILE(volk_gnsssdr_8i_accumulator_s8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8i_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8i_max_s8i, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8i_x2_add_8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_conjugate_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_magnitude_squared_8i, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_x2_dot_prod_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_x2_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8u_x2_multiply_8u, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_64f_accumulator_64f, 1e-4, 0, 16000, 1000, &results, benchmark_mode, kernel_regex);
+
// Until we can update the config on a kernel by kernel basis
// do not overwrite volk_config when using a regex.
- if(store_results) {
- char path[1024];
- volk_gnsssdr_get_config_path(path);
-
- const fs::path config_path(path);
-
- if (not fs::exists(config_path.branch_path()))
+ if(store_results)
{
- std::cout << "Creating " << config_path.branch_path() << "..." << std::endl;
- fs::create_directories(config_path.branch_path());
+ char path[1024];
+ volk_gnsssdr_get_config_path(path);
+
+ const fs::path config_path(path);
+
+ if (not fs::exists(config_path.branch_path()))
+ {
+ std::cout << "Creating " << config_path.branch_path() << "..." << std::endl;
+ fs::create_directories(config_path.branch_path());
+ }
+
+ std::cout << "Writing " << config_path << "..." << std::endl;
+ std::ofstream config(config_path.string().c_str());
+ if(!config.is_open())
+ { //either we don't have write access or we don't have the dir yet
+ std::cout << "Error opening file " << config_path << std::endl;
+ }
+
+ config << "\
+ #this file is generated by volk_profile.\n\
+ #the function name is followed by the preferred architecture.\n\
+ ";
+
+ BOOST_FOREACH(volk_gnsssdr_test_results_t result, results)
+ {
+ config << result.config_name << " "
+ << result.best_arch_a << " "
+ << result.best_arch_u << std::endl;
+ }
+ config.close();
}
-
- std::cout << "Writing " << config_path << "..." << std::endl;
- std::ofstream config(config_path.string().c_str());
- if(!config.is_open()) { //either we don't have write access or we don't have the dir yet
- std::cout << "Error opening file " << config_path << std::endl;
+ else
+ {
+ std::cout << "Warning: config not generated" << std::endl;
}
-
- config << "\
- #this file is generated by volk_profile.\n\
- #the function name is followed by the preferred architecture.\n\
- ";
-
- BOOST_FOREACH(volk_gnsssdr_test_results_t result, results) {
- config << result.config_name << " "
- << result.best_arch_a << " "
- << result.best_arch_u << std::endl;
- }
- config.close();
- }
- else {
- std::cout << "Warning: config not generated" << std::endl;
- }
}
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/volk_gnsssdr_arch_defs.py b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/volk_gnsssdr_arch_defs.py
index 3c75e1374..422c84935 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/volk_gnsssdr_arch_defs.py
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/volk_gnsssdr_arch_defs.py
@@ -1,18 +1,21 @@
+#!/usr/bin/env python
#
-# Copyright 2012 Free Software Foundation, Inc.
+# Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
#
-# This program is free software: you can redistribute it and/or modify
+# This file is part of GNSS-SDR.
+#
+# GNSS-SDR is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
-# This program is distributed in the hope that it will be useful,
+# GNSS-SDR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
+# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
archs = list()
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/volk_gnsssdr_kernel_defs.py b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/volk_gnsssdr_kernel_defs.py
index c16d9792b..15675ae97 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/volk_gnsssdr_kernel_defs.py
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/volk_gnsssdr_kernel_defs.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
#
# Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
#
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/volk_gnsssdr_machine_defs.py b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/volk_gnsssdr_machine_defs.py
index 7d81513fd..8058e12e5 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/volk_gnsssdr_machine_defs.py
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/gen/volk_gnsssdr_machine_defs.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
#
# Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
#
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_malloc.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_malloc.h
index 33b2f5772..a05ccb184 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_malloc.h
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_malloc.h
@@ -20,8 +20,9 @@
#ifndef INCLUDED_VOLK_MALLOC_H
#define INCLUDED_VOLK_MALLOC_H
-#include
#include
+#include "volk_gnsssdr/volk_gnsssdr_common.h"
+
__VOLK_DECL_BEGIN
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/CMakeLists.txt b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/CMakeLists.txt
index d334c3d4b..ef3a3963c 100644
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/CMakeLists.txt
+++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/CMakeLists.txt
@@ -529,7 +529,7 @@ file(GLOB CommonMacros ${CMAKE_SOURCE_DIR}/kernels/CommonMacros/*.h ${CMAKE_SOUR
if(ENABLE_STATIC_LIBS)
add_library(volk_gnsssdr STATIC ${volk_gnsssdr_sources} ${h_files} ${CommonMacros} ${orc})
else(ENABLE_STATIC_LIBS)
- add_library(volk_gnsssdr SHARED ${volk_gnsssdr_sources})
+ add_library(volk_gnsssdr SHARED ${volk_gnsssdr_sources} ${h_files} ${CommonMacros} ${orc})
endif(ENABLE_STATIC_LIBS)
source_group("Kernels" FILES ${h_files})
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/patches for generating volk_gnsssdr/2014-10-17_Patch.patch b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/patches for generating volk_gnsssdr/2014-10-17_Patch.patch
deleted file mode 100644
index 88bb4fd35..000000000
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/patches for generating volk_gnsssdr/2014-10-17_Patch.patch
+++ /dev/null
@@ -1,329 +0,0 @@
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/lib/CMakeLists.txt /Users/andres/Desktop/volk_gnsssdr_original/lib/CMakeLists.txt
---- /Users/andres/Desktop/volk_gnsssdr/lib/CMakeLists.txt 2014-10-17 04:26:38.000000000 +0200
-+++ /Users/andres/Desktop/volk_gnsssdr_original/lib/CMakeLists.txt 2014-10-17 04:17:37.000000000 +0200
-@@ -517,7 +517,19 @@ if(MSVC)
- endif()
-
- #create the volk_gnsssdr runtime library
--add_library(volk_gnsssdr SHARED ${volk_gnsssdr_sources})
-+
-+#MODIFICATIONS BY GNSS-SDR
-+file(GLOB orc ${CMAKE_SOURCE_DIR}/orc/*.orc)
-+file(GLOB CommonMacros ${CMAKE_SOURCE_DIR}/kernels/CommonMacros/*.h ${CMAKE_SOURCE_DIR}/kernels/CommonMacros/README.txt)
-+
-+#add_library(volk_gnsssdr SHARED ${volk_gnsssdr_sources})
-+add_library(volk_gnsssdr SHARED ${volk_gnsssdr_sources} ${h_files} ${CommonMacros} ${orc})
-+
-+source_group("Kernels" FILES ${h_files})
-+source_group("Common Macros" FILES ${CommonMacros})
-+source_group("ORC Files" FILES ${orc})
-+#END OF MODIFICATIONS
-+
- target_link_libraries(volk_gnsssdr ${volk_gnsssdr_libraries})
- set_target_properties(volk_gnsssdr PROPERTIES SOVERSION ${LIBVER})
- set_target_properties(volk_gnsssdr PROPERTIES DEFINE_SYMBOL "volk_gnsssdr_EXPORTS")
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/lib/qa_utils.cc /Users/andres/Desktop/volk_gnsssdr_original/lib/qa_utils.cc
---- /Users/andres/Desktop/volk_gnsssdr/lib/qa_utils.cc 2014-10-17 04:26:39.000000000 +0200
-+++ /Users/andres/Desktop/volk_gnsssdr_original/lib/qa_utils.cc 2014-10-17 04:21:03.000000000 +0200
-@@ -217,6 +217,72 @@ inline void run_cast_test3_s32fc(volk_gn
- while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
- }
-
-+//ADDED BY GNSS-SDR. START
-+inline void run_cast_test1_s8i(volk_gnsssdr_fn_1arg_s8i func, std::vector &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test2_s8i(volk_gnsssdr_fn_2arg_s8i func, std::vector &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test3_s8i(volk_gnsssdr_fn_3arg_s8i func, std::vector &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test1_s8ic(volk_gnsssdr_fn_1arg_s8ic func, std::vector &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test2_s8ic(volk_gnsssdr_fn_2arg_s8ic func, std::vector &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test3_s8ic(volk_gnsssdr_fn_3arg_s8ic func, std::vector &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test8(volk_gnsssdr_fn_8arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test8_s8i(volk_gnsssdr_fn_8arg_s8i func, std::vector &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test8_s8ic(volk_gnsssdr_fn_8arg_s8ic func, std::vector &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test8_s32f(volk_gnsssdr_fn_8arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test8_s32fc(volk_gnsssdr_fn_8arg_s32fc func, std::vector &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test12(volk_gnsssdr_fn_12arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test12_s8i(volk_gnsssdr_fn_12arg_s8i func, std::vector &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test12_s8ic(volk_gnsssdr_fn_12arg_s8ic func, std::vector &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test12_s32f(volk_gnsssdr_fn_12arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str());
-+}
-+
-+inline void run_cast_test12_s32fc(volk_gnsssdr_fn_12arg_s32fc func, std::vector &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-+ while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str());
-+}
-+//ADDED BY GNSS-SDR. END
-+
- // This function is a nop that helps resolve GNU Radio bugs 582 and 583.
- // Without this the cast in run_volk_gnsssdr_tests for tol_i = static_cast(float tol)
- // won't happen on armhf (reported on cortex A9 and A15).
-@@ -426,7 +492,17 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr
- } else {
- run_cast_test1_s32f((volk_gnsssdr_fn_1arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
- }
-- } else throw "unsupported 1 arg function >1 scalars";
-+ }
-+ //ADDED BY GNSS-SDR. START
-+ else if(inputsc.size() == 1 && !inputsc[0].is_float) {
-+ if(inputsc[0].is_complex) {
-+ run_cast_test1_s8ic((volk_gnsssdr_fn_1arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
-+ } else {
-+ run_cast_test1_s8i((volk_gnsssdr_fn_1arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
-+ }
-+ }
-+ //ADDED BY GNSS-SDR. END
-+ else throw "unsupported 1 arg function >1 scalars";
- break;
- case 2:
- if(inputsc.size() == 0) {
-@@ -437,7 +513,17 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr
- } else {
- run_cast_test2_s32f((volk_gnsssdr_fn_2arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
- }
-- } else throw "unsupported 2 arg function >1 scalars";
-+ }
-+ //ADDED BY GNSS-SDR. START
-+ else if(inputsc.size() == 1 && !inputsc[0].is_float) {
-+ if(inputsc[0].is_complex) {
-+ run_cast_test2_s8ic((volk_gnsssdr_fn_2arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
-+ } else {
-+ run_cast_test2_s8i((volk_gnsssdr_fn_2arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
-+ }
-+ }
-+ //ADDED BY GNSS-SDR. END
-+ else throw "unsupported 2 arg function >1 scalars";
- break;
- case 3:
- if(inputsc.size() == 0) {
-@@ -448,11 +534,61 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr
- } else {
- run_cast_test3_s32f((volk_gnsssdr_fn_3arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
- }
-- } else throw "unsupported 3 arg function >1 scalars";
-+ }
-+ //ADDED BY GNSS-SDR. START
-+ else if(inputsc.size() == 1 && !inputsc[0].is_float) {
-+ if(inputsc[0].is_complex) {
-+ run_cast_test3_s8ic((volk_gnsssdr_fn_3arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
-+ } else {
-+ run_cast_test3_s8i((volk_gnsssdr_fn_3arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
-+ }
-+ }
-+ //ADDED BY GNSS-SDR. END
-+ else throw "unsupported 3 arg function >1 scalars";
- break;
- case 4:
- run_cast_test4((volk_gnsssdr_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
- break;
-+ //ADDED BY GNSS-SDR. START
-+ case 8:
-+ if(inputsc.size() == 0) {
-+ run_cast_test8((volk_gnsssdr_fn_8arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
-+ } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-+ if(inputsc[0].is_complex) {
-+ run_cast_test8_s32fc((volk_gnsssdr_fn_8arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
-+ } else {
-+ run_cast_test8_s32f((volk_gnsssdr_fn_8arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
-+ }
-+ }
-+ else if(inputsc.size() == 1 && !inputsc[0].is_float) {
-+ if(inputsc[0].is_complex) {
-+ run_cast_test8_s8ic((volk_gnsssdr_fn_8arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
-+ } else {
-+ run_cast_test8_s8i((volk_gnsssdr_fn_8arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
-+ }
-+ }
-+ else throw "unsupported 8 arg function >1 scalars";
-+ break;
-+ case 12:
-+ if(inputsc.size() == 0) {
-+ run_cast_test12((volk_gnsssdr_fn_12arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
-+ } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-+ if(inputsc[0].is_complex) {
-+ run_cast_test12_s32fc((volk_gnsssdr_fn_12arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
-+ } else {
-+ run_cast_test12_s32f((volk_gnsssdr_fn_12arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
-+ }
-+ }
-+ else if(inputsc.size() == 1 && !inputsc[0].is_float) {
-+ if(inputsc[0].is_complex) {
-+ run_cast_test12_s8ic((volk_gnsssdr_fn_12arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
-+ } else {
-+ run_cast_test12_s8i((volk_gnsssdr_fn_12arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
-+ }
-+ }
-+ else throw "unsupported 12 arg function >1 scalars";
-+ break;
-+ //ADDED BY GNSS-SDR. END
- default:
- throw "no function handler for this signature";
- break;
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/lib/qa_utils.h /Users/andres/Desktop/volk_gnsssdr_original/lib/qa_utils.h
---- /Users/andres/Desktop/volk_gnsssdr/lib/qa_utils.h 2014-10-17 04:26:39.000000000 +0200
-+++ /Users/andres/Desktop/volk_gnsssdr_original/lib/qa_utils.h 2014-10-17 04:21:51.000000000 +0200
-@@ -77,4 +77,26 @@ typedef void (*volk_gnsssdr_fn_1arg_s32f
- typedef void (*volk_gnsssdr_fn_2arg_s32fc)(void *, void *, lv_32fc_t, unsigned int, const char*);
- typedef void (*volk_gnsssdr_fn_3arg_s32fc)(void *, void *, void *, lv_32fc_t, unsigned int, const char*);
-
-+//ADDED BY GNSS-SDR. START
-+typedef void (*volk_gnsssdr_fn_1arg_s8i)(void *, char, unsigned int, const char*); //one input vector, one scalar char input
-+typedef void (*volk_gnsssdr_fn_2arg_s8i)(void *, void *, char, unsigned int, const char*);
-+typedef void (*volk_gnsssdr_fn_3arg_s8i)(void *, void *, void *, char, unsigned int, const char*);
-+typedef void (*volk_gnsssdr_fn_1arg_s8ic)(void *, lv_8sc_t, unsigned int, const char*); //one input vector, one scalar lv_8sc_t vector input
-+typedef void (*volk_gnsssdr_fn_2arg_s8ic)(void *, void *, lv_8sc_t, unsigned int, const char*);
-+typedef void (*volk_gnsssdr_fn_3arg_s8ic)(void *, void *, void *, lv_8sc_t, unsigned int, const char*);
-+
-+typedef void (*volk_gnsssdr_fn_8arg)(void *, void *, void *, void *, void *, void *, void *, void *, unsigned int, const char*);
-+typedef void (*volk_gnsssdr_fn_8arg_s32f)(void *, void *, void *, void *, void *, void *, void *, void *, float, unsigned int, const char*);
-+typedef void (*volk_gnsssdr_fn_8arg_s32fc)(void *, void *, void *, void *, void *, void *, void *, void *, lv_32fc_t, unsigned int, const char*);
-+typedef void (*volk_gnsssdr_fn_8arg_s8i)(void *, void *, void *, void *, void *, void *, void *, void *, char, unsigned int, const char*);
-+typedef void (*volk_gnsssdr_fn_8arg_s8ic)(void *, void *, void *, void *, void *, void *, void *, void *, lv_8sc_t, unsigned int, const char*);
-+
-+typedef void (*volk_gnsssdr_fn_12arg)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, unsigned int, const char*);
-+typedef void (*volk_gnsssdr_fn_12arg_s32f)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, float, unsigned int, const char*);
-+typedef void (*volk_gnsssdr_fn_12arg_s32fc)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, lv_32fc_t, unsigned int, const char*);
-+typedef void (*volk_gnsssdr_fn_12arg_s8i)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, char, unsigned int, const char*);
-+typedef void (*volk_gnsssdr_fn_12arg_s8ic)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, lv_8sc_t, unsigned int, const char*);
-+//ADDED BY GNSS-SDR. END
-+
-+
- #endif //VOLK_QA_UTILS_H
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/tmpl/volk_gnsssdr.tmpl.h /Users/andres/Desktop/volk_gnsssdr_original/tmpl/volk_gnsssdr.tmpl.h
---- /Users/andres/Desktop/volk_gnsssdr/tmpl/volk_gnsssdr.tmpl.h 2014-10-17 04:26:39.000000000 +0200
-+++ /Users/andres/Desktop/volk_gnsssdr_original/tmpl/volk_gnsssdr.tmpl.h 2014-10-17 04:23:30.000000000 +0200
-@@ -19,8 +19,8 @@
- * Boston, MA 02110-1301, USA.
- */
-
--#ifndef INCLUDED_VOLK_RUNTIME
--#define INCLUDED_VOLK_RUNTIME
-+#ifndef INCLUDED_VOLK_GNSSSDR_RUNTIME
-+#define INCLUDED_VOLK_GNSSSDR_RUNTIME
-
- #include
- #include
-@@ -91,4 +91,4 @@ extern VOLK_API volk_gnsssdr_func_desc_t
-
- __VOLK_DECL_END
-
--#endif /*INCLUDED_VOLK_RUNTIME*/
-+#endif /*INCLUDED_VOLK_GNSSSDR_RUNTIME*/
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/tmpl/volk_gnsssdr_config_fixed.tmpl.h /Users/andres/Desktop/volk_gnsssdr_original/tmpl/volk_gnsssdr_config_fixed.tmpl.h
---- /Users/andres/Desktop/volk_gnsssdr/tmpl/volk_gnsssdr_config_fixed.tmpl.h 2014-10-17 04:26:39.000000000 +0200
-+++ /Users/andres/Desktop/volk_gnsssdr_original/tmpl/volk_gnsssdr_config_fixed.tmpl.h 2014-10-17 04:22:58.000000000 +0200
-@@ -19,11 +19,11 @@
- * Boston, MA 02110-1301, USA.
- */
-
--#ifndef INCLUDED_VOLK_CONFIG_FIXED_H
--#define INCLUDED_VOLK_CONFIG_FIXED_H
-+#ifndef INCLUDED_VOLK_GNSSSDR_CONFIG_FIXED_H
-+#define INCLUDED_VOLK_GNSSSDR_CONFIG_FIXED_H
-
- #for $i, $arch in enumerate($archs)
- #define LV_$(arch.name.upper()) $i
- #end for
-
--#endif /*INCLUDED_VOLK_CONFIG_FIXED*/
-+#endif /*INCLUDED_VOLK_GNSSSDR_CONFIG_FIXED*/
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/tmpl/volk_gnsssdr_cpu.tmpl.h /Users/andres/Desktop/volk_gnsssdr_original/tmpl/volk_gnsssdr_cpu.tmpl.h
---- /Users/andres/Desktop/volk_gnsssdr/tmpl/volk_gnsssdr_cpu.tmpl.h 2014-10-17 04:26:39.000000000 +0200
-+++ /Users/andres/Desktop/volk_gnsssdr_original/tmpl/volk_gnsssdr_cpu.tmpl.h 2014-10-17 04:23:07.000000000 +0200
-@@ -19,8 +19,8 @@
- * Boston, MA 02110-1301, USA.
- */
-
--#ifndef INCLUDED_VOLK_CPU_H
--#define INCLUDED_VOLK_CPU_H
-+#ifndef INCLUDED_VOLK_GNSSSDR_CPU_H
-+#define INCLUDED_VOLK_GNSSSDR_CPU_H
-
- #include
-
-@@ -39,4 +39,4 @@ unsigned int volk_gnsssdr_get_lvarch ();
-
- __VOLK_DECL_END
-
--#endif /*INCLUDED_VOLK_CPU_H*/
-+#endif /*INCLUDED_VOLK_GNSSSDR_CPU_H*/
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/tmpl/volk_gnsssdr_machines.tmpl.h /Users/andres/Desktop/volk_gnsssdr_original/tmpl/volk_gnsssdr_machines.tmpl.h
---- /Users/andres/Desktop/volk_gnsssdr/tmpl/volk_gnsssdr_machines.tmpl.h 2014-10-17 04:26:39.000000000 +0200
-+++ /Users/andres/Desktop/volk_gnsssdr_original/tmpl/volk_gnsssdr_machines.tmpl.h 2014-10-17 04:23:16.000000000 +0200
-@@ -19,8 +19,8 @@
- * Boston, MA 02110-1301, USA.
- */
-
--#ifndef INCLUDED_LIBVOLK_MACHINES_H
--#define INCLUDED_LIBVOLK_MACHINES_H
-+#ifndef INCLUDED_LIBVOLK_GNSSSDR_MACHINES_H
-+#define INCLUDED_LIBVOLK_GNSSSDR_MACHINES_H
-
- #include
- #include
-@@ -52,4 +52,4 @@ extern struct volk_gnsssdr_machine volk_
-
- __VOLK_DECL_END
-
--#endif //INCLUDED_LIBVOLK_MACHINES_H
-+#endif //INCLUDED_LIBVOLK_GNSSSDR_MACHINES_H
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/tmpl/volk_gnsssdr_typedefs.tmpl.h /Users/andres/Desktop/volk_gnsssdr_original/tmpl/volk_gnsssdr_typedefs.tmpl.h
---- /Users/andres/Desktop/volk_gnsssdr/tmpl/volk_gnsssdr_typedefs.tmpl.h 2014-10-17 04:26:39.000000000 +0200
-+++ /Users/andres/Desktop/volk_gnsssdr_original/tmpl/volk_gnsssdr_typedefs.tmpl.h 2014-10-17 04:23:23.000000000 +0200
-@@ -19,8 +19,8 @@
- * Boston, MA 02110-1301, USA.
- */
-
--#ifndef INCLUDED_VOLK_TYPEDEFS
--#define INCLUDED_VOLK_TYPEDEFS
-+#ifndef INCLUDED_VOLK_GNSSSDR_TYPEDEFS
-+#define INCLUDED_VOLK_GNSSSDR_TYPEDEFS
-
- #include
- #include
-@@ -29,4 +29,4 @@
- typedef void (*$(kern.pname))($kern.arglist_types);
- #end for
-
--#endif /*INCLUDED_VOLK_TYPEDEFS*/
-+#endif /*INCLUDED_VOLK_GNSSSDR_TYPEDEFS*/
diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/patches for generating volk_gnsssdr/2014-10-17_Patch_with_protokernels.patch b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/patches for generating volk_gnsssdr/2014-10-17_Patch_with_protokernels.patch
deleted file mode 100644
index 82bb1f5ac..000000000
--- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/patches for generating volk_gnsssdr/2014-10-17_Patch_with_protokernels.patch
+++ /dev/null
@@ -1,57094 +0,0 @@
-Binary files /Users/andres/Desktop/volk_gnsssdr/.DS_Store and /Users/andres/Desktop/volk_gnsssdr_original/.DS_Store differ
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/apps/volk_gnsssdr_profile.cc /Users/andres/Desktop/volk_gnsssdr_original/apps/volk_gnsssdr_profile.cc
---- /Users/andres/Desktop/volk_gnsssdr/apps/volk_gnsssdr_profile.cc 2014-10-17 05:07:25.000000000 +0200
-+++ /Users/andres/Desktop/volk_gnsssdr_original/apps/volk_gnsssdr_profile.cc 2014-10-17 05:01:21.000000000 +0200
-@@ -39,7 +39,7 @@ namespace fs = boost::filesystem;
-
- void write_json(std::ofstream &json_file, std::vector results) {
- json_file << "{" << std::endl;
-- json_file << " \"volk_gnsssdr_tests\": [" << std::endl;
-+ json_file << " \"volk_tests\": [" << std::endl;
- size_t len = results.size();
- size_t i = 0;
- BOOST_FOREACH(volk_gnsssdr_test_results_t &result, results) {
-@@ -48,9 +48,9 @@ void write_json(std::ofstream &json_file
- json_file << " \"vlen\": " << result.vlen << "," << std::endl;
- json_file << " \"iter\": " << result.iter << "," << std::endl;
- json_file << " \"best_arch_a\": \"" << result.best_arch_a
-- << "\"," << std::endl;
-+ << "\"," << std::endl;
- json_file << " \"best_arch_u\": \"" << result.best_arch_u
-- << "\"," << std::endl;
-+ << "\"," << std::endl;
- json_file << " \"results\": {" << std::endl;
- size_t results_len = result.results.size();
- size_t ri = 0;
-@@ -84,26 +84,26 @@ int main(int argc, char *argv[]) {
- // Adding program options
- boost::program_options::options_description desc("Options");
- desc.add_options()
-- ("help,h", "Print help messages")
-- ("benchmark,b",
-- boost::program_options::value()->default_value( false )
-- ->implicit_value( true ),
-- "Run all kernels (benchmark mode)")
-- ("tests-regex,R",
-- boost::program_options::value(),
-- "Run tests matching regular expression.")
-- ("json,j",
-- boost::program_options::value(),
-- "JSON output file")
-- ;
--
-+ ("help,h", "Print help messages")
-+ ("benchmark,b",
-+ boost::program_options::value()->default_value( false )
-+ ->implicit_value( true ),
-+ "Run all kernels (benchmark mode)")
-+ ("tests-regex,R",
-+ boost::program_options::value(),
-+ "Run tests matching regular expression.")
-+ ("json,j",
-+ boost::program_options::value(),
-+ "JSON output file")
-+ ;
-+
- // Handle the options that were given
- boost::program_options::variables_map vm;
- bool benchmark_mode;
- std::string kernel_regex;
- bool store_results = true;
- std::ofstream json_file;
--
-+
- try {
- boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
- boost::program_options::notify(vm);
-@@ -123,20 +123,20 @@ int main(int argc, char *argv[]) {
- return 1;
- }
- /** --help option
--*/
-+ */
- if ( vm.count("help") )
- {
-- std::cout << "The VOLK profiler." << std::endl
-- << desc << std::endl;
-- return 0;
-+ std::cout << "The VOLK profiler." << std::endl
-+ << desc << std::endl;
-+ return 0;
- }
--
-+
- if ( vm.count("json") )
- {
- json_file.open( vm["json"].as().c_str() );
- }
--
--
-+
-+
- // Run tests
- std::vector results;
-
-@@ -152,36 +152,84 @@ int main(int argc, char *argv[]) {
- //VOLK_PROFILE(volk_gnsssdr_32u_popcnt, 0, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
- //VOLK_PROFILE(volk_gnsssdr_64u_popcnt, 0, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
- //VOLK_PROFILE(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, lv_32fc_t(1.0, 0.5), 204602, 1000, &results, benchmark_mode, kernel_regex);
--
-+
-+ //GNSS-SDR PROTO-KERNELS
-+ //lv_32fc_t sfv = lv_cmake((float)1, (float)2);
-+ //example: VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, sfv, 204602, 1000, &results, benchmark_mode, kernel_regex);
-+
-+ //CAN NOT BE TESTED YET BECAUSE VOLK MODULE DOES NOT SUPPORT IT:
-+ //VOLK_PROFILE(volk_gnsssdr_s32f_x2_update_local_carrier_32fc, 1e-4, 0, 16007, 1, &results, benchmark_mode, kernel_regex);
-+ //VOLK_PROFILE(volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc, 1e-4, 0, 7, 1, &results, benchmark_mode, kernel_regex);
-+
-+ VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_TEST_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-+
-+ VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-+
-+ VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-+
-+ VOLK_PROFILE(volk_gnsssdr_32fc_convert_16ic, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_32fc_convert_8ic, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_32fc_s32f_convert_8ic, 1e-4, 5, 16000, 250, &results, benchmark_mode, kernel_regex);
-+
-+ /*VOLK_PROFILE(volk_gnsssdr_32f_accumulator_s32f, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_8i_accumulator_s8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_32f_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_8i_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_8i_max_s8i, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_32f_x2_add_32f, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_8i_x2_add_8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_32fc_conjugate_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_8ic_conjugate_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_32fc_magnitude_squared_32f, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_8ic_magnitude_squared_8i, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_32fc_x2_dot_prod_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_8ic_x2_dot_prod_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_32fc_x2_multiply_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_8ic_x2_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_8u_x2_multiply_8u, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_64f_accumulator_64f, 1e-4, 0, 16000, 1000, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_32f_s32f_convert_16i, 1e-4, 1, 204602, 250, &results, benchmark_mode, kernel_regex);
-+ VOLK_PROFILE(volk_gnsssdr_16i_s32f_convert_32f, 1e-4, 1, 204602, 250, &results, benchmark_mode, kernel_regex);*/
-+
- // Until we can update the config on a kernel by kernel basis
-- // do not overwrite volk_gnsssdr_config when using a regex.
-+ // do not overwrite volk_config when using a regex.
- if(store_results) {
- char path[1024];
- volk_gnsssdr_get_config_path(path);
--
-+
- const fs::path config_path(path);
--
-+
- if (not fs::exists(config_path.branch_path()))
- {
- std::cout << "Creating " << config_path.branch_path() << "..." << std::endl;
- fs::create_directories(config_path.branch_path());
- }
--
-+
- std::cout << "Writing " << config_path << "..." << std::endl;
- std::ofstream config(config_path.string().c_str());
- if(!config.is_open()) { //either we don't have write access or we don't have the dir yet
- std::cout << "Error opening file " << config_path << std::endl;
- }
--
-+
- config << "\
--#this file is generated by volk_gnsssdr_profile.\n\
--#the function name is followed by the preferred architecture.\n\
--";
--
-+ #this file is generated by volk_profile.\n\
-+ #the function name is followed by the preferred architecture.\n\
-+ ";
-+
- BOOST_FOREACH(volk_gnsssdr_test_results_t result, results) {
- config << result.config_name << " "
-- << result.best_arch_a << " "
-- << result.best_arch_u << std::endl;
-+ << result.best_arch_a << " "
-+ << result.best_arch_u << std::endl;
- }
- config.close();
- }
-Binary files /Users/andres/Desktop/volk_gnsssdr/kernels/.DS_Store and /Users/andres/Desktop/volk_gnsssdr_original/kernels/.DS_Store differ
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/CommonMacros/CommonMacros.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/CommonMacros/CommonMacros.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/CommonMacros/CommonMacros.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/CommonMacros/CommonMacros.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,174 @@
-+/*!
-+ * \file CommonMacros.h
-+ * \brief Common macros used inside the volk protokernels.
-+ * \authors
-+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
-+ *
-+ *
-+ * -------------------------------------------------------------------------
-+ *
-+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
-+ *
-+ * GNSS-SDR is a software defined Global Navigation
-+ * Satellite Systems receiver
-+ *
-+ * This file is part of GNSS-SDR.
-+ *
-+ * GNSS-SDR is free software: you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation, either version 3 of the License, or
-+ * at your option) any later version.
-+ *
-+ * GNSS-SDR is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with GNSS-SDR. If not, see .
-+ *
-+ * -------------------------------------------------------------------------
-+ */
-+#ifndef INCLUDED_gnsssdr_CommonMacros_u_H
-+#define INCLUDED_gnsssdr_CommonMacros_u_H
-+
-+ #ifdef LV_HAVE_SSE4_1
-+ /*!
-+ \brief Macros for U_SSE4_1
-+ */
-+
-+ #ifndef CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1
-+ #define CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(input1, input2, real, imag)\
-+ imag = _mm_srli_si128 (input1, 2);\
-+ imag = _mm_blend_epi16 (input2, imag, 85);\
-+ real = _mm_slli_si128 (input2, 2);\
-+ real = _mm_blend_epi16 (real, input1, 85);
-+ #endif /* CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1 */
-+
-+ #ifndef CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1
-+ #define CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(input, input_i_1, input_i_2, output_i32, output_ps)\
-+ input_i_1 = _mm_cvtepi16_epi32(input);\
-+ input = _mm_srli_si128 (input, 8);\
-+ input_i_2 = _mm_cvtepi16_epi32(input);\
-+ output_i32 = _mm_add_epi32 (input_i_1, input_i_2);\
-+ output_ps = _mm_cvtepi32_ps(output_i32);
-+ #endif /* CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1 */
-+
-+ #ifndef CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1
-+ #define CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1(input, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)\
-+ input_i_1 = _mm_cvtepi8_epi32(input);\
-+ input = _mm_srli_si128 (input, 4);\
-+ input_i_2 = _mm_cvtepi8_epi32(input);\
-+ input = _mm_srli_si128 (input, 4);\
-+ output_i32_1 = _mm_add_epi32 (input_i_1, input_i_2);\
-+ input_i_1 = _mm_cvtepi8_epi32(input);\
-+ input = _mm_srli_si128 (input, 4);\
-+ input_i_2 = _mm_cvtepi8_epi32(input);\
-+ input = _mm_srli_si128 (input, 4);\
-+ output_i32_2 = _mm_add_epi32 (input_i_1, input_i_2);\
-+ output_i32 = _mm_add_epi32 (output_i32_1, output_i32_2);\
-+ output_ps = _mm_cvtepi32_ps(output_i32);
-+ #endif /* CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1 */
-+
-+ #endif /* LV_HAVE_SSE4_1 */
-+
-+ #ifdef LV_HAVE_SSE2
-+ /*!
-+ \brief Macros for U_SSE2
-+ */
-+
-+ #ifdef LV_HAVE_SSSE3
-+ /*!
-+ \brief Macros for U_SSSE3
-+ */
-+
-+ #ifndef CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3
-+ #define CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, x, check_sign_sequence, rearrange_sequence, y_aux, x_abs, real_output, imag_output)\
-+ y_aux = _mm_sign_epi8 (y, x);\
-+ y_aux = _mm_sign_epi8 (y_aux, check_sign_sequence);\
-+ real_output = _mm_maddubs_epi16 (x_abs, y_aux);\
-+ \
-+ y_aux = _mm_shuffle_epi8 (y, rearrange_sequence);\
-+ y_aux = _mm_sign_epi8 (y_aux, x);\
-+ imag_output = _mm_maddubs_epi16 (x_abs, y_aux);
-+ #endif /* CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3 */
-+
-+ #endif /* LV_HAVE_SSSE3 */
-+
-+ #ifndef CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2
-+ #define CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)\
-+ realx_mult_realy = _mm_mullo_epi16 (realx, realy);\
-+ imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);\
-+ realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);\
-+ imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);\
-+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);\
-+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+ #endif /* CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2 */
-+
-+ #ifndef CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2
-+ #define CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(input, mult1, real, imag)\
-+ imag = _mm_srli_si128 (input, 1);\
-+ imag = _mm_and_si128 (imag, mult1);\
-+ real = _mm_and_si128 (input, mult1);
-+ #endif /* CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2 */
-+
-+ #ifndef CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2
-+ #define CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2(input, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)\
-+ input_i_1 = _mm_unpacklo_epi8(_mm_setzero_si128(), input);\
-+ input_i_2 = _mm_unpacklo_epi16(_mm_setzero_si128(), input_i_1);\
-+ input_i_1 = _mm_unpackhi_epi16(_mm_setzero_si128(), input_i_1);\
-+ input_i_1 = _mm_srai_epi32(input_i_1, 24);\
-+ input_i_2 = _mm_srai_epi32(input_i_2, 24);\
-+ output_i32 = _mm_add_epi32(input_i_1, input_i_2);\
-+ output_ps_1 = _mm_cvtepi32_ps(output_i32);\
-+ \
-+ input_i_1 = _mm_unpackhi_epi8(_mm_setzero_si128(), input);\
-+ input_i_2 = _mm_unpacklo_epi16(_mm_setzero_si128(), input_i_1);\
-+ input_i_1 = _mm_unpackhi_epi16(_mm_setzero_si128(), input_i_1);\
-+ input_i_1 = _mm_srai_epi32(input_i_1, 24);\
-+ input_i_2 = _mm_srai_epi32(input_i_2, 24);\
-+ output_i32 = _mm_add_epi32(input_i_1, input_i_2);\
-+ output_ps_2 = _mm_cvtepi32_ps(output_i32);
-+ #endif /* CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2 */
-+
-+ #ifndef CM_8IC_CONTROLMINUS128_8IC_U_SSE2
-+ #define CM_8IC_CONTROLMINUS128_8IC_U_SSE2(y, minus128, minus128control)\
-+ minus128control = _mm_cmpeq_epi8 (y, minus128);\
-+ y = _mm_sub_epi8 (y, minus128control);
-+ #endif /* CM_8IC_CONTROLMINUS128_8IC_U_SSE2 */
-+
-+ #endif /* LV_HAVE_SSE2 */
-+
-+ #ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Macros for U_GENERIC
-+ */
-+
-+ #endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_gnsssdr_CommonMacros_u_H */
-+
-+
-+#ifndef INCLUDED_gnsssdr_CommonMacros_a_H
-+#define INCLUDED_gnsssdr_CommonMacros_a_H
-+
-+ #ifdef LV_HAVE_SSE4_1
-+ /*!
-+ \brief Macros for A_SSE4_1
-+ */
-+
-+ #endif /* LV_HAVE_SSE4_1 */
-+
-+ #ifdef LV_HAVE_SSE2
-+ /*!
-+ \brief Macros for U_SSE2
-+ */
-+
-+ #endif /* LV_HAVE_SSE2 */
-+
-+ #ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Macros for A_GENERIC
-+ */
-+
-+ #endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_gnsssdr_CommonMacros_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,76 @@
-+/*!
-+ * \file CommonMacros_16ic_cw_corr_32fc.h
-+ * \brief Common macros used inside the 16ic_cw_corr_32fc volk protokernels.
-+ * \authors
-+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
-+ *
-+ *
-+ * -------------------------------------------------------------------------
-+ *
-+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
-+ *
-+ * GNSS-SDR is a software defined Global Navigation
-+ * Satellite Systems receiver
-+ *
-+ * This file is part of GNSS-SDR.
-+ *
-+ * GNSS-SDR is free software: you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation, either version 3 of the License, or
-+ * at your option) any later version.
-+ *
-+ * GNSS-SDR is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with GNSS-SDR. If not, see .
-+ *
-+ * -------------------------------------------------------------------------
-+ */
-+#ifndef INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_u_H
-+#define INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_u_H
-+#include "CommonMacros/CommonMacros.h"
-+
-+ #ifdef LV_HAVE_SSE4_1
-+ /*!
-+ \brief Macros for U_SSE4_1
-+ */
-+
-+ #ifndef CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1
-+ #define CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)\
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)\
-+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)\
-+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)\
-+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
-+ #endif /* CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1 */
-+
-+ #endif /* LV_HAVE_SSE4_1 */
-+
-+ #ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Macros for U_GENERIC
-+ */
-+
-+ #endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_u_H */
-+
-+
-+#ifndef INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_a_H
-+#define INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_a_H
-+
-+ #ifdef LV_HAVE_SSE4_1
-+ /*!
-+ \brief Macros for A_SSE4_1
-+ */
-+
-+ #endif /* LV_HAVE_SSE4_1 */
-+
-+ #ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Macros for A_GENERIC
-+ */
-+
-+ #endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,114 @@
-+/*!
-+ * \file CommonMacros_8ic_cw_corr_32fc.h
-+ * \brief Common macros used inside the 8ic_cw_corr_32fc volk protokernels.
-+ * \authors
-+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
-+ *
-+ *
-+ * -------------------------------------------------------------------------
-+ *
-+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
-+ *
-+ * GNSS-SDR is a software defined Global Navigation
-+ * Satellite Systems receiver
-+ *
-+ * This file is part of GNSS-SDR.
-+ *
-+ * GNSS-SDR is free software: you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation, either version 3 of the License, or
-+ * at your option) any later version.
-+ *
-+ * GNSS-SDR is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with GNSS-SDR. If not, see .
-+ *
-+ * -------------------------------------------------------------------------
-+ */
-+#ifndef INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_u_H
-+#define INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_u_H
-+#include "CommonMacros/CommonMacros.h"
-+
-+ #ifdef LV_HAVE_SSE4_1
-+ /*!
-+ \brief Macros for U_SSE4_1
-+ */
-+
-+ #ifndef CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1
-+ #define CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)\
-+ CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)\
-+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)\
-+ \
-+ imag_output = _mm_slli_si128 (imag_output, 1);\
-+ output = _mm_blendv_epi8 (imag_output, real_output, mult1);\
-+ \
-+ CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1(output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)
-+ #endif /* CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1 */
-+
-+ #ifndef CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1
-+ #define CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, minus128, minus128control, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)\
-+ CM_8IC_CONTROLMINUS128_8IC_U_SSE2(y, minus128, minus128control)\
-+ CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output)\
-+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)\
-+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
-+ #endif /* CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1 */
-+
-+ #ifndef CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1
-+ #define CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)\
-+ CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output)\
-+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)\
-+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
-+ #endif /* CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1 */
-+
-+ #endif /* LV_HAVE_SSE4_1 */
-+
-+ #ifdef LV_HAVE_SSE2
-+ /*!
-+ \brief Macros for U_SSE2
-+ */
-+
-+ #ifndef CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2
-+ #define CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)\
-+ CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)\
-+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)\
-+ \
-+ real_output = _mm_and_si128 (real_output, mult1);\
-+ imag_output = _mm_and_si128 (imag_output, mult1);\
-+ imag_output = _mm_slli_si128 (imag_output, 1);\
-+ output = _mm_or_si128 (real_output, imag_output);\
-+ \
-+ CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2(output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
-+ #endif /* CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2 */
-+
-+ #endif /* LV_HAVE_SSE2 */
-+
-+ #ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Macros for U_GENERIC
-+ */
-+
-+ #endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_u_H */
-+
-+
-+#ifndef INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_a_H
-+#define INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_a_H
-+
-+ #ifdef LV_HAVE_SSE4_1
-+ /*!
-+ \brief Macros for A_SSE4_1
-+ */
-+
-+ #endif /* LV_HAVE_SSE4_1 */
-+
-+ #ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Macros for A_GENERIC
-+ */
-+
-+ #endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/CommonMacros/README.txt /Users/andres/Desktop/volk_gnsssdr_original/kernels/CommonMacros/README.txt
---- /Users/andres/Desktop/volk_gnsssdr/kernels/CommonMacros/README.txt 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/CommonMacros/README.txt 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,34 @@
-+####################################################################
-+Common Macros inside volk_gnsssdr module
-+####################################################################
-+
-+First of all, sorry for making you need to read this: macros are evil, they can not be debugged, you do not know where the errors come from, syntax is annoying.. BUT this is the only way I found that allows to share one piece of code between various proto-kernels without performance penalties.
-+Inline functions have been tested, and they introduce a really small time penalty, but it becomes huge because of long loops, with thousands of samples.
-+
-+####################################################################
-+Syntax
-+####################################################################
-+
-+In order to allow better understanding of the code I created the macros with an specific syntax.
-+
-+1) Inside CommonMacros.h you will find macros for common operations. I will explain the syntax with an example:
-+
-+example: CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)
-+
-+First of all, you find the characters “CM”, which means CommonMacros. After that the type and the amount of inputs is placed: “_16IC_X4” (16 bits complex integers, four inputs). The syntax for type is the same as the one used with volk protokernels, refer to GNURadio documentation for more help. The it comes the name of the macro (“_SCALAR_PRODUCT”), and after that the type and the amount of outputs (“_16IC_X2”). Finally it is placed the SSE minimum version needed to run (“_U_SSE2”). In the arguments you will find (from left to right) the inputs (four inputs: realx, imagx, realy, imagy), some variables that the macro needs to work (realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy) and finally the outputs (two outputs: real_output, imag_output).
-+The variables that the macro needs are specified when calling it in order to avoid after-compile problems: if you want to use a macro you will need to declare all the variables it needs before, or you will not be able to compile.
-+
-+2) Inside all the other headers, CommonMacros_XXXXXX.h you will find macros for a specific group of proto-kernels. The syntax is the same as the CommonMacros.h
-+
-+####################################################################
-+Workflow
-+####################################################################
-+
-+In order to use the macros easily, I usually test the code without macros inside a testing proto-kernel, where you are able to test it, debug it and use breakpoints.
-+When it works I place code inside a macro an I test it again.
-+
-+####################################################################
-+Why macros
-+####################################################################
-+1) They are the only way I could find for sharing code between proto-kernels without performance penalty.
-+2) It is true that they are really difficult to debug, but if you work with them responsibly it is not so hard. Volk_gnsssdr checks all the SSE proto-kernels implementations results against the generic implementation results, so if your macro is not working you will appreciate it after profiling it.
-\ No newline at end of file
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16i_s32f_convert_32f.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_16i_s32f_convert_32f.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16i_s32f_convert_32f.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_16i_s32f_convert_32f.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,241 @@
-+#ifndef INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H
-+#define INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H
-+
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include
-+
-+ /*!
-+ \brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
-+ \param inputVector The 16 bit input data buffer
-+ \param outputVector The floating point output data buffer
-+ \param scalar The value divided against each point in the output buffer
-+ \param num_points The number of data values to be converted
-+ \note Output buffer does NOT need to be properly aligned
-+ */
-+static inline void volk_gnsssdr_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
-+ unsigned int number = 0;
-+ const unsigned int eighthPoints = num_points / 8;
-+
-+ float* outputVectorPtr = outputVector;
-+ __m128 invScalar = _mm_set_ps1(1.0/scalar);
-+ int16_t* inputPtr = (int16_t*)inputVector;
-+ __m128i inputVal;
-+ __m128i inputVal2;
-+ __m128 ret;
-+
-+ for(;number < eighthPoints; number++){
-+
-+ // Load the 8 values
-+ inputVal = _mm_loadu_si128((__m128i*)inputPtr);
-+
-+ // Shift the input data to the right by 64 bits ( 8 bytes )
-+ inputVal2 = _mm_srli_si128(inputVal, 8);
-+
-+ // Convert the lower 4 values into 32 bit words
-+ inputVal = _mm_cvtepi16_epi32(inputVal);
-+ inputVal2 = _mm_cvtepi16_epi32(inputVal2);
-+
-+ ret = _mm_cvtepi32_ps(inputVal);
-+ ret = _mm_mul_ps(ret, invScalar);
-+ _mm_storeu_ps(outputVectorPtr, ret);
-+ outputVectorPtr += 4;
-+
-+ ret = _mm_cvtepi32_ps(inputVal2);
-+ ret = _mm_mul_ps(ret, invScalar);
-+ _mm_storeu_ps(outputVectorPtr, ret);
-+
-+ outputVectorPtr += 4;
-+
-+ inputPtr += 8;
-+ }
-+
-+ number = eighthPoints * 8;
-+ for(; number < num_points; number++){
-+ outputVector[number] =((float)(inputVector[number])) / scalar;
-+ }
-+}
-+#endif /* LV_HAVE_SSE4_1 */
-+
-+#ifdef LV_HAVE_SSE
-+#include
-+
-+ /*!
-+ \brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
-+ \param inputVector The 16 bit input data buffer
-+ \param outputVector The floating point output data buffer
-+ \param scalar The value divided against each point in the output buffer
-+ \param num_points The number of data values to be converted
-+ \note Output buffer does NOT need to be properly aligned
-+ */
-+static inline void volk_gnsssdr_16i_s32f_convert_32f_u_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
-+ unsigned int number = 0;
-+ const unsigned int quarterPoints = num_points / 4;
-+
-+ float* outputVectorPtr = outputVector;
-+ __m128 invScalar = _mm_set_ps1(1.0/scalar);
-+ int16_t* inputPtr = (int16_t*)inputVector;
-+ __m128 ret;
-+
-+ for(;number < quarterPoints; number++){
-+ ret = _mm_set_ps((float)(inputPtr[3]), (float)(inputPtr[2]), (float)(inputPtr[1]), (float)(inputPtr[0]));
-+
-+ ret = _mm_mul_ps(ret, invScalar);
-+ _mm_storeu_ps(outputVectorPtr, ret);
-+
-+ inputPtr += 4;
-+ outputVectorPtr += 4;
-+ }
-+
-+ number = quarterPoints * 4;
-+ for(; number < num_points; number++){
-+ outputVector[number] = (float)(inputVector[number]) / scalar;
-+ }
-+}
-+#endif /* LV_HAVE_SSE */
-+
-+#ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
-+ \param inputVector The 16 bit input data buffer
-+ \param outputVector The floating point output data buffer
-+ \param scalar The value divided against each point in the output buffer
-+ \param num_points The number of data values to be converted
-+ \note Output buffer does NOT need to be properly aligned
-+ */
-+static inline void volk_gnsssdr_16i_s32f_convert_32f_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
-+ float* outputVectorPtr = outputVector;
-+ const int16_t* inputVectorPtr = inputVector;
-+ unsigned int number = 0;
-+
-+ for(number = 0; number < num_points; number++){
-+ *outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar;
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+
-+
-+
-+#endif /* INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H */
-+#ifndef INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H
-+#define INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H
-+
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include
-+
-+ /*!
-+ \brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
-+ \param inputVector The 16 bit input data buffer
-+ \param outputVector The floating point output data buffer
-+ \param scalar The value divided against each point in the output buffer
-+ \param num_points The number of data values to be converted
-+ */
-+static inline void volk_gnsssdr_16i_s32f_convert_32f_a_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
-+ unsigned int number = 0;
-+ const unsigned int eighthPoints = num_points / 8;
-+
-+ float* outputVectorPtr = outputVector;
-+ __m128 invScalar = _mm_set_ps1(1.0/scalar);
-+ int16_t* inputPtr = (int16_t*)inputVector;
-+ __m128i inputVal;
-+ __m128i inputVal2;
-+ __m128 ret;
-+
-+ for(;number < eighthPoints; number++){
-+
-+ // Load the 8 values
-+ inputVal = _mm_loadu_si128((__m128i*)inputPtr);
-+
-+ // Shift the input data to the right by 64 bits ( 8 bytes )
-+ inputVal2 = _mm_srli_si128(inputVal, 8);
-+
-+ // Convert the lower 4 values into 32 bit words
-+ inputVal = _mm_cvtepi16_epi32(inputVal);
-+ inputVal2 = _mm_cvtepi16_epi32(inputVal2);
-+
-+ ret = _mm_cvtepi32_ps(inputVal);
-+ ret = _mm_mul_ps(ret, invScalar);
-+ _mm_storeu_ps(outputVectorPtr, ret);
-+ outputVectorPtr += 4;
-+
-+ ret = _mm_cvtepi32_ps(inputVal2);
-+ ret = _mm_mul_ps(ret, invScalar);
-+ _mm_storeu_ps(outputVectorPtr, ret);
-+
-+ outputVectorPtr += 4;
-+
-+ inputPtr += 8;
-+ }
-+
-+ number = eighthPoints * 8;
-+ for(; number < num_points; number++){
-+ outputVector[number] =((float)(inputVector[number])) / scalar;
-+ }
-+}
-+#endif /* LV_HAVE_SSE4_1 */
-+
-+#ifdef LV_HAVE_SSE
-+#include
-+
-+ /*!
-+ \brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
-+ \param inputVector The 16 bit input data buffer
-+ \param outputVector The floating point output data buffer
-+ \param scalar The value divided against each point in the output buffer
-+ \param num_points The number of data values to be converted
-+ */
-+static inline void volk_gnsssdr_16i_s32f_convert_32f_a_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
-+ unsigned int number = 0;
-+ const unsigned int quarterPoints = num_points / 4;
-+
-+ float* outputVectorPtr = outputVector;
-+ __m128 invScalar = _mm_set_ps1(1.0/scalar);
-+ int16_t* inputPtr = (int16_t*)inputVector;
-+ __m128 ret;
-+
-+ for(;number < quarterPoints; number++){
-+ ret = _mm_set_ps((float)(inputPtr[3]), (float)(inputPtr[2]), (float)(inputPtr[1]), (float)(inputPtr[0]));
-+
-+ ret = _mm_mul_ps(ret, invScalar);
-+ _mm_storeu_ps(outputVectorPtr, ret);
-+
-+ inputPtr += 4;
-+ outputVectorPtr += 4;
-+ }
-+
-+ number = quarterPoints * 4;
-+ for(; number < num_points; number++){
-+ outputVector[number] = (float)(inputVector[number]) / scalar;
-+ }
-+}
-+#endif /* LV_HAVE_SSE */
-+
-+#ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
-+ \param inputVector The 16 bit input data buffer
-+ \param outputVector The floating point output data buffer
-+ \param scalar The value divided against each point in the output buffer
-+ \param num_points The number of data values to be converted
-+ */
-+static inline void volk_gnsssdr_16i_s32f_convert_32f_a_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
-+ float* outputVectorPtr = outputVector;
-+ const int16_t* inputVectorPtr = inputVector;
-+ unsigned int number = 0;
-+
-+ for(number = 0; number < num_points; number++){
-+ *outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar;
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+
-+
-+
-+#endif /* INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,461 @@
-+/*!
-+ * \file volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3.h
-+ * \brief Volk protokernel: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 32 bits vectors
-+ * \authors
-+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
-+ *
-+ *
-+ * Volk protokernel that performs the carrier wipe-off mixing and the
-+ * Early, Prompt, and Late correlation with 32 bits vectors (16 bits the
-+ * real part and 16 bits the imaginary part):
-+ * - The carrier wipe-off is done by multiplying the input signal by the
-+ * carrier (multiplication of 32 bits vectors) It returns the input
-+ * signal in base band (BB)
-+ * - Early values are calculated by multiplying the input signal in BB by the
-+ * early code (multiplication of 32 bits vectors), accumulating the results
-+ * - Prompt values are calculated by multiplying the input signal in BB by the
-+ * prompt code (multiplication of 32 bits vectors), accumulating the results
-+ * - Late values are calculated by multiplying the input signal in BB by the
-+ * late code (multiplication of 32 bits vectors), accumulating the results
-+ *
-+ * -------------------------------------------------------------------------
-+ *
-+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
-+ *
-+ * GNSS-SDR is a software defined Global Navigation
-+ * Satellite Systems receiver
-+ *
-+ * This file is part of GNSS-SDR.
-+ *
-+ * GNSS-SDR is free software: you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation, either version 3 of the License, or
-+ * at your option) any later version.
-+ *
-+ * GNSS-SDR is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with GNSS-SDR. If not, see .
-+ *
-+ * -------------------------------------------------------------------------
-+ */
-+
-+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_u_H
-+#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_u_H
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include "smmintrin.h"
-+#include "CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h"
-+#include "CommonMacros/CommonMacros.h"
-+ /*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Early PRN code replica input
-+ \param L_code Early PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Early correlation output
-+ \param L_out Early correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_u_sse4_1(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
-+{
-+ const unsigned int sse_iters = num_points / 8;
-+
-+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
-+ __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
-+
-+ __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
-+ __m128i input_i_1, input_i_2, output_i32;
-+ __m128 real_output_ps, imag_output_ps;
-+
-+ float E_out_real = 0;
-+ float E_out_imag = 0;
-+ float P_out_real = 0;
-+ float P_out_imag = 0;
-+ float L_out_real = 0;
-+ float L_out_imag = 0;
-+
-+ const lv_16sc_t* input_ptr = input;
-+ const lv_16sc_t* carrier_ptr = carrier;
-+
-+ const lv_16sc_t* E_code_ptr = E_code;
-+ lv_32fc_t* E_out_ptr = E_out;
-+ const lv_16sc_t* L_code_ptr = L_code;
-+ lv_32fc_t* L_out_ptr = L_out;
-+ const lv_16sc_t* P_code_ptr = P_code;
-+ lv_32fc_t* P_out_ptr = P_out;
-+
-+ *E_out_ptr = 0;
-+ *P_out_ptr = 0;
-+ *L_out_ptr = 0;
-+
-+ mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
-+
-+ real_E_code_acc = _mm_setzero_ps();
-+ imag_E_code_acc = _mm_setzero_ps();
-+ real_P_code_acc = _mm_setzero_ps();
-+ imag_P_code_acc = _mm_setzero_ps();
-+ real_L_code_acc = _mm_setzero_ps();
-+ imag_L_code_acc = _mm_setzero_ps();
-+
-+ if (sse_iters>0)
-+ {
-+ for(int number = 0;number < sse_iters; number++){
-+
-+ //Perform the carrier wipe-off
-+ x1 = _mm_lddqu_si128((__m128i*)input_ptr);
-+ input_ptr += 4;
-+ x2 = _mm_lddqu_si128((__m128i*)input_ptr);
-+
-+ y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
-+ carrier_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)carrier_ptr);
-+
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
-+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
-+
-+ //Get early values
-+ y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
-+ E_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)E_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ //Adds the float 32 results
-+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
-+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
-+
-+ //Get prompt values
-+ y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
-+ P_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)P_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
-+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
-+
-+ //Get late values
-+ y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
-+ L_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)L_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
-+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
-+
-+ input_ptr += 4;
-+ carrier_ptr += 4;
-+ E_code_ptr += 4;
-+ P_code_ptr += 4;
-+ L_code_ptr += 4;
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
-+
-+ _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
-+
-+ for (int i = 0; i<4; ++i)
-+ {
-+ E_out_real += real_E_dotProductVector[i];
-+ E_out_imag += imag_E_dotProductVector[i];
-+ P_out_real += real_P_dotProductVector[i];
-+ P_out_imag += imag_P_dotProductVector[i];
-+ L_out_real += real_L_dotProductVector[i];
-+ L_out_imag += imag_L_dotProductVector[i];
-+ }
-+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
-+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
-+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
-+ }
-+
-+ lv_16sc_t bb_signal_sample;
-+ for(int i=0; i < num_points%8; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
-+ // Now get early, late, and prompt values for each
-+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
-+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
-+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
-+ }
-+
-+}
-+#endif /* LV_HAVE_SSE4_1 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Early PRN code replica input
-+ \param L_code Early PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Early correlation output
-+ \param L_out Early correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
-+{
-+ lv_16sc_t bb_signal_sample;
-+ lv_16sc_t tmp1;
-+ lv_16sc_t tmp2;
-+ lv_16sc_t tmp3;
-+
-+ bb_signal_sample = lv_cmake(0, 0);
-+
-+ *E_out = 0;
-+ *P_out = 0;
-+ *L_out = 0;
-+ // perform Early, Prompt and Late correlation
-+
-+ for(int i=0; i < num_points; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = input[i] * carrier[i];
-+
-+ tmp1 = bb_signal_sample * E_code[i];
-+ tmp2 = bb_signal_sample * P_code[i];
-+ tmp3 = bb_signal_sample * L_code[i];
-+
-+ // Now get early, late, and prompt values for each
-+ *E_out += (lv_32fc_t)tmp1;
-+ *P_out += (lv_32fc_t)tmp2;
-+ *L_out += (lv_32fc_t)tmp3;
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_u_H */
-+
-+
-+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_H
-+#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include "smmintrin.h"
-+#include "CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h"
-+#include "CommonMacros/CommonMacros.h"
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Early PRN code replica input
-+ \param L_code Early PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Early correlation output
-+ \param L_out Early correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_sse4_1(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
-+{
-+ const unsigned int sse_iters = num_points / 8;
-+
-+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
-+ __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
-+
-+ __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
-+ __m128i input_i_1, input_i_2, output_i32;
-+ __m128 real_output_ps, imag_output_ps;
-+
-+ float E_out_real = 0;
-+ float E_out_imag = 0;
-+ float P_out_real = 0;
-+ float P_out_imag = 0;
-+ float L_out_real = 0;
-+ float L_out_imag = 0;
-+
-+ const lv_16sc_t* input_ptr = input;
-+ const lv_16sc_t* carrier_ptr = carrier;
-+
-+ const lv_16sc_t* E_code_ptr = E_code;
-+ lv_32fc_t* E_out_ptr = E_out;
-+ const lv_16sc_t* L_code_ptr = L_code;
-+ lv_32fc_t* L_out_ptr = L_out;
-+ const lv_16sc_t* P_code_ptr = P_code;
-+ lv_32fc_t* P_out_ptr = P_out;
-+
-+ *E_out_ptr = 0;
-+ *P_out_ptr = 0;
-+ *L_out_ptr = 0;
-+
-+ mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
-+
-+ real_E_code_acc = _mm_setzero_ps();
-+ imag_E_code_acc = _mm_setzero_ps();
-+ real_P_code_acc = _mm_setzero_ps();
-+ imag_P_code_acc = _mm_setzero_ps();
-+ real_L_code_acc = _mm_setzero_ps();
-+ imag_L_code_acc = _mm_setzero_ps();
-+
-+ if (sse_iters>0)
-+ {
-+ for(int number = 0;number < sse_iters; number++){
-+
-+ //Perform the carrier wipe-off
-+ x1 = _mm_load_si128((__m128i*)input_ptr);
-+ input_ptr += 4;
-+ x2 = _mm_load_si128((__m128i*)input_ptr);
-+
-+ y1 = _mm_load_si128((__m128i*)carrier_ptr);
-+ carrier_ptr += 4;
-+ y2 = _mm_load_si128((__m128i*)carrier_ptr);
-+
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
-+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
-+
-+ //Get early values
-+ y1 = _mm_load_si128((__m128i*)E_code_ptr);
-+ E_code_ptr += 4;
-+ y2 = _mm_load_si128((__m128i*)E_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ //Adds the float 32 results
-+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
-+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
-+
-+ //Get prompt values
-+ y1 = _mm_load_si128((__m128i*)P_code_ptr);
-+ P_code_ptr += 4;
-+ y2 = _mm_load_si128((__m128i*)P_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
-+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
-+
-+ //Get late values
-+ y1 = _mm_load_si128((__m128i*)L_code_ptr);
-+ L_code_ptr += 4;
-+ y2 = _mm_load_si128((__m128i*)L_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
-+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
-+
-+ input_ptr += 4;
-+ carrier_ptr += 4;
-+ E_code_ptr += 4;
-+ P_code_ptr += 4;
-+ L_code_ptr += 4;
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
-+
-+ _mm_store_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
-+
-+ for (int i = 0; i<4; ++i)
-+ {
-+ E_out_real += real_E_dotProductVector[i];
-+ E_out_imag += imag_E_dotProductVector[i];
-+ P_out_real += real_P_dotProductVector[i];
-+ P_out_imag += imag_P_dotProductVector[i];
-+ L_out_real += real_L_dotProductVector[i];
-+ L_out_imag += imag_L_dotProductVector[i];
-+ }
-+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
-+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
-+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
-+ }
-+
-+ lv_16sc_t bb_signal_sample;
-+ for(int i=0; i < num_points%8; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
-+ // Now get early, late, and prompt values for each
-+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
-+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
-+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
-+ }
-+
-+}
-+#endif /* LV_HAVE_SSE4_1 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Early PRN code replica input
-+ \param L_code Early PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Early correlation output
-+ \param L_out Early correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
-+{
-+ lv_16sc_t bb_signal_sample;
-+ lv_16sc_t tmp1;
-+ lv_16sc_t tmp2;
-+ lv_16sc_t tmp3;
-+
-+ bb_signal_sample = lv_cmake(0, 0);
-+
-+ *E_out = 0;
-+ *P_out = 0;
-+ *L_out = 0;
-+ // perform Early, Prompt and Late correlation
-+
-+ for(int i=0; i < num_points; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = input[i] * carrier[i];
-+
-+ tmp1 = bb_signal_sample * E_code[i];
-+ tmp2 = bb_signal_sample * P_code[i];
-+ tmp3 = bb_signal_sample * L_code[i];
-+
-+ // Now get early, late, and prompt values for each
-+ *E_out += (lv_32fc_t)tmp1;
-+ *P_out += (lv_32fc_t)tmp2;
-+ *L_out += (lv_32fc_t)tmp3;
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,1568 @@
-+/*!
-+ * \file volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3.h
-+ * \brief Volk protokernel: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 32 bits vectors using different methods: inside u_sse4_1_first there is one method, inside u_sse4_1_second there is another... This protokernel has been created to test the performance of different methods.
-+ * \authors
-+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
-+ *
-+ *
-+ * Volk protokernel that performs the carrier wipe-off mixing and the
-+ * Early, Prompt, and Late correlation with 32 bits vectors (16 bits the
-+ * real part and 16 bits the imaginary part):
-+ * - The carrier wipe-off is done by multiplying the input signal by the
-+ * carrier (multiplication of 32 bits vectors) It returns the input
-+ * signal in base band (BB)
-+ * - Early values are calculated by multiplying the input signal in BB by the
-+ * early code (multiplication of 32 bits vectors), accumulating the results
-+ * - Prompt values are calculated by multiplying the input signal in BB by the
-+ * prompt code (multiplication of 32 bits vectors), accumulating the results
-+ * - Late values are calculated by multiplying the input signal in BB by the
-+ * late code (multiplication of 32 bits vectors), accumulating the results
-+ *
-+ * -------------------------------------------------------------------------
-+ *
-+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
-+ *
-+ * GNSS-SDR is a software defined Global Navigation
-+ * Satellite Systems receiver
-+ *
-+ * This file is part of GNSS-SDR.
-+ *
-+ * GNSS-SDR is free software: you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation, either version 3 of the License, or
-+ * at your option) any later version.
-+ *
-+ * GNSS-SDR is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with GNSS-SDR. If not, see .
-+ *
-+ * -------------------------------------------------------------------------
-+ */
-+
-+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_H
-+#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_H
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include "smmintrin.h"
-+ /*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Early PRN code replica input
-+ \param L_code Early PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Early correlation output
-+ \param L_out Early correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_sse4_1_first(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
-+{
-+ const unsigned int sse_iters = num_points / 4;
-+
-+ __m128i x, y, yaux, yl, yh, tmp1, tmp2, z, bb_signal_sample, bb_signal_sample_suffled;
-+
-+ __m128 z_ps_1, z_ps_2, z_E, z_P, z_L;
-+ __m128i z_i_1, z_i_2;
-+
-+ lv_32fc_t dotProduct_E;
-+ lv_32fc_t dotProduct_P;
-+ lv_32fc_t dotProduct_L;
-+
-+ z_E = _mm_setzero_ps();
-+ z_P = _mm_setzero_ps();
-+ z_L = _mm_setzero_ps();
-+
-+ const lv_16sc_t* _input = input;
-+ const lv_16sc_t* _carrier = carrier;
-+ const lv_16sc_t* _E_code = E_code;
-+ const lv_16sc_t* _P_code = P_code;
-+ const lv_16sc_t* _L_code = L_code;
-+
-+ if (sse_iters>0)
-+ {
-+ for(int number = 0;number < sse_iters; number++)
-+ {
-+ //Perform the carrier wipe-off
-+ x = _mm_lddqu_si128((__m128i*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
-+ y = _mm_lddqu_si128((__m128i*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ // Load yl with cr,cr,dr,dr
-+ // Load yh with ci,ci,di,di
-+ yaux = _mm_shuffle_epi8 (y, _mm_set_epi8 (15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0));
-+ yl = _mm_unpacklo_epi16(yaux, yaux);
-+ yh = _mm_unpackhi_epi16(yaux, yaux);
-+
-+ tmp1 = _mm_mullo_epi16(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ x = _mm_shuffle_epi8 (x, _mm_set_epi8 (13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp2 = _mm_mullo_epi16(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ tmp2 = _mm_mullo_epi16(tmp2,_mm_set_epi16 (1, -1, 1, -1, 1, -1, 1, -1));
-+ bb_signal_sample = _mm_add_epi16(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+ bb_signal_sample_suffled = _mm_shuffle_epi8 (bb_signal_sample, _mm_set_epi8 (13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)); // Re-arrange bb_signal_sample to be ai,ar,bi,br
-+
-+ // correlation E,P,L (3x vector scalar product)
-+ // Early
-+ y = _mm_lddqu_si128((__m128i*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yaux = _mm_shuffle_epi8 (y, _mm_set_epi8 (15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0));
-+ yl = _mm_unpacklo_epi16(yaux, yaux);
-+ yh = _mm_unpackhi_epi16(yaux, yaux);
-+
-+ tmp1 = _mm_mullo_epi16(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ tmp2 = _mm_mullo_epi16(bb_signal_sample_suffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ tmp2 = _mm_mullo_epi16(tmp2,_mm_set_epi16 (1, -1, 1, -1, 1, -1, 1, -1));
-+ z = _mm_add_epi16(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ z_i_1 = _mm_cvtepi16_epi32(z);
-+ z_ps_1 = _mm_cvtepi32_ps(z_i_1);
-+ z = _mm_srli_si128 (z, 8);
-+ z_i_2 = _mm_cvtepi16_epi32(z);
-+ z_ps_2 = _mm_cvtepi32_ps(z_i_2);
-+
-+ z_E = _mm_add_ps(z_E, z_ps_1); // Add the complex multiplication results together
-+ z_E = _mm_add_ps(z_E, z_ps_2); // Add the complex multiplication results together
-+
-+ // Prompt
-+ y = _mm_lddqu_si128((__m128i*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yaux = _mm_shuffle_epi8 (y, _mm_set_epi8 (15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0));
-+ yl = _mm_unpacklo_epi16(yaux, yaux);
-+ yh = _mm_unpackhi_epi16(yaux, yaux);
-+
-+ tmp1 = _mm_mullo_epi16(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ tmp2 = _mm_mullo_epi16(bb_signal_sample_suffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ tmp2 = _mm_mullo_epi16(tmp2,_mm_set_epi16 (1, -1, 1, -1, 1, -1, 1, -1));
-+ z = _mm_add_epi16(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ z_i_1 = _mm_cvtepi16_epi32(z);
-+ z_ps_1 = _mm_cvtepi32_ps(z_i_1);
-+ z = _mm_srli_si128 (z, 8);
-+ z_i_2 = _mm_cvtepi16_epi32(z);
-+ z_ps_2 = _mm_cvtepi32_ps(z_i_2);
-+
-+ z_P = _mm_add_ps(z_P, z_ps_1); // Add the complex multiplication results together
-+ z_P = _mm_add_ps(z_P, z_ps_2); // Add the complex multiplication results together
-+
-+ // Late
-+ y = _mm_lddqu_si128((__m128i*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yaux = _mm_shuffle_epi8 (y, _mm_set_epi8 (15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0));
-+ yl = _mm_unpacklo_epi16(yaux, yaux);
-+ yh = _mm_unpackhi_epi16(yaux, yaux);
-+
-+ tmp1 = _mm_mullo_epi16(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ tmp2 = _mm_mullo_epi16(bb_signal_sample_suffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ tmp2 = _mm_mullo_epi16(tmp2,_mm_set_epi16 (1, -1, 1, -1, 1, -1, 1, -1));
-+ z = _mm_add_epi16(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ z_i_1 = _mm_cvtepi16_epi32(z);
-+ z_ps_1 = _mm_cvtepi32_ps(z_i_1);
-+ z = _mm_srli_si128 (z, 8);
-+ z_i_2 = _mm_cvtepi16_epi32(z);
-+ z_ps_2 = _mm_cvtepi32_ps(z_i_2);
-+
-+ z_L = _mm_add_ps(z_L, z_ps_1); // Add the complex multiplication results together
-+ z_L = _mm_add_ps(z_L, z_ps_2); // Add the complex multiplication results together
-+
-+ _input += 4;
-+ _carrier += 4;
-+ _E_code += 4;
-+ _L_code += 4;
-+ _P_code += 4;
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
-+
-+ _mm_storeu_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
-+
-+ dotProduct_E = ( dotProductVector_E[0] + dotProductVector_E[1] );
-+ dotProduct_P = ( dotProductVector_P[0] + dotProductVector_P[1] );
-+ dotProduct_L = ( dotProductVector_L[0] + dotProductVector_L[1] );
-+ }
-+
-+ for(int i=0; i < num_points%4; ++i)
-+ {
-+ dotProduct_E += (lv_32fc_t)((*_input) * (*_E_code++)*(*_carrier));
-+ dotProduct_P += (lv_32fc_t)((*_input) * (*_P_code++)*(*_carrier));
-+ dotProduct_L += (lv_32fc_t)((*_input++) * (*_L_code++)*(*_carrier++));
-+ }
-+
-+ *E_out = dotProduct_E;
-+ *P_out = dotProduct_P;
-+ *L_out = dotProduct_L;
-+
-+
-+
-+}
-+#endif /* LV_HAVE_SSE4_1 */
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include "smmintrin.h"
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Early PRN code replica input
-+ \param L_code Early PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Early correlation output
-+ \param L_out Early correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_sse4_1_second(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
-+{
-+ const unsigned int sse_iters = num_points / 8;
-+
-+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
-+ __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
-+
-+ __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
-+ __m128i real_output_i_1, real_output_i_2, imag_output_i_1, imag_output_i_2;
-+ __m128 real_output_ps_1, real_output_ps_2, imag_output_ps_1, imag_output_ps_2;
-+
-+ float E_out_real = 0;
-+ float E_out_imag = 0;
-+ float P_out_real = 0;
-+ float P_out_imag = 0;
-+ float L_out_real = 0;
-+ float L_out_imag = 0;
-+
-+ const lv_16sc_t* input_ptr = input;
-+ const lv_16sc_t* carrier_ptr = carrier;
-+
-+ const lv_16sc_t* E_code_ptr = E_code;
-+ lv_32fc_t* E_out_ptr = E_out;
-+ const lv_16sc_t* L_code_ptr = L_code;
-+ lv_32fc_t* L_out_ptr = L_out;
-+ const lv_16sc_t* P_code_ptr = P_code;
-+ lv_32fc_t* P_out_ptr = P_out;
-+
-+ *E_out_ptr = 0;
-+ *P_out_ptr = 0;
-+ *L_out_ptr = 0;
-+
-+ mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
-+
-+ real_E_code_acc = _mm_setzero_ps();
-+ imag_E_code_acc = _mm_setzero_ps();
-+ real_P_code_acc = _mm_setzero_ps();
-+ imag_P_code_acc = _mm_setzero_ps();
-+ real_L_code_acc = _mm_setzero_ps();
-+ imag_L_code_acc = _mm_setzero_ps();
-+
-+ if (sse_iters>0)
-+ {
-+ for(int number = 0;number < sse_iters; number++){
-+
-+ //Perform the carrier wipe-off
-+ x1 = _mm_lddqu_si128((__m128i*)input_ptr);
-+ input_ptr += 4;
-+ x2 = _mm_lddqu_si128((__m128i*)input_ptr);
-+
-+ y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
-+ carrier_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)carrier_ptr);
-+
-+ imagx = _mm_srli_si128 (x1, 2);
-+ imagx = _mm_blend_epi16 (x2, imagx, 85);
-+ realx = _mm_slli_si128 (x2, 2);
-+ realx = _mm_blend_epi16 (realx, x1, 85);
-+
-+ imagy = _mm_srli_si128 (y1, 2);
-+ imagy = _mm_blend_epi16 (y2, imagy, 85);
-+ realy = _mm_slli_si128 (y2, 2);
-+ realy = _mm_blend_epi16 (realy, y1, 85);
-+
-+ realx_mult_realy = _mm_mullo_epi16 (realx, realy);
-+ imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
-+ realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
-+ imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
-+
-+ real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+ imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+
-+ //Get early values
-+ y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
-+ E_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)E_code_ptr);
-+
-+ imagy = _mm_srli_si128 (y1, 2);
-+ imagy = _mm_blend_epi16 (y2, imagy, 85);
-+ realy = _mm_slli_si128 (y2, 2);
-+ realy = _mm_blend_epi16 (realy, y1, 85);
-+
-+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-+
-+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+
-+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
-+ real_output_ps_1 = _mm_cvtepi32_ps(real_output_i_1);
-+ real_output = _mm_srli_si128 (real_output, 8);
-+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
-+ real_output_ps_2 = _mm_cvtepi32_ps(real_output_i_2);
-+
-+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output_ps_1 = _mm_cvtepi32_ps(imag_output_i_1);
-+ imag_output = _mm_srli_si128 (imag_output, 8);
-+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output_ps_2 = _mm_cvtepi32_ps(imag_output_i_2);
-+
-+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps_1);
-+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps_2);
-+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps_1);
-+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps_2);
-+
-+ //Get prompt values
-+ y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
-+ P_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)P_code_ptr);
-+
-+ imagy = _mm_srli_si128 (y1, 2);
-+ imagy = _mm_blend_epi16 (y2, imagy, 85);
-+ realy = _mm_slli_si128 (y2, 2);
-+ realy = _mm_blend_epi16 (realy, y1, 85);
-+
-+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-+
-+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+
-+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
-+ real_output_ps_1 = _mm_cvtepi32_ps(real_output_i_1);
-+ real_output = _mm_srli_si128 (real_output, 8);
-+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
-+ real_output_ps_2 = _mm_cvtepi32_ps(real_output_i_2);
-+
-+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output_ps_1 = _mm_cvtepi32_ps(imag_output_i_1);
-+ imag_output = _mm_srli_si128 (imag_output, 8);
-+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output_ps_2 = _mm_cvtepi32_ps(imag_output_i_2);
-+
-+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps_1);
-+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps_2);
-+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps_1);
-+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps_2);
-+
-+ //Get late values
-+ y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
-+ L_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)L_code_ptr);
-+
-+ imagy = _mm_srli_si128 (y1, 2);
-+ imagy = _mm_blend_epi16 (y2, imagy, 85);
-+ realy = _mm_slli_si128 (y2, 2);
-+ realy = _mm_blend_epi16 (realy, y1, 85);
-+
-+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-+
-+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+
-+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
-+ real_output_ps_1 = _mm_cvtepi32_ps(real_output_i_1);
-+ real_output = _mm_srli_si128 (real_output, 8);
-+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
-+ real_output_ps_2 = _mm_cvtepi32_ps(real_output_i_2);
-+
-+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output_ps_1 = _mm_cvtepi32_ps(imag_output_i_1);
-+ imag_output = _mm_srli_si128 (imag_output, 8);
-+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output_ps_2 = _mm_cvtepi32_ps(imag_output_i_2);
-+
-+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps_1);
-+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps_2);
-+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps_1);
-+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps_2);
-+
-+ input_ptr += 4;
-+ carrier_ptr += 4;
-+ E_code_ptr += 4;
-+ L_code_ptr += 4;
-+ P_code_ptr += 4;
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
-+
-+ _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
-+
-+ for (int i = 0; i<4; ++i)
-+ {
-+ E_out_real += real_E_dotProductVector[i];
-+ E_out_imag += imag_E_dotProductVector[i];
-+ P_out_real += real_P_dotProductVector[i];
-+ P_out_imag += imag_P_dotProductVector[i];
-+ L_out_real += real_L_dotProductVector[i];
-+ L_out_imag += imag_L_dotProductVector[i];
-+ }
-+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
-+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
-+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
-+ }
-+
-+ lv_16sc_t bb_signal_sample;
-+ for(int i=0; i < num_points%8; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
-+ // Now get early, late, and prompt values for each
-+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
-+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
-+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
-+ }
-+}
-+#endif /* LV_HAVE_SSE4_1 */
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include "smmintrin.h"
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Early PRN code replica input
-+ \param L_code Early PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Early correlation output
-+ \param L_out Early correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_sse4_1_third(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
-+{
-+ const unsigned int sse_iters = num_points / 8;
-+ unsigned int index = 0;
-+ unsigned int indexPlus4 = 0;
-+
-+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
-+ __m128i realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, real_output_i32, imag_output_i32;
-+
-+ __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
-+ __m128i real_output_i_1, real_output_i_2, imag_output_i_1, imag_output_i_2;
-+ __m128 real_output_ps, imag_output_ps;
-+
-+ float E_out_real = 0;
-+ float E_out_imag = 0;
-+ float P_out_real = 0;
-+ float P_out_imag = 0;
-+ float L_out_real = 0;
-+ float L_out_imag = 0;
-+
-+ const lv_16sc_t* input_ptr = input;
-+ const lv_16sc_t* carrier_ptr = carrier;
-+
-+ const lv_16sc_t* E_code_ptr = E_code;
-+ lv_32fc_t* E_out_ptr = E_out;
-+ const lv_16sc_t* L_code_ptr = L_code;
-+ lv_32fc_t* L_out_ptr = L_out;
-+ const lv_16sc_t* P_code_ptr = P_code;
-+ lv_32fc_t* P_out_ptr = P_out;
-+
-+ *E_out_ptr = 0;
-+ *P_out_ptr = 0;
-+ *L_out_ptr = 0;
-+
-+ real_E_code_acc = _mm_setzero_ps();
-+ imag_E_code_acc = _mm_setzero_ps();
-+ real_P_code_acc = _mm_setzero_ps();
-+ imag_P_code_acc = _mm_setzero_ps();
-+ real_L_code_acc = _mm_setzero_ps();
-+ imag_L_code_acc = _mm_setzero_ps();
-+
-+ if (sse_iters>0)
-+ {
-+ for(index = 0;index < 8*sse_iters; index+=8){
-+ indexPlus4 = index + 4;
-+ //Perform the carrier wipe-off
-+ x1 = _mm_lddqu_si128((__m128i*)&input_ptr[index]);
-+ x2 = _mm_lddqu_si128((__m128i*)&input_ptr[indexPlus4]);
-+
-+ y1 = _mm_lddqu_si128((__m128i*)&carrier_ptr[index]);
-+ y2 = _mm_lddqu_si128((__m128i*)&carrier_ptr[indexPlus4]);
-+
-+ imagx = _mm_srli_si128 (x1, 2);
-+ imagx = _mm_blend_epi16 (x2, imagx, 85);
-+ realx = _mm_slli_si128 (x2, 2);
-+ realx = _mm_blend_epi16 (realx, x1, 85);
-+
-+ imagy = _mm_srli_si128 (y1, 2);
-+ imagy = _mm_blend_epi16 (y2, imagy, 85);
-+ realy = _mm_slli_si128 (y2, 2);
-+ realy = _mm_blend_epi16 (realy, y1, 85);
-+
-+ realx_mult_realy = _mm_mullo_epi16 (realx, realy);
-+ imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
-+ realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
-+ imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
-+
-+ real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+ imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+
-+ //Get early values
-+ y1 = _mm_lddqu_si128((__m128i*)&E_code_ptr[index]);
-+ y2 = _mm_lddqu_si128((__m128i*)&E_code_ptr[indexPlus4]);
-+
-+ imagy = _mm_srli_si128 (y1, 2);
-+ imagy = _mm_blend_epi16 (y2, imagy, 85);
-+ realy = _mm_slli_si128 (y2, 2);
-+ realy = _mm_blend_epi16 (realy, y1, 85);
-+
-+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-+
-+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+
-+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
-+ real_output = _mm_srli_si128 (real_output, 8);
-+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
-+ real_output_i32 = _mm_add_epi32 (real_output_i_1, real_output_i_2);
-+ real_output_ps = _mm_cvtepi32_ps(real_output_i32);
-+
-+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output = _mm_srli_si128 (imag_output, 8);
-+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output_i32 = _mm_add_epi32 (imag_output_i_1, imag_output_i_2);
-+ imag_output_ps = _mm_cvtepi32_ps(imag_output_i32);
-+
-+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
-+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
-+
-+ //Get prompt values
-+ y1 = _mm_lddqu_si128((__m128i*)&P_code_ptr[index]);
-+ y2 = _mm_lddqu_si128((__m128i*)&P_code_ptr[indexPlus4]);
-+
-+ imagy = _mm_srli_si128 (y1, 2);
-+ imagy = _mm_blend_epi16 (y2, imagy, 85);
-+ realy = _mm_slli_si128 (y2, 2);
-+ realy = _mm_blend_epi16 (realy, y1, 85);
-+
-+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-+
-+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+
-+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
-+ real_output = _mm_srli_si128 (real_output, 8);
-+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
-+ real_output_i32 = _mm_add_epi32 (real_output_i_1, real_output_i_2);
-+ real_output_ps = _mm_cvtepi32_ps(real_output_i32);
-+
-+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output = _mm_srli_si128 (imag_output, 8);
-+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output_i32 = _mm_add_epi32 (imag_output_i_1, imag_output_i_2);
-+ imag_output_ps = _mm_cvtepi32_ps(imag_output_i32);
-+
-+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
-+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
-+
-+ //Get late values
-+ y1 = _mm_lddqu_si128((__m128i*)&L_code_ptr[index]);
-+ y2 = _mm_lddqu_si128((__m128i*)&L_code_ptr[indexPlus4]);
-+
-+ imagy = _mm_srli_si128 (y1, 2);
-+ imagy = _mm_blend_epi16 (y2, imagy, 85);
-+ realy = _mm_slli_si128 (y2, 2);
-+ realy = _mm_blend_epi16 (realy, y1, 85);
-+
-+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-+
-+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+
-+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
-+ real_output = _mm_srli_si128 (real_output, 8);
-+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
-+ real_output_i32 = _mm_add_epi32 (real_output_i_1, real_output_i_2);
-+ real_output_ps = _mm_cvtepi32_ps(real_output_i32);
-+
-+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output = _mm_srli_si128 (imag_output, 8);
-+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output_i32 = _mm_add_epi32 (imag_output_i_1, imag_output_i_2);
-+ imag_output_ps = _mm_cvtepi32_ps(imag_output_i32);
-+
-+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
-+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
-+
-+ _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
-+
-+ for (int i = 0; i<4; ++i)
-+ {
-+ E_out_real += real_E_dotProductVector[i];
-+ E_out_imag += imag_E_dotProductVector[i];
-+ P_out_real += real_P_dotProductVector[i];
-+ P_out_imag += imag_P_dotProductVector[i];
-+ L_out_real += real_L_dotProductVector[i];
-+ L_out_imag += imag_L_dotProductVector[i];
-+ }
-+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
-+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
-+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
-+ }
-+
-+ lv_16sc_t bb_signal_sample;
-+ for(; index < num_points; index++)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = input_ptr[index] * carrier_ptr[index];
-+ // Now get early, late, and prompt values for each
-+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * E_code_ptr[index]);
-+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * P_code_ptr[index]);
-+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * L_code_ptr[index]);
-+ }
-+}
-+#endif /* LV_HAVE_SSE4_1 */
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include "smmintrin.h"
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Early PRN code replica input
-+ \param L_code Early PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Early correlation output
-+ \param L_out Early correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_sse4_1_fourth(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
-+{
-+ const unsigned int sse_iters = num_points / 8;
-+
-+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
-+ __m128i realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, real_output_i32, imag_output_i32;
-+
-+ __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
-+ __m128i real_output_i_1, real_output_i_2, imag_output_i_1, imag_output_i_2;
-+ __m128 real_output_ps, imag_output_ps;
-+
-+ float E_out_real = 0;
-+ float E_out_imag = 0;
-+ float P_out_real = 0;
-+ float P_out_imag = 0;
-+ float L_out_real = 0;
-+ float L_out_imag = 0;
-+
-+ const lv_16sc_t* input_ptr = input;
-+ const lv_16sc_t* carrier_ptr = carrier;
-+
-+ const lv_16sc_t* E_code_ptr = E_code;
-+ lv_32fc_t* E_out_ptr = E_out;
-+ const lv_16sc_t* L_code_ptr = L_code;
-+ lv_32fc_t* L_out_ptr = L_out;
-+ const lv_16sc_t* P_code_ptr = P_code;
-+ lv_32fc_t* P_out_ptr = P_out;
-+
-+ *E_out_ptr = 0;
-+ *P_out_ptr = 0;
-+ *L_out_ptr = 0;
-+
-+ real_E_code_acc = _mm_setzero_ps();
-+ imag_E_code_acc = _mm_setzero_ps();
-+ real_P_code_acc = _mm_setzero_ps();
-+ imag_P_code_acc = _mm_setzero_ps();
-+ real_L_code_acc = _mm_setzero_ps();
-+ imag_L_code_acc = _mm_setzero_ps();
-+
-+ if (sse_iters>0)
-+ {
-+ for(int number = 0;number < sse_iters; number++){
-+
-+ //Perform the carrier wipe-off
-+ x1 = _mm_lddqu_si128((__m128i*)input_ptr);
-+ input_ptr += 4;
-+ x2 = _mm_lddqu_si128((__m128i*)input_ptr);
-+
-+ y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
-+ carrier_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)carrier_ptr);
-+
-+ imagx = _mm_srli_si128 (x1, 2);
-+ imagx = _mm_blend_epi16 (x2, imagx, 85);
-+ realx = _mm_slli_si128 (x2, 2);
-+ realx = _mm_blend_epi16 (realx, x1, 85);
-+
-+ imagy = _mm_srli_si128 (y1, 2);
-+ imagy = _mm_blend_epi16 (y2, imagy, 85);
-+ realy = _mm_slli_si128 (y2, 2);
-+ realy = _mm_blend_epi16 (realy, y1, 85);
-+
-+ realx_mult_realy = _mm_mullo_epi16 (realx, realy);
-+ imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
-+ realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
-+ imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
-+
-+ real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+ imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+
-+ //Get early values
-+ y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
-+ E_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)E_code_ptr);
-+
-+ imagy = _mm_srli_si128 (y1, 2);
-+ imagy = _mm_blend_epi16 (y2, imagy, 85);
-+ realy = _mm_slli_si128 (y2, 2);
-+ realy = _mm_blend_epi16 (realy, y1, 85);
-+
-+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-+
-+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+
-+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
-+ real_output = _mm_srli_si128 (real_output, 8);
-+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
-+ real_output_i32 = _mm_add_epi32 (real_output_i_1, real_output_i_2);
-+ real_output_ps = _mm_cvtepi32_ps(real_output_i32);
-+
-+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output = _mm_srli_si128 (imag_output, 8);
-+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output_i32 = _mm_add_epi32 (imag_output_i_1, imag_output_i_2);
-+ imag_output_ps = _mm_cvtepi32_ps(imag_output_i32);
-+
-+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
-+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
-+
-+ //Get prompt values
-+ y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
-+ P_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)P_code_ptr);
-+
-+ imagy = _mm_srli_si128 (y1, 2);
-+ imagy = _mm_blend_epi16 (y2, imagy, 85);
-+ realy = _mm_slli_si128 (y2, 2);
-+ realy = _mm_blend_epi16 (realy, y1, 85);
-+
-+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-+
-+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+
-+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
-+ real_output = _mm_srli_si128 (real_output, 8);
-+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
-+ real_output_i32 = _mm_add_epi32 (real_output_i_1, real_output_i_2);
-+ real_output_ps = _mm_cvtepi32_ps(real_output_i32);
-+
-+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output = _mm_srli_si128 (imag_output, 8);
-+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output_i32 = _mm_add_epi32 (imag_output_i_1, imag_output_i_2);
-+ imag_output_ps = _mm_cvtepi32_ps(imag_output_i32);
-+
-+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
-+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
-+
-+ //Get late values
-+ y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
-+ L_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)L_code_ptr);
-+
-+ imagy = _mm_srli_si128 (y1, 2);
-+ imagy = _mm_blend_epi16 (y2, imagy, 85);
-+ realy = _mm_slli_si128 (y2, 2);
-+ realy = _mm_blend_epi16 (realy, y1, 85);
-+
-+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-+
-+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+
-+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
-+ real_output = _mm_srli_si128 (real_output, 8);
-+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
-+ real_output_i32 = _mm_add_epi32 (real_output_i_1, real_output_i_2);
-+ real_output_ps = _mm_cvtepi32_ps(real_output_i32);
-+
-+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output = _mm_srli_si128 (imag_output, 8);
-+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
-+ imag_output_i32 = _mm_add_epi32 (imag_output_i_1, imag_output_i_2);
-+ imag_output_ps = _mm_cvtepi32_ps(imag_output_i32);
-+
-+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
-+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
-+
-+ input_ptr += 4;
-+ carrier_ptr += 4;
-+ E_code_ptr += 4;
-+ L_code_ptr += 4;
-+ P_code_ptr += 4;
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
-+
-+ _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
-+
-+ for (int i = 0; i<4; ++i)
-+ {
-+ E_out_real += real_E_dotProductVector[i];
-+ E_out_imag += imag_E_dotProductVector[i];
-+ P_out_real += real_P_dotProductVector[i];
-+ P_out_imag += imag_P_dotProductVector[i];
-+ L_out_real += real_L_dotProductVector[i];
-+ L_out_imag += imag_L_dotProductVector[i];
-+ }
-+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
-+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
-+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
-+ }
-+
-+ lv_16sc_t bb_signal_sample;
-+ for(int i=0; i < num_points%8; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
-+ // Now get early, late, and prompt values for each
-+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
-+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
-+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
-+ }
-+}
-+#endif /* LV_HAVE_SSE4_1 */
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include "smmintrin.h"
-+#include "CommonMacros/CommonMacros.h"
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Early PRN code replica input
-+ \param L_code Early PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Early correlation output
-+ \param L_out Early correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+
-+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_sse4_1_fifth(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
-+{
-+ const unsigned int sse_iters = num_points / 8;
-+
-+ __m128i realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy;
-+ __m128i input_i_1, input_i_2, output_i32;
-+
-+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
-+ __m128i realx, imagx, realy, imagy, real_output, imag_output;
-+
-+ __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
-+ __m128 real_output_ps, imag_output_ps;
-+
-+ float E_out_real = 0;
-+ float E_out_imag = 0;
-+ float P_out_real = 0;
-+ float P_out_imag = 0;
-+ float L_out_real = 0;
-+ float L_out_imag = 0;
-+
-+ const lv_16sc_t* input_ptr = input;
-+ const lv_16sc_t* carrier_ptr = carrier;
-+
-+ const lv_16sc_t* E_code_ptr = E_code;
-+ lv_32fc_t* E_out_ptr = E_out;
-+ const lv_16sc_t* L_code_ptr = L_code;
-+ lv_32fc_t* L_out_ptr = L_out;
-+ const lv_16sc_t* P_code_ptr = P_code;
-+ lv_32fc_t* P_out_ptr = P_out;
-+
-+ *E_out_ptr = 0;
-+ *P_out_ptr = 0;
-+ *L_out_ptr = 0;
-+
-+ real_E_code_acc = _mm_setzero_ps();
-+ imag_E_code_acc = _mm_setzero_ps();
-+ real_P_code_acc = _mm_setzero_ps();
-+ imag_P_code_acc = _mm_setzero_ps();
-+ real_L_code_acc = _mm_setzero_ps();
-+ imag_L_code_acc = _mm_setzero_ps();
-+
-+ if (sse_iters>0)
-+ {
-+ for(int number = 0;number < sse_iters; number++){
-+
-+ //Perform the carrier wipe-off
-+ x1 = _mm_lddqu_si128((__m128i*)input_ptr);
-+ input_ptr += 4;
-+ x2 = _mm_lddqu_si128((__m128i*)input_ptr);
-+
-+ y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
-+ carrier_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)carrier_ptr);
-+
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
-+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
-+
-+ //Get early values
-+ y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
-+ E_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)E_code_ptr);
-+
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
-+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)
-+
-+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)
-+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
-+
-+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
-+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
-+
-+ //Get prompt values
-+ y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
-+ P_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)P_code_ptr);
-+
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
-+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)
-+
-+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)
-+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
-+
-+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
-+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
-+
-+ //Get late values
-+ y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
-+ L_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)L_code_ptr);
-+
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
-+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)
-+
-+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)
-+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
-+
-+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
-+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
-+
-+ input_ptr += 4;
-+ carrier_ptr += 4;
-+ E_code_ptr += 4;
-+ L_code_ptr += 4;
-+ P_code_ptr += 4;
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
-+
-+ _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
-+
-+ for (int i = 0; i<4; ++i)
-+ {
-+ E_out_real += real_E_dotProductVector[i];
-+ E_out_imag += imag_E_dotProductVector[i];
-+ P_out_real += real_P_dotProductVector[i];
-+ P_out_imag += imag_P_dotProductVector[i];
-+ L_out_real += real_L_dotProductVector[i];
-+ L_out_imag += imag_L_dotProductVector[i];
-+ }
-+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
-+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
-+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
-+ }
-+
-+ lv_16sc_t bb_signal_sample;
-+ for(int i=0; i < num_points%8; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
-+ // Now get early, late, and prompt values for each
-+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
-+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
-+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
-+ }
-+}
-+#endif /* LV_HAVE_SSE4_1 */
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include "smmintrin.h"
-+#include "CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h"
-+#include "CommonMacros/CommonMacros.h"
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Early PRN code replica input
-+ \param L_code Early PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Early correlation output
-+ \param L_out Early correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+
-+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_sse4_1_sixth(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
-+{
-+ const unsigned int sse_iters = num_points / 8;
-+
-+ __m128i realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy;
-+ __m128i input_i_1, input_i_2, output_i32;
-+
-+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
-+ __m128i realx, imagx, realy, imagy, real_output, imag_output;
-+
-+ __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
-+ __m128 real_output_ps, imag_output_ps;
-+
-+ float E_out_real = 0;
-+ float E_out_imag = 0;
-+ float P_out_real = 0;
-+ float P_out_imag = 0;
-+ float L_out_real = 0;
-+ float L_out_imag = 0;
-+
-+ const lv_16sc_t* input_ptr = input;
-+ const lv_16sc_t* carrier_ptr = carrier;
-+
-+ const lv_16sc_t* E_code_ptr = E_code;
-+ lv_32fc_t* E_out_ptr = E_out;
-+ const lv_16sc_t* L_code_ptr = L_code;
-+ lv_32fc_t* L_out_ptr = L_out;
-+ const lv_16sc_t* P_code_ptr = P_code;
-+ lv_32fc_t* P_out_ptr = P_out;
-+
-+ *E_out_ptr = 0;
-+ *P_out_ptr = 0;
-+ *L_out_ptr = 0;
-+
-+ real_E_code_acc = _mm_setzero_ps();
-+ imag_E_code_acc = _mm_setzero_ps();
-+ real_P_code_acc = _mm_setzero_ps();
-+ imag_P_code_acc = _mm_setzero_ps();
-+ real_L_code_acc = _mm_setzero_ps();
-+ imag_L_code_acc = _mm_setzero_ps();
-+
-+ if (sse_iters>0)
-+ {
-+ for(int number = 0;number < sse_iters; number++){
-+
-+ //Perform the carrier wipe-off
-+ x1 = _mm_lddqu_si128((__m128i*)input_ptr);
-+ input_ptr += 4;
-+ x2 = _mm_lddqu_si128((__m128i*)input_ptr);
-+
-+ y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
-+ carrier_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)carrier_ptr);
-+
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
-+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
-+
-+ //Get early values
-+ y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
-+ E_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)E_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
-+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
-+
-+ //Get prompt values
-+ y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
-+ P_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)P_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
-+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
-+
-+ //Get late values
-+ y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
-+ L_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)L_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
-+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
-+
-+ input_ptr += 4;
-+ carrier_ptr += 4;
-+ E_code_ptr += 4;
-+ L_code_ptr += 4;
-+ P_code_ptr += 4;
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
-+
-+ _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
-+
-+ for (int i = 0; i<4; ++i)
-+ {
-+ E_out_real += real_E_dotProductVector[i];
-+ E_out_imag += imag_E_dotProductVector[i];
-+ P_out_real += real_P_dotProductVector[i];
-+ P_out_imag += imag_P_dotProductVector[i];
-+ L_out_real += real_L_dotProductVector[i];
-+ L_out_imag += imag_L_dotProductVector[i];
-+ }
-+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
-+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
-+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
-+ }
-+
-+ lv_16sc_t bb_signal_sample;
-+ for(int i=0; i < num_points%8; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
-+ // Now get early, late, and prompt values for each
-+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
-+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
-+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
-+ }
-+}
-+#endif /* LV_HAVE_SSE4_1 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Early PRN code replica input
-+ \param L_code Early PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Early correlation output
-+ \param L_out Early correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
-+{
-+ lv_16sc_t bb_signal_sample;
-+ lv_16sc_t tmp1;
-+ lv_16sc_t tmp2;
-+ lv_16sc_t tmp3;
-+
-+ bb_signal_sample = lv_cmake(0, 0);
-+
-+ *E_out = 0;
-+ *P_out = 0;
-+ *L_out = 0;
-+ // perform Early, Prompt and Late correlation
-+
-+ for(int i=0; i < num_points; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = input[i] * carrier[i];
-+
-+ tmp1 = bb_signal_sample * E_code[i];
-+ tmp2 = bb_signal_sample * P_code[i];
-+ tmp3 = bb_signal_sample * L_code[i];
-+
-+ // Now get early, late, and prompt values for each
-+ *E_out += (lv_32fc_t)tmp1;
-+ *P_out += (lv_32fc_t)tmp2;
-+ *L_out += (lv_32fc_t)tmp3;
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_H */
-+
-+
-+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_a_H
-+#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+//
-+//#ifdef LV_HAVE_SSE4_1
-+//#include "smmintrin.h"
-+///*!
-+// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+// \param input The input signal input
-+// \param carrier The carrier signal input
-+// \param E_code Early PRN code replica input
-+// \param P_code Early PRN code replica input
-+// \param L_code Early PRN code replica input
-+// \param E_out Early correlation output
-+// \param P_out Early correlation output
-+// \param L_out Early correlation output
-+// \param num_points The number of complex values in vectors
-+// */
-+//static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_a_sse4_1(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
-+//{
-+// const unsigned int sse_iters = num_points / 8;
-+//
-+// __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
-+// __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
-+//
-+// __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
-+// __m128i real_output_i_1, real_output_i_2, imag_output_i_1, imag_output_i_2;
-+// __m128 real_output_ps_1, real_output_ps_2, imag_output_ps_1, imag_output_ps_2;
-+//
-+// float E_out_real = 0;
-+// float E_out_imag = 0;
-+// float P_out_real = 0;
-+// float P_out_imag = 0;
-+// float L_out_real = 0;
-+// float L_out_imag = 0;
-+//
-+// const lv_16sc_t* input_ptr = input;
-+// const lv_16sc_t* carrier_ptr = carrier;
-+//
-+// const lv_16sc_t* E_code_ptr = E_code;
-+// lv_32fc_t* E_out_ptr = E_out;
-+// const lv_16sc_t* L_code_ptr = L_code;
-+// lv_32fc_t* L_out_ptr = L_out;
-+// const lv_16sc_t* P_code_ptr = P_code;
-+// lv_32fc_t* P_out_ptr = P_out;
-+//
-+// *E_out_ptr = 0;
-+// *P_out_ptr = 0;
-+// *L_out_ptr = 0;
-+//
-+// mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
-+//
-+// real_E_code_acc = _mm_setzero_ps();
-+// imag_E_code_acc = _mm_setzero_ps();
-+// real_P_code_acc = _mm_setzero_ps();
-+// imag_P_code_acc = _mm_setzero_ps();
-+// real_L_code_acc = _mm_setzero_ps();
-+// imag_L_code_acc = _mm_setzero_ps();
-+//
-+// if (sse_iters>0)
-+// {
-+// for(int number = 0;number < sse_iters; number++){
-+//
-+// //Perform the carrier wipe-off
-+// x1 = _mm_lddqu_si128((__m128i*)input_ptr);
-+// input_ptr += 4;
-+// x2 = _mm_lddqu_si128((__m128i*)input_ptr);
-+//
-+// y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
-+// carrier_ptr += 4;
-+// y2 = _mm_lddqu_si128((__m128i*)carrier_ptr);
-+//
-+// imagx = _mm_srli_si128 (x1, 2);
-+// imagx = _mm_blend_epi16 (x2, imagx, 85);
-+// realx = _mm_slli_si128 (x2, 2);
-+// realx = _mm_blend_epi16 (realx, x1, 85);
-+//
-+// imagy = _mm_srli_si128 (y1, 2);
-+// imagy = _mm_blend_epi16 (y2, imagy, 85);
-+// realy = _mm_slli_si128 (y2, 2);
-+// realy = _mm_blend_epi16 (realy, y1, 85);
-+//
-+// realx_mult_realy = _mm_mullo_epi16 (realx, realy);
-+// imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
-+// realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
-+// imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
-+//
-+// real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+// imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+//
-+// //Get early values
-+// y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
-+// E_code_ptr += 4;
-+// y2 = _mm_lddqu_si128((__m128i*)E_code_ptr);
-+//
-+// imagy = _mm_srli_si128 (y1, 2);
-+// imagy = _mm_blend_epi16 (y2, imagy, 85);
-+// realy = _mm_slli_si128 (y2, 2);
-+// realy = _mm_blend_epi16 (realy, y1, 85);
-+//
-+// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-+// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-+// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-+// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-+//
-+// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+//
-+// real_output_i_1 = _mm_cvtepi16_epi32(real_output);
-+// real_output_ps_1 = _mm_cvtepi32_ps(real_output_i_1);
-+// real_output = _mm_srli_si128 (real_output, 8);
-+// real_output_i_2 = _mm_cvtepi16_epi32(real_output);
-+// real_output_ps_2 = _mm_cvtepi32_ps(real_output_i_2);
-+//
-+// imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
-+// imag_output_ps_1 = _mm_cvtepi32_ps(imag_output_i_1);
-+// imag_output = _mm_srli_si128 (imag_output, 8);
-+// imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
-+// imag_output_ps_2 = _mm_cvtepi32_ps(imag_output_i_2);
-+//
-+// real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps_1);
-+// real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps_2);
-+// imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps_1);
-+// imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps_2);
-+//
-+// //Get prompt values
-+// y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
-+// P_code_ptr += 4;
-+// y2 = _mm_lddqu_si128((__m128i*)P_code_ptr);
-+//
-+// imagy = _mm_srli_si128 (y1, 2);
-+// imagy = _mm_blend_epi16 (y2, imagy, 85);
-+// realy = _mm_slli_si128 (y2, 2);
-+// realy = _mm_blend_epi16 (realy, y1, 85);
-+//
-+// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-+// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-+// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-+// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-+//
-+// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+//
-+// real_output_i_1 = _mm_cvtepi16_epi32(real_output);
-+// real_output_ps_1 = _mm_cvtepi32_ps(real_output_i_1);
-+// real_output = _mm_srli_si128 (real_output, 8);
-+// real_output_i_2 = _mm_cvtepi16_epi32(real_output);
-+// real_output_ps_2 = _mm_cvtepi32_ps(real_output_i_2);
-+//
-+// imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
-+// imag_output_ps_1 = _mm_cvtepi32_ps(imag_output_i_1);
-+// imag_output = _mm_srli_si128 (imag_output, 8);
-+// imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
-+// imag_output_ps_2 = _mm_cvtepi32_ps(imag_output_i_2);
-+//
-+// real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps_1);
-+// real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps_2);
-+// imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps_1);
-+// imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps_2);
-+//
-+// //Get late values
-+// y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
-+// L_code_ptr += 4;
-+// y2 = _mm_lddqu_si128((__m128i*)L_code_ptr);
-+//
-+// imagy = _mm_srli_si128 (y1, 2);
-+// imagy = _mm_blend_epi16 (y2, imagy, 85);
-+// realy = _mm_slli_si128 (y2, 2);
-+// realy = _mm_blend_epi16 (realy, y1, 85);
-+//
-+// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
-+// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
-+// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
-+// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
-+//
-+// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
-+// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
-+//
-+// real_output_i_1 = _mm_cvtepi16_epi32(real_output);
-+// real_output_ps_1 = _mm_cvtepi32_ps(real_output_i_1);
-+// real_output = _mm_srli_si128 (real_output, 8);
-+// real_output_i_2 = _mm_cvtepi16_epi32(real_output);
-+// real_output_ps_2 = _mm_cvtepi32_ps(real_output_i_2);
-+//
-+// imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
-+// imag_output_ps_1 = _mm_cvtepi32_ps(imag_output_i_1);
-+// imag_output = _mm_srli_si128 (imag_output, 8);
-+// imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
-+// imag_output_ps_2 = _mm_cvtepi32_ps(imag_output_i_2);
-+//
-+// real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps_1);
-+// real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps_2);
-+// imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps_1);
-+// imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps_2);
-+//
-+// input_ptr += 4;
-+// carrier_ptr += 4;
-+// E_code_ptr += 4;
-+// L_code_ptr += 4;
-+// P_code_ptr += 4;
-+// }
-+//
-+// __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
-+// __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
-+// __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
-+// __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
-+// __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
-+// __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
-+//
-+// _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
-+// _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
-+// _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
-+// _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
-+// _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
-+// _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
-+//
-+// for (int i = 0; i<4; ++i)
-+// {
-+// E_out_real += real_E_dotProductVector[i];
-+// E_out_imag += imag_E_dotProductVector[i];
-+// P_out_real += real_P_dotProductVector[i];
-+// P_out_imag += imag_P_dotProductVector[i];
-+// L_out_real += real_L_dotProductVector[i];
-+// L_out_imag += imag_L_dotProductVector[i];
-+// }
-+// *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
-+// *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
-+// *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
-+// }
-+//
-+// lv_16sc_t bb_signal_sample;
-+// for(int i=0; i < num_points%8; ++i)
-+// {
-+// //Perform the carrier wipe-off
-+// bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
-+// // Now get early, late, and prompt values for each
-+// *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
-+// *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
-+// *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
-+// }
-+//}
-+//#endif /* LV_HAVE_SSE4_1 */
-+//
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Early PRN code replica input
-+ \param L_code Early PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Early correlation output
-+ \param L_out Early correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_a_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
-+{
-+ lv_16sc_t bb_signal_sample;
-+ lv_16sc_t tmp1;
-+ lv_16sc_t tmp2;
-+ lv_16sc_t tmp3;
-+
-+ bb_signal_sample = lv_cmake(0, 0);
-+
-+ *E_out = 0;
-+ *P_out = 0;
-+ *L_out = 0;
-+ // perform Early, Prompt and Late correlation
-+
-+ for(int i=0; i < num_points; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = input[i] * carrier[i];
-+
-+ tmp1 = bb_signal_sample * E_code[i];
-+ tmp2 = bb_signal_sample * P_code[i];
-+ tmp3 = bb_signal_sample * L_code[i];
-+
-+ // Now get early, late, and prompt values for each
-+ *E_out += (lv_32fc_t)tmp1;
-+ *P_out += (lv_32fc_t)tmp2;
-+ *L_out += (lv_32fc_t)tmp3;
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,595 @@
-+/*!
-+ * \file volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5.h
-+ * \brief Volk protokernel: performs the carrier wipe-off mixing and the Very early, Early, Prompt, Late and very late correlation with 32 bits vectors and returns float32 values.
-+ * \authors
-+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
-+ *
-+ *
-+ * Volk protokernel that performs the carrier wipe-off mixing and the
-+ * Very Early, Early, Prompt, Late and Very Late correlation with 32 bits vectors (16 bits the
-+ * real part and 16 bits the imaginary part) and accumulates into float32 values, returning them:
-+ * - The carrier wipe-off is done by multiplying the input signal by the
-+ * carrier (multiplication of 32 bits vectors) It returns the input
-+ * signal in base band (BB)
-+ * - Very Early values are calculated by multiplying the input signal in BB by the
-+ * very early code (multiplication of 32 bits vectors), converting that to float32 and accumulating the results
-+ * - Early values are calculated by multiplying the input signal in BB by the
-+ * early code (multiplication of 32 bits vectors), converting that to float32 and accumulating the results
-+ * - Prompt values are calculated by multiplying the input signal in BB by the
-+ * prompt code (multiplication of 32 bits vectors), converting that to float32 and accumulating the results
-+ * - Late values are calculated by multiplying the input signal in BB by the
-+ * late code (multiplication of 32 bits vectors), converting that to float32 and accumulating the results
-+ * - Very Late values are calculated by multiplying the input signal in BB by the
-+ * very late code (multiplication of 32 bits vectors), converting that to float32 and accumulating the results
-+ *
-+ * -------------------------------------------------------------------------
-+ *
-+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
-+ *
-+ * GNSS-SDR is a software defined Global Navigation
-+ * Satellite Systems receiver
-+ *
-+ * This file is part of GNSS-SDR.
-+ *
-+ * GNSS-SDR is free software: you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation, either version 3 of the License, or
-+ * at your option) any later version.
-+ *
-+ * GNSS-SDR is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with GNSS-SDR. If not, see .
-+ *
-+ * -------------------------------------------------------------------------
-+ */
-+
-+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_u_H
-+#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_u_H
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include "smmintrin.h"
-+#include "CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h"
-+#include "CommonMacros/CommonMacros.h"
-+ /*!
-+ \brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Vate correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param VE_code Very Early PRN code replica input
-+ \param E_code Early PRN code replica input
-+ \param P_code Prompt PRN code replica input
-+ \param L_code Late PRN code replica input
-+ \param VL_code Very Late PRN code replica input
-+ \param VE_out Very Early correlation output
-+ \param E_out Early correlation output
-+ \param P_out Prompt correlation output
-+ \param L_out Late correlation output
-+ \param VL_out Very Late correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_u_sse4_1(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* VE_code, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, const lv_16sc_t* VL_code, unsigned int num_points)
-+{
-+ const unsigned int sse_iters = num_points / 8;
-+
-+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
-+ __m128i realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
-+
-+ __m128 real_VE_code_acc, imag_VE_code_acc, real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc, real_VL_code_acc, imag_VL_code_acc;
-+ __m128i input_i_1, input_i_2, output_i32;
-+ __m128 real_output_ps, imag_output_ps;
-+
-+ float VE_out_real = 0;
-+ float VE_out_imag = 0;
-+ float E_out_real = 0;
-+ float E_out_imag = 0;
-+ float P_out_real = 0;
-+ float P_out_imag = 0;
-+ float L_out_real = 0;
-+ float L_out_imag = 0;
-+ float VL_out_real = 0;
-+ float VL_out_imag = 0;
-+
-+ const lv_16sc_t* input_ptr = input;
-+ const lv_16sc_t* carrier_ptr = carrier;
-+
-+ const lv_16sc_t* VE_code_ptr = VE_code;
-+ lv_32fc_t* VE_out_ptr = VE_out;
-+ const lv_16sc_t* E_code_ptr = E_code;
-+ lv_32fc_t* E_out_ptr = E_out;
-+ const lv_16sc_t* L_code_ptr = L_code;
-+ lv_32fc_t* L_out_ptr = L_out;
-+ const lv_16sc_t* P_code_ptr = P_code;
-+ lv_32fc_t* P_out_ptr = P_out;
-+ const lv_16sc_t* VL_code_ptr = VL_code;
-+ lv_32fc_t* VL_out_ptr = VL_out;
-+
-+ *VE_out_ptr = 0;
-+ *E_out_ptr = 0;
-+ *P_out_ptr = 0;
-+ *L_out_ptr = 0;
-+ *VL_out_ptr = 0;
-+
-+ real_VE_code_acc = _mm_setzero_ps();
-+ imag_VE_code_acc = _mm_setzero_ps();
-+ real_E_code_acc = _mm_setzero_ps();
-+ imag_E_code_acc = _mm_setzero_ps();
-+ real_P_code_acc = _mm_setzero_ps();
-+ imag_P_code_acc = _mm_setzero_ps();
-+ real_L_code_acc = _mm_setzero_ps();
-+ imag_L_code_acc = _mm_setzero_ps();
-+ real_VL_code_acc = _mm_setzero_ps();
-+ imag_VL_code_acc = _mm_setzero_ps();
-+
-+ if (sse_iters>0)
-+ {
-+ for(int number = 0;number < sse_iters; number++){
-+
-+ //Perform the carrier wipe-off
-+ x1 = _mm_lddqu_si128((__m128i*)input_ptr);
-+ input_ptr += 4;
-+ x2 = _mm_lddqu_si128((__m128i*)input_ptr);
-+
-+ y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
-+ carrier_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)carrier_ptr);
-+
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
-+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
-+
-+ //Get very early values
-+ y1 = _mm_lddqu_si128((__m128i*)VE_code_ptr);
-+ VE_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)VE_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_VE_code_acc = _mm_add_ps (real_VE_code_acc, real_output_ps);
-+ imag_VE_code_acc = _mm_add_ps (imag_VE_code_acc, imag_output_ps);
-+
-+ //Get early values
-+ y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
-+ E_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)E_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
-+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
-+
-+ //Get prompt values
-+ y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
-+ P_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)P_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
-+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
-+
-+ //Get late values
-+ y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
-+ L_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)L_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
-+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
-+
-+ //Get very late values
-+ y1 = _mm_lddqu_si128((__m128i*)VL_code_ptr);
-+ VL_code_ptr += 4;
-+ y2 = _mm_lddqu_si128((__m128i*)VL_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_VL_code_acc = _mm_add_ps (real_VL_code_acc, real_output_ps);
-+ imag_VL_code_acc = _mm_add_ps (imag_VL_code_acc, imag_output_ps);
-+
-+ input_ptr += 4;
-+ carrier_ptr += 4;
-+ VE_code_ptr += 4;
-+ E_code_ptr += 4;
-+ P_code_ptr += 4;
-+ L_code_ptr += 4;
-+ VL_code_ptr += 4;
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) float real_VE_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_VE_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_VL_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_VL_dotProductVector[4];
-+
-+ _mm_storeu_ps((float*)real_VE_dotProductVector,real_VE_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_VE_dotProductVector,imag_VE_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)real_VL_dotProductVector,real_VL_code_acc); // Store the results back into the dot product vector
-+ _mm_storeu_ps((float*)imag_VL_dotProductVector,imag_VL_code_acc); // Store the results back into the dot product vector
-+
-+ for (int i = 0; i<4; ++i)
-+ {
-+ VE_out_real += real_VE_dotProductVector[i];
-+ VE_out_imag += imag_VE_dotProductVector[i];
-+ E_out_real += real_E_dotProductVector[i];
-+ E_out_imag += imag_E_dotProductVector[i];
-+ P_out_real += real_P_dotProductVector[i];
-+ P_out_imag += imag_P_dotProductVector[i];
-+ L_out_real += real_L_dotProductVector[i];
-+ L_out_imag += imag_L_dotProductVector[i];
-+ VL_out_real += real_VL_dotProductVector[i];
-+ VL_out_imag += imag_VL_dotProductVector[i];
-+ }
-+ *VE_out_ptr = lv_cmake(VE_out_real, VE_out_imag);
-+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
-+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
-+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
-+ *VL_out_ptr = lv_cmake(VL_out_real, VL_out_imag);
-+ }
-+
-+ lv_16sc_t bb_signal_sample;
-+ for(int i=0; i < num_points%8; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
-+ // Now get early, late, and prompt values for each
-+ *VE_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VE_code_ptr++));
-+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
-+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
-+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
-+ *VL_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VL_code_ptr++));
-+ }
-+
-+}
-+#endif /* LV_HAVE_SSE4_1 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Vate correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param VE_code Very Early PRN code replica input
-+ \param E_code Early PRN code replica input
-+ \param P_code Prompt PRN code replica input
-+ \param L_code Late PRN code replica input
-+ \param VL_code Very Late PRN code replica input
-+ \param VE_out Very Early correlation output
-+ \param E_out Early correlation output
-+ \param P_out Prompt correlation output
-+ \param L_out Late correlation output
-+ \param VL_out Very Late correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* VE_code, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, const lv_16sc_t* VL_code, unsigned int num_points)
-+{
-+ lv_16sc_t bb_signal_sample;
-+ lv_16sc_t tmp1;
-+ lv_16sc_t tmp2;
-+ lv_16sc_t tmp3;
-+ lv_16sc_t tmp4;
-+ lv_16sc_t tmp5;
-+
-+ bb_signal_sample = lv_cmake(0, 0);
-+
-+ *VE_out = 0;
-+ *E_out = 0;
-+ *P_out = 0;
-+ *L_out = 0;
-+ *VL_out = 0;
-+ // perform Early, Prompt and Late correlation
-+
-+ for(int i=0; i < num_points; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = input[i] * carrier[i];
-+
-+ tmp1 = bb_signal_sample * VE_code[i];
-+ tmp2 = bb_signal_sample * E_code[i];
-+ tmp3 = bb_signal_sample * P_code[i];
-+ tmp4 = bb_signal_sample * L_code[i];
-+ tmp5 = bb_signal_sample * VL_code[i];
-+
-+ // Now get early, late, and prompt values for each
-+ *VE_out += (lv_32fc_t)tmp1;
-+ *E_out += (lv_32fc_t)tmp2;
-+ *P_out += (lv_32fc_t)tmp3;
-+ *L_out += (lv_32fc_t)tmp4;
-+ *VL_out += (lv_32fc_t)tmp5;
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_u_H */
-+
-+
-+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_H
-+#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include "smmintrin.h"
-+#include "CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h"
-+#include "CommonMacros/CommonMacros.h"
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Vate correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param VE_code Very Early PRN code replica input
-+ \param E_code Early PRN code replica input
-+ \param P_code Prompt PRN code replica input
-+ \param L_code Late PRN code replica input
-+ \param VL_code Very Late PRN code replica input
-+ \param VE_out Very Early correlation output
-+ \param E_out Early correlation output
-+ \param P_out Prompt correlation output
-+ \param L_out Late correlation output
-+ \param VL_out Very Late correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_sse4_1(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* VE_code, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, const lv_16sc_t* VL_code, unsigned int num_points)
-+{
-+ const unsigned int sse_iters = num_points / 8;
-+
-+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
-+ __m128i realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
-+
-+ __m128 real_VE_code_acc, imag_VE_code_acc, real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc, real_VL_code_acc, imag_VL_code_acc;
-+ __m128i input_i_1, input_i_2, output_i32;
-+ __m128 real_output_ps, imag_output_ps;
-+
-+ float VE_out_real = 0;
-+ float VE_out_imag = 0;
-+ float E_out_real = 0;
-+ float E_out_imag = 0;
-+ float P_out_real = 0;
-+ float P_out_imag = 0;
-+ float L_out_real = 0;
-+ float L_out_imag = 0;
-+ float VL_out_real = 0;
-+ float VL_out_imag = 0;
-+
-+ const lv_16sc_t* input_ptr = input;
-+ const lv_16sc_t* carrier_ptr = carrier;
-+
-+ const lv_16sc_t* VE_code_ptr = VE_code;
-+ lv_32fc_t* VE_out_ptr = VE_out;
-+ const lv_16sc_t* E_code_ptr = E_code;
-+ lv_32fc_t* E_out_ptr = E_out;
-+ const lv_16sc_t* L_code_ptr = L_code;
-+ lv_32fc_t* L_out_ptr = L_out;
-+ const lv_16sc_t* P_code_ptr = P_code;
-+ lv_32fc_t* P_out_ptr = P_out;
-+ const lv_16sc_t* VL_code_ptr = VL_code;
-+ lv_32fc_t* VL_out_ptr = VL_out;
-+
-+ *VE_out_ptr = 0;
-+ *E_out_ptr = 0;
-+ *P_out_ptr = 0;
-+ *L_out_ptr = 0;
-+ *VL_out_ptr = 0;
-+
-+ real_VE_code_acc = _mm_setzero_ps();
-+ imag_VE_code_acc = _mm_setzero_ps();
-+ real_E_code_acc = _mm_setzero_ps();
-+ imag_E_code_acc = _mm_setzero_ps();
-+ real_P_code_acc = _mm_setzero_ps();
-+ imag_P_code_acc = _mm_setzero_ps();
-+ real_L_code_acc = _mm_setzero_ps();
-+ imag_L_code_acc = _mm_setzero_ps();
-+ real_VL_code_acc = _mm_setzero_ps();
-+ imag_VL_code_acc = _mm_setzero_ps();
-+
-+ if (sse_iters>0)
-+ {
-+ for(int number = 0;number < sse_iters; number++){
-+
-+ //Perform the carrier wipe-off
-+ x1 = _mm_load_si128((__m128i*)input_ptr);
-+ input_ptr += 4;
-+ x2 = _mm_load_si128((__m128i*)input_ptr);
-+
-+ y1 = _mm_load_si128((__m128i*)carrier_ptr);
-+ carrier_ptr += 4;
-+ y2 = _mm_load_si128((__m128i*)carrier_ptr);
-+
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
-+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
-+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
-+
-+ //Get very early values
-+ y1 = _mm_load_si128((__m128i*)VE_code_ptr);
-+ VE_code_ptr += 4;
-+ y2 = _mm_load_si128((__m128i*)VE_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_VE_code_acc = _mm_add_ps (real_VE_code_acc, real_output_ps);
-+ imag_VE_code_acc = _mm_add_ps (imag_VE_code_acc, imag_output_ps);
-+
-+ //Get early values
-+ y1 = _mm_load_si128((__m128i*)E_code_ptr);
-+ E_code_ptr += 4;
-+ y2 = _mm_load_si128((__m128i*)E_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
-+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
-+
-+ //Get prompt values
-+ y1 = _mm_load_si128((__m128i*)P_code_ptr);
-+ P_code_ptr += 4;
-+ y2 = _mm_load_si128((__m128i*)P_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
-+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
-+
-+ //Get late values
-+ y1 = _mm_load_si128((__m128i*)L_code_ptr);
-+ L_code_ptr += 4;
-+ y2 = _mm_load_si128((__m128i*)L_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
-+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
-+
-+ //Get very late values
-+ y1 = _mm_load_si128((__m128i*)VL_code_ptr);
-+ VL_code_ptr += 4;
-+ y2 = _mm_load_si128((__m128i*)VL_code_ptr);
-+
-+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
-+
-+ real_VL_code_acc = _mm_add_ps (real_VL_code_acc, real_output_ps);
-+ imag_VL_code_acc = _mm_add_ps (imag_VL_code_acc, imag_output_ps);
-+
-+ input_ptr += 4;
-+ carrier_ptr += 4;
-+ VE_code_ptr += 4;
-+ E_code_ptr += 4;
-+ P_code_ptr += 4;
-+ L_code_ptr += 4;
-+ VL_code_ptr += 4;
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) float real_VE_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_VE_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float real_VL_dotProductVector[4];
-+ __VOLK_ATTR_ALIGNED(16) float imag_VL_dotProductVector[4];
-+
-+ _mm_store_ps((float*)real_VE_dotProductVector,real_VE_code_acc); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)imag_VE_dotProductVector,imag_VE_code_acc); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)real_VL_dotProductVector,real_VL_code_acc); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)imag_VL_dotProductVector,imag_VL_code_acc); // Store the results back into the dot product vector
-+
-+ for (int i = 0; i<4; ++i)
-+ {
-+ VE_out_real += real_VE_dotProductVector[i];
-+ VE_out_imag += imag_VE_dotProductVector[i];
-+ E_out_real += real_E_dotProductVector[i];
-+ E_out_imag += imag_E_dotProductVector[i];
-+ P_out_real += real_P_dotProductVector[i];
-+ P_out_imag += imag_P_dotProductVector[i];
-+ L_out_real += real_L_dotProductVector[i];
-+ L_out_imag += imag_L_dotProductVector[i];
-+ VL_out_real += real_VL_dotProductVector[i];
-+ VL_out_imag += imag_VL_dotProductVector[i];
-+ }
-+ *VE_out_ptr = lv_cmake(VE_out_real, VE_out_imag);
-+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
-+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
-+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
-+ *VL_out_ptr = lv_cmake(VL_out_real, VL_out_imag);
-+ }
-+
-+ lv_16sc_t bb_signal_sample;
-+ for(int i=0; i < num_points%8; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
-+ // Now get early, late, and prompt values for each
-+ *VE_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VE_code_ptr++));
-+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
-+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
-+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
-+ *VL_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VL_code_ptr++));
-+ }
-+
-+}
-+#endif /* LV_HAVE_SSE4_1 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Vate correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param VE_code Very Early PRN code replica input
-+ \param E_code Early PRN code replica input
-+ \param P_code Prompt PRN code replica input
-+ \param L_code Late PRN code replica input
-+ \param VL_code Very Late PRN code replica input
-+ \param VE_out Very Early correlation output
-+ \param E_out Early correlation output
-+ \param P_out Prompt correlation output
-+ \param L_out Late correlation output
-+ \param VL_out Very Late correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* VE_code, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, const lv_16sc_t* VL_code, unsigned int num_points)
-+{
-+ lv_16sc_t bb_signal_sample;
-+ lv_16sc_t tmp1;
-+ lv_16sc_t tmp2;
-+ lv_16sc_t tmp3;
-+ lv_16sc_t tmp4;
-+ lv_16sc_t tmp5;
-+
-+ bb_signal_sample = lv_cmake(0, 0);
-+
-+ *VE_out = 0;
-+ *E_out = 0;
-+ *P_out = 0;
-+ *L_out = 0;
-+ *VL_out = 0;
-+ // perform Early, Prompt and Late correlation
-+
-+ for(int i=0; i < num_points; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = input[i] * carrier[i];
-+
-+ tmp1 = bb_signal_sample * VE_code[i];
-+ tmp2 = bb_signal_sample * E_code[i];
-+ tmp3 = bb_signal_sample * P_code[i];
-+ tmp4 = bb_signal_sample * L_code[i];
-+ tmp5 = bb_signal_sample * VL_code[i];
-+
-+ // Now get early, late, and prompt values for each
-+ *VE_out += (lv_32fc_t)tmp1;
-+ *E_out += (lv_32fc_t)tmp2;
-+ *P_out += (lv_32fc_t)tmp3;
-+ *L_out += (lv_32fc_t)tmp4;
-+ *VL_out += (lv_32fc_t)tmp5;
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_accumulator_s32f.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32f_accumulator_s32f.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_accumulator_s32f.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32f_accumulator_s32f.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,68 @@
-+#ifndef INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H
-+#define INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H
-+
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE
-+#include
-+/*!
-+ \brief Accumulates the values in the input buffer
-+ \param result The accumulated result
-+ \param inputBuffer The buffer of data to be accumulated
-+ \param num_points The number of values in inputBuffer to be accumulated
-+*/
-+static inline void volk_gnsssdr_32f_accumulator_s32f_a_sse(float* result, const float* inputBuffer, unsigned int num_points){
-+ float returnValue = 0;
-+ unsigned int number = 0;
-+ const unsigned int quarterPoints = num_points / 4;
-+
-+ const float* aPtr = inputBuffer;
-+ __VOLK_ATTR_ALIGNED(16) float tempBuffer[4];
-+
-+ __m128 accumulator = _mm_setzero_ps();
-+ __m128 aVal = _mm_setzero_ps();
-+
-+ for(;number < quarterPoints; number++){
-+ aVal = _mm_load_ps(aPtr);
-+ accumulator = _mm_add_ps(accumulator, aVal);
-+ aPtr += 4;
-+ }
-+ _mm_store_ps(tempBuffer,accumulator); // Store the results back into the C container
-+ returnValue = tempBuffer[0];
-+ returnValue += tempBuffer[1];
-+ returnValue += tempBuffer[2];
-+ returnValue += tempBuffer[3];
-+
-+ number = quarterPoints * 4;
-+ for(;number < num_points; number++){
-+ returnValue += (*aPtr++);
-+ }
-+ *result = returnValue;
-+}
-+#endif /* LV_HAVE_SSE */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Accumulates the values in the input buffer
-+ \param result The accumulated result
-+ \param inputBuffer The buffer of data to be accumulated
-+ \param num_points The number of values in inputBuffer to be accumulated
-+*/
-+static inline void volk_gnsssdr_32f_accumulator_s32f_generic(float* result, const float* inputBuffer, unsigned int num_points){
-+ const float* aPtr = inputBuffer;
-+ unsigned int number = 0;
-+ float returnValue = 0;
-+
-+ for(;number < num_points; number++){
-+ returnValue += (*aPtr++);
-+ }
-+ *result = returnValue;
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+
-+
-+
-+#endif /* INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_index_max_16u.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32f_index_max_16u.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_index_max_16u.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32f_index_max_16u.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,149 @@
-+#ifndef INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H
-+#define INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include
-+
-+static inline void volk_gnsssdr_32f_index_max_16u_a_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) {
-+ if(num_points > 0){
-+ unsigned int number = 0;
-+ const unsigned int quarterPoints = num_points / 4;
-+
-+ float* inputPtr = (float*)src0;
-+
-+ __m128 indexIncrementValues = _mm_set1_ps(4);
-+ __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
-+
-+ float max = src0[0];
-+ float index = 0;
-+ __m128 maxValues = _mm_set1_ps(max);
-+ __m128 maxValuesIndex = _mm_setzero_ps();
-+ __m128 compareResults;
-+ __m128 currentValues;
-+
-+ __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
-+ __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
-+
-+ for(;number < quarterPoints; number++){
-+
-+ currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
-+ currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
-+
-+ compareResults = _mm_cmpgt_ps(maxValues, currentValues);
-+
-+ maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
-+ maxValues = _mm_blendv_ps(currentValues, maxValues, compareResults);
-+ }
-+
-+ // Calculate the largest value from the remaining 4 points
-+ _mm_store_ps(maxValuesBuffer, maxValues);
-+ _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
-+
-+ for(number = 0; number < 4; number++){
-+ if(maxValuesBuffer[number] > max){
-+ index = maxIndexesBuffer[number];
-+ max = maxValuesBuffer[number];
-+ }
-+ }
-+
-+ number = quarterPoints * 4;
-+ for(;number < num_points; number++){
-+ if(src0[number] > max){
-+ index = number;
-+ max = src0[number];
-+ }
-+ }
-+ target[0] = (unsigned int)index;
-+ }
-+}
-+
-+#endif /*LV_HAVE_SSE4_1*/
-+
-+#ifdef LV_HAVE_SSE
-+#include
-+
-+static inline void volk_gnsssdr_32f_index_max_16u_a_sse(unsigned int* target, const float* src0, unsigned int num_points) {
-+ if(num_points > 0){
-+ unsigned int number = 0;
-+ const unsigned int quarterPoints = num_points / 4;
-+
-+ float* inputPtr = (float*)src0;
-+
-+ __m128 indexIncrementValues = _mm_set1_ps(4);
-+ __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
-+
-+ float max = src0[0];
-+ float index = 0;
-+ __m128 maxValues = _mm_set1_ps(max);
-+ __m128 maxValuesIndex = _mm_setzero_ps();
-+ __m128 compareResults;
-+ __m128 currentValues;
-+
-+ __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
-+ __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
-+
-+ for(;number < quarterPoints; number++){
-+
-+ currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
-+ currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
-+
-+ compareResults = _mm_cmpgt_ps(maxValues, currentValues);
-+
-+ maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex) , _mm_andnot_ps(compareResults, currentIndexes));
-+
-+ maxValues = _mm_or_ps(_mm_and_ps(compareResults, maxValues) , _mm_andnot_ps(compareResults, currentValues));
-+ }
-+
-+ // Calculate the largest value from the remaining 4 points
-+ _mm_store_ps(maxValuesBuffer, maxValues);
-+ _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
-+
-+ for(number = 0; number < 4; number++){
-+ if(maxValuesBuffer[number] > max){
-+ index = maxIndexesBuffer[number];
-+ max = maxValuesBuffer[number];
-+ }
-+ }
-+
-+ number = quarterPoints * 4;
-+ for(;number < num_points; number++){
-+ if(src0[number] > max){
-+ index = number;
-+ max = src0[number];
-+ }
-+ }
-+ target[0] = (unsigned int)index;
-+ }
-+}
-+
-+#endif /*LV_HAVE_SSE*/
-+
-+#ifdef LV_HAVE_GENERIC
-+static inline void volk_gnsssdr_32f_index_max_16u_generic(unsigned int* target, const float* src0, unsigned int num_points) {
-+ if(num_points > 0){
-+ float max = src0[0];
-+ unsigned int index = 0;
-+
-+ unsigned int i = 1;
-+
-+ for(; i < num_points; ++i) {
-+
-+ if(src0[i] > max){
-+ index = i;
-+ max = src0[i];
-+ }
-+
-+ }
-+ target[0] = index;
-+ }
-+}
-+
-+#endif /*LV_HAVE_GENERIC*/
-+
-+
-+#endif /*INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H*/
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_s32f_convert_16i.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32f_s32f_convert_16i.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_s32f_convert_16i.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32f_s32f_convert_16i.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,302 @@
-+#ifndef INCLUDED_volk_gnsssdr_32f_s32f_convert_16i_u_H
-+#define INCLUDED_volk_gnsssdr_32f_s32f_convert_16i_u_H
-+
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE2
-+#include
-+ /*!
-+ \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
-+ \param inputVector The floating point input data buffer
-+ \param outputVector The 16 bit output data buffer
-+ \param scalar The value multiplied against each point in the input buffer
-+ \param num_points The number of data values to be converted
-+ \note Input buffer does NOT need to be properly aligned
-+ */
-+static inline void volk_gnsssdr_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
-+ unsigned int number = 0;
-+
-+ const unsigned int eighthPoints = num_points / 8;
-+
-+ const float* inputVectorPtr = (const float*)inputVector;
-+ int16_t* outputVectorPtr = outputVector;
-+
-+ float min_val = -32768;
-+ float max_val = 32767;
-+ float r;
-+
-+ __m128 vScalar = _mm_set_ps1(scalar);
-+ __m128 inputVal1, inputVal2;
-+ __m128i intInputVal1, intInputVal2;
-+ __m128 ret1, ret2;
-+ __m128 vmin_val = _mm_set_ps1(min_val);
-+ __m128 vmax_val = _mm_set_ps1(max_val);
-+
-+ for(;number < eighthPoints; number++){
-+ inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-+ inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-+
-+ // Scale and clip
-+ ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
-+ ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
-+
-+ intInputVal1 = _mm_cvtps_epi32(ret1);
-+ intInputVal2 = _mm_cvtps_epi32(ret2);
-+
-+ intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-+
-+ _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
-+ outputVectorPtr += 8;
-+ }
-+
-+ number = eighthPoints * 8;
-+ for(; number < num_points; number++){
-+ r = inputVector[number] * scalar;
-+ if(r > max_val)
-+ r = max_val;
-+ else if(r < min_val)
-+ r = min_val;
-+ outputVector[number] = (int16_t)rintf(r);
-+ }
-+}
-+#endif /* LV_HAVE_SSE2 */
-+
-+#ifdef LV_HAVE_SSE
-+#include
-+ /*!
-+ \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
-+ \param inputVector The floating point input data buffer
-+ \param outputVector The 16 bit output data buffer
-+ \param scalar The value multiplied against each point in the input buffer
-+ \param num_points The number of data values to be converted
-+ \note Input buffer does NOT need to be properly aligned
-+ */
-+static inline void volk_gnsssdr_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
-+ unsigned int number = 0;
-+
-+ const unsigned int quarterPoints = num_points / 4;
-+
-+ const float* inputVectorPtr = (const float*)inputVector;
-+ int16_t* outputVectorPtr = outputVector;
-+
-+ float min_val = -32768;
-+ float max_val = 32767;
-+ float r;
-+
-+ __m128 vScalar = _mm_set_ps1(scalar);
-+ __m128 ret;
-+ __m128 vmin_val = _mm_set_ps1(min_val);
-+ __m128 vmax_val = _mm_set_ps1(max_val);
-+
-+ __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
-+
-+ for(;number < quarterPoints; number++){
-+ ret = _mm_loadu_ps(inputVectorPtr);
-+ inputVectorPtr += 4;
-+
-+ // Scale and clip
-+ ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
-+
-+ _mm_store_ps(outputFloatBuffer, ret);
-+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
-+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
-+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
-+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
-+ }
-+
-+ number = quarterPoints * 4;
-+ for(; number < num_points; number++){
-+ r = inputVector[number] * scalar;
-+ if(r > max_val)
-+ r = max_val;
-+ else if(r < min_val)
-+ r = min_val;
-+ outputVector[number] = (int16_t)rintf(r);
-+ }
-+}
-+#endif /* LV_HAVE_SSE */
-+
-+#ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
-+ \param inputVector The floating point input data buffer
-+ \param outputVector The 16 bit output data buffer
-+ \param scalar The value multiplied against each point in the input buffer
-+ \param num_points The number of data values to be converted
-+ \note Input buffer does NOT need to be properly aligned
-+ */
-+static inline void volk_gnsssdr_32f_s32f_convert_16i_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
-+ int16_t* outputVectorPtr = outputVector;
-+ const float* inputVectorPtr = inputVector;
-+ unsigned int number = 0;
-+ float min_val = -32768;
-+ float max_val = 32767;
-+ float r;
-+
-+ for(number = 0; number < num_points; number++){
-+ r = *inputVectorPtr++ * scalar;
-+ if(r > max_val)
-+ r = max_val;
-+ else if(r < min_val)
-+ r = min_val;
-+ *outputVectorPtr++ = (int16_t)rintf(r);
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+
-+
-+
-+#endif /* INCLUDED_volk_gnsssdr_32f_s32f_convert_16i_u_H */
-+#ifndef INCLUDED_volk_gnsssdr_32f_s32f_convert_16i_a_H
-+#define INCLUDED_volk_gnsssdr_32f_s32f_convert_16i_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE2
-+#include
-+ /*!
-+ \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
-+ \param inputVector The floating point input data buffer
-+ \param outputVector The 16 bit output data buffer
-+ \param scalar The value multiplied against each point in the input buffer
-+ \param num_points The number of data values to be converted
-+ */
-+static inline void volk_gnsssdr_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
-+ unsigned int number = 0;
-+
-+ const unsigned int eighthPoints = num_points / 8;
-+
-+ const float* inputVectorPtr = (const float*)inputVector;
-+ int16_t* outputVectorPtr = outputVector;
-+
-+ float min_val = -32768;
-+ float max_val = 32767;
-+ float r;
-+
-+ __m128 vScalar = _mm_set_ps1(scalar);
-+ __m128 inputVal1, inputVal2;
-+ __m128i intInputVal1, intInputVal2;
-+ __m128 ret1, ret2;
-+ __m128 vmin_val = _mm_set_ps1(min_val);
-+ __m128 vmax_val = _mm_set_ps1(max_val);
-+
-+ for(;number < eighthPoints; number++){
-+ inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-+ inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-+
-+ // Scale and clip
-+ ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
-+ ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
-+
-+ intInputVal1 = _mm_cvtps_epi32(ret1);
-+ intInputVal2 = _mm_cvtps_epi32(ret2);
-+
-+ intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-+
-+ _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
-+ outputVectorPtr += 8;
-+ }
-+
-+ number = eighthPoints * 8;
-+ for(; number < num_points; number++){
-+ r = inputVector[number] * scalar;
-+ if(r > max_val)
-+ r = max_val;
-+ else if(r < min_val)
-+ r = min_val;
-+ outputVector[number] = (int16_t)rintf(r);
-+ }
-+}
-+#endif /* LV_HAVE_SSE2 */
-+
-+#ifdef LV_HAVE_SSE
-+#include
-+ /*!
-+ \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
-+ \param inputVector The floating point input data buffer
-+ \param outputVector The 16 bit output data buffer
-+ \param scalar The value multiplied against each point in the input buffer
-+ \param num_points The number of data values to be converted
-+ */
-+static inline void volk_gnsssdr_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
-+ unsigned int number = 0;
-+
-+ const unsigned int quarterPoints = num_points / 4;
-+
-+ const float* inputVectorPtr = (const float*)inputVector;
-+ int16_t* outputVectorPtr = outputVector;
-+
-+ float min_val = -32768;
-+ float max_val = 32767;
-+ float r;
-+
-+ __m128 vScalar = _mm_set_ps1(scalar);
-+ __m128 ret;
-+ __m128 vmin_val = _mm_set_ps1(min_val);
-+ __m128 vmax_val = _mm_set_ps1(max_val);
-+
-+ __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
-+
-+ for(;number < quarterPoints; number++){
-+ ret = _mm_load_ps(inputVectorPtr);
-+ inputVectorPtr += 4;
-+
-+ // Scale and clip
-+ ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
-+
-+ _mm_store_ps(outputFloatBuffer, ret);
-+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
-+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
-+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
-+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
-+ }
-+
-+ number = quarterPoints * 4;
-+ for(; number < num_points; number++){
-+ r = inputVector[number] * scalar;
-+ if(r > max_val)
-+ r = max_val;
-+ else if(r < min_val)
-+ r = min_val;
-+ outputVector[number] = (int16_t)rintf(r);
-+ }
-+}
-+#endif /* LV_HAVE_SSE */
-+
-+#ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
-+ \param inputVector The floating point input data buffer
-+ \param outputVector The 16 bit output data buffer
-+ \param scalar The value multiplied against each point in the input buffer
-+ \param num_points The number of data values to be converted
-+ */
-+static inline void volk_gnsssdr_32f_s32f_convert_16i_a_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
-+ int16_t* outputVectorPtr = outputVector;
-+ const float* inputVectorPtr = inputVector;
-+ unsigned int number = 0;
-+ float min_val = -32768;
-+ float max_val = 32767;
-+ float r;
-+
-+ for(number = 0; number < num_points; number++){
-+ r = *inputVectorPtr++ * scalar;
-+ if(r < min_val)
-+ r = min_val;
-+ else if(r > max_val)
-+ r = max_val;
-+ *outputVectorPtr++ = (int16_t)rintf(r);
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+
-+
-+
-+#endif /* INCLUDED_volk_gnsssdr_32f_s32f_convert_16i_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_x2_add_32f.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32f_x2_add_32f.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_x2_add_32f.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32f_x2_add_32f.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,147 @@
-+#ifndef INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H
-+#define INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H
-+
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE
-+#include
-+/*!
-+ \brief Adds the two input vectors and store their results in the third vector
-+ \param cVector The vector where the results will be stored
-+ \param aVector One of the vectors to be added
-+ \param bVector One of the vectors to be added
-+ \param num_points The number of values in aVector and bVector to be added together and stored into cVector
-+*/
-+static inline void volk_gnsssdr_32f_x2_add_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
-+ unsigned int number = 0;
-+ const unsigned int quarterPoints = num_points / 4;
-+
-+ float* cPtr = cVector;
-+ const float* aPtr = aVector;
-+ const float* bPtr= bVector;
-+
-+ __m128 aVal, bVal, cVal;
-+ for(;number < quarterPoints; number++){
-+
-+ aVal = _mm_loadu_ps(aPtr);
-+ bVal = _mm_loadu_ps(bPtr);
-+
-+ cVal = _mm_add_ps(aVal, bVal);
-+
-+ _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
-+
-+ aPtr += 4;
-+ bPtr += 4;
-+ cPtr += 4;
-+ }
-+
-+ number = quarterPoints * 4;
-+ for(;number < num_points; number++){
-+ *cPtr++ = (*aPtr++) + (*bPtr++);
-+ }
-+}
-+#endif /* LV_HAVE_SSE */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Adds the two input vectors and store their results in the third vector
-+ \param cVector The vector where the results will be stored
-+ \param aVector One of the vectors to be added
-+ \param bVector One of the vectors to be added
-+ \param num_points The number of values in aVector and bVector to be added together and stored into cVector
-+*/
-+static inline void volk_gnsssdr_32f_x2_add_32f_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
-+ float* cPtr = cVector;
-+ const float* aPtr = aVector;
-+ const float* bPtr= bVector;
-+ unsigned int number = 0;
-+
-+ for(number = 0; number < num_points; number++){
-+ *cPtr++ = (*aPtr++) + (*bPtr++);
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+#endif /* INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H */
-+#ifndef INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H
-+#define INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H
-+
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE
-+#include
-+/*!
-+ \brief Adds the two input vectors and store their results in the third vector
-+ \param cVector The vector where the results will be stored
-+ \param aVector One of the vectors to be added
-+ \param bVector One of the vectors to be added
-+ \param num_points The number of values in aVector and bVector to be added together and stored into cVector
-+*/
-+static inline void volk_gnsssdr_32f_x2_add_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
-+ unsigned int number = 0;
-+ const unsigned int quarterPoints = num_points / 4;
-+
-+ float* cPtr = cVector;
-+ const float* aPtr = aVector;
-+ const float* bPtr= bVector;
-+
-+ __m128 aVal, bVal, cVal;
-+ for(;number < quarterPoints; number++){
-+
-+ aVal = _mm_load_ps(aPtr);
-+ bVal = _mm_load_ps(bPtr);
-+
-+ cVal = _mm_add_ps(aVal, bVal);
-+
-+ _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-+
-+ aPtr += 4;
-+ bPtr += 4;
-+ cPtr += 4;
-+ }
-+
-+ number = quarterPoints * 4;
-+ for(;number < num_points; number++){
-+ *cPtr++ = (*aPtr++) + (*bPtr++);
-+ }
-+}
-+#endif /* LV_HAVE_SSE */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Adds the two input vectors and store their results in the third vector
-+ \param cVector The vector where the results will be stored
-+ \param aVector One of the vectors to be added
-+ \param bVector One of the vectors to be added
-+ \param num_points The number of values in aVector and bVector to be added together and stored into cVector
-+*/
-+static inline void volk_gnsssdr_32f_x2_add_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
-+ float* cPtr = cVector;
-+ const float* aPtr = aVector;
-+ const float* bPtr= bVector;
-+ unsigned int number = 0;
-+
-+ for(number = 0; number < num_points; number++){
-+ *cPtr++ = (*aPtr++) + (*bPtr++);
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+#ifdef LV_HAVE_ORC
-+/*!
-+ \brief Adds the two input vectors and store their results in the third vector
-+ \param cVector The vector where the results will be stored
-+ \param aVector One of the vectors to be added
-+ \param bVector One of the vectors to be added
-+ \param num_points The number of values in aVector and bVector to be added together and stored into cVector
-+*/
-+extern void volk_gnsssdr_32f_x2_add_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
-+static inline void volk_gnsssdr_32f_x2_add_32f_u_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
-+ volk_gnsssdr_32f_x2_add_32f_a_orc_impl(cVector, aVector, bVector, num_points);
-+}
-+#endif /* LV_HAVE_ORC */
-+
-+
-+#endif /* INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_conjugate_32fc.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_conjugate_32fc.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_conjugate_32fc.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_conjugate_32fc.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,127 @@
-+#ifndef INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H
-+#define INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE3
-+#include
-+ /*!
-+ \brief Takes the conjugate of a complex vector.
-+ \param cVector The vector where the results will be stored
-+ \param aVector Vector to be conjugated
-+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
-+ */
-+static inline void volk_gnsssdr_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
-+ unsigned int number = 0;
-+ const unsigned int halfPoints = num_points / 2;
-+
-+ __m128 x;
-+ lv_32fc_t* c = cVector;
-+ const lv_32fc_t* a = aVector;
-+
-+ __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
-+
-+ for(;number < halfPoints; number++){
-+
-+ x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
-+
-+ x = _mm_xor_ps(x, conjugator); // conjugate register
-+
-+ _mm_storeu_ps((float*)c,x); // Store the results back into the C container
-+
-+ a += 2;
-+ c += 2;
-+ }
-+
-+ if((num_points % 2) != 0) {
-+ *c = lv_conj(*a);
-+ }
-+}
-+#endif /* LV_HAVE_SSE3 */
-+
-+#ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Takes the conjugate of a complex vector.
-+ \param cVector The vector where the results will be stored
-+ \param aVector Vector to be conjugated
-+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
-+ */
-+static inline void volk_gnsssdr_32fc_conjugate_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
-+ lv_32fc_t* cPtr = cVector;
-+ const lv_32fc_t* aPtr = aVector;
-+ unsigned int number = 0;
-+
-+ for(number = 0; number < num_points; number++){
-+ *cPtr++ = lv_conj(*aPtr++);
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+
-+#endif /* INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H */
-+#ifndef INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H
-+#define INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE3
-+#include
-+ /*!
-+ \brief Takes the conjugate of a complex vector.
-+ \param cVector The vector where the results will be stored
-+ \param aVector Vector to be conjugated
-+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
-+ */
-+static inline void volk_gnsssdr_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
-+ unsigned int number = 0;
-+ const unsigned int halfPoints = num_points / 2;
-+
-+ __m128 x;
-+ lv_32fc_t* c = cVector;
-+ const lv_32fc_t* a = aVector;
-+
-+ __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
-+
-+ for(;number < halfPoints; number++){
-+
-+ x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
-+
-+ x = _mm_xor_ps(x, conjugator); // conjugate register
-+
-+ _mm_store_ps((float*)c,x); // Store the results back into the C container
-+
-+ a += 2;
-+ c += 2;
-+ }
-+
-+ if((num_points % 2) != 0) {
-+ *c = lv_conj(*a);
-+ }
-+}
-+#endif /* LV_HAVE_SSE3 */
-+
-+#ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Takes the conjugate of a complex vector.
-+ \param cVector The vector where the results will be stored
-+ \param aVector Vector to be conjugated
-+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
-+ */
-+static inline void volk_gnsssdr_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
-+ lv_32fc_t* cPtr = cVector;
-+ const lv_32fc_t* aPtr = aVector;
-+ unsigned int number = 0;
-+
-+ for(number = 0; number < num_points; number++){
-+ *cPtr++ = lv_conj(*aPtr++);
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+#endif /* INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,295 @@
-+/*!
-+ * \file volk_gnsssdr_32fc_convert_16ic.h
-+ * \brief Volk protokernel: converts float32 complex values to 16 integer complex values taking care of overflow
-+ * \authors
-+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
-+ *
-+ *
-+ * -------------------------------------------------------------------------
-+ *
-+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
-+ *
-+ * GNSS-SDR is a software defined Global Navigation
-+ * Satellite Systems receiver
-+ *
-+ * This file is part of GNSS-SDR.
-+ *
-+ * GNSS-SDR is free software: you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation, either version 3 of the License, or
-+ * at your option) any later version.
-+ *
-+ * GNSS-SDR is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with GNSS-SDR. If not, see .
-+ *
-+ * -------------------------------------------------------------------------
-+ */
-+
-+#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H
-+#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H
-+
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE2
-+#include
-+/*!
-+ \brief Converts 32-bit float complex samples into 16-bit integer complex samples, saturating values outside the int16_t range (SSE2, unaligned buffers)
-+ \param outputVector The 16-bit integer complex output data buffer
-+ \param inputVector The floating point complex input data buffer (never modified)
-+ \param num_points The number of complex samples to be converted
-+ */
-+static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
-+    const unsigned int sse_iters = num_points/4; /* each SIMD pass handles 8 floats = 4 complex samples */
-+
-+    const float* inputVectorPtr = (const float*)inputVector;
-+    int16_t* outputVectorPtr = (int16_t*)outputVector;
-+
-+    const float min_val = -32768;
-+    const float max_val = 32767;
-+
-+    __m128 inputVal1, inputVal2;
-+    __m128i intInputVal1, intInputVal2;
-+    __m128 ret1, ret2;
-+    const __m128 vmin_val = _mm_set_ps1(min_val);
-+    const __m128 vmax_val = _mm_set_ps1(max_val);
-+
-+    for(unsigned int i = 0; i < sse_iters; i++){
-+        inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-+
-+        // Clip to [min_val, max_val] before converting to integers
-+        ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
-+        ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
-+
-+        intInputVal1 = _mm_cvtps_epi32(ret1);
-+        intInputVal2 = _mm_cvtps_epi32(ret2);
-+
-+        intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-+
-+        _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
-+        outputVectorPtr += 8;
-+    }
-+
-+    // Scalar tail for the last (num_points % 4) samples; clip a local copy so the const input is never written
-+    for(unsigned int i = 0; i < (num_points%4)*2; i++){
-+        float clipped = inputVectorPtr[i];
-+        if(clipped > max_val) clipped = max_val;
-+        else if(clipped < min_val) clipped = min_val;
-+        outputVectorPtr[i] = (int16_t)rintf(clipped);
-+    }
-+}
-+#endif /* LV_HAVE_SSE2 */
-+
-+#ifdef LV_HAVE_SSE
-+#include
-+/*!
-+ \brief Converts 32-bit float complex samples into 16-bit integer complex samples, saturating values outside the int16_t range (SSE-guarded variant, unaligned buffers)
-+ \param outputVector The 16-bit integer complex output data buffer
-+ \param inputVector The floating point complex input data buffer (never modified)
-+ \param num_points The number of complex samples to be converted
-+ */
-+static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
-+    const unsigned int sse_iters = num_points/4; /* each SIMD pass handles 8 floats = 4 complex samples */
-+
-+    const float* inputVectorPtr = (const float*)inputVector;
-+    int16_t* outputVectorPtr = (int16_t*)outputVector;
-+
-+    const float min_val = -32768;
-+    const float max_val = 32767;
-+    // NOTE(review): __m128i, _mm_cvtps_epi32 and _mm_packs_epi32 are SSE2 intrinsics although the guard is LV_HAVE_SSE - confirm
-+    __m128 inputVal1, inputVal2;
-+    __m128i intInputVal1, intInputVal2;
-+    __m128 ret1, ret2;
-+    const __m128 vmin_val = _mm_set_ps1(min_val);
-+    const __m128 vmax_val = _mm_set_ps1(max_val);
-+
-+    for(unsigned int i = 0; i < sse_iters; i++){
-+        inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-+
-+        // Clip to [min_val, max_val] before converting to integers
-+        ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
-+        ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
-+
-+        intInputVal1 = _mm_cvtps_epi32(ret1);
-+        intInputVal2 = _mm_cvtps_epi32(ret2);
-+
-+        intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-+
-+        _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
-+        outputVectorPtr += 8;
-+    }
-+
-+    // Scalar tail for the last (num_points % 4) samples; clip a local copy so the const input is never written
-+    for(unsigned int i = 0; i < (num_points%4)*2; i++){
-+        float clipped = inputVectorPtr[i];
-+        if(clipped > max_val) clipped = max_val;
-+        else if(clipped < min_val) clipped = min_val;
-+        outputVectorPtr[i] = (int16_t)rintf(clipped);
-+    }
-+}
-+#endif /* LV_HAVE_SSE */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Converts 32-bit float complex samples into 16-bit integer complex samples, saturating values outside the int16_t range (portable version)
-+ \param outputVector The 16-bit integer complex output data buffer
-+ \param inputVector The floating point complex input data buffer (never modified)
-+ \param num_points The number of complex samples to be converted
-+ */
-+static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
-+    const float* inputVectorPtr = (const float*)inputVector;
-+    int16_t* outputVectorPtr = (int16_t*)outputVector;
-+    const float min_val = -32768;
-+    const float max_val = 32767;
-+
-+    // Two floats (I and Q) per complex sample; clip a local copy so the const input is never written
-+    for(unsigned int i = 0; i < num_points*2; i++){
-+        float clipped = inputVectorPtr[i];
-+        if(clipped > max_val) clipped = max_val;
-+        else if(clipped < min_val) clipped = min_val;
-+        outputVectorPtr[i] = (int16_t)rintf(clipped);
-+    }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H */
-+
-+
-+#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H
-+#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE2
-+#include
-+/*!
-+ \brief Converts 32-bit float complex samples into 16-bit integer complex samples, saturating values outside the int16_t range (SSE2, aligned buffers)
-+ \param outputVector The 16-bit integer complex output data buffer; written with _mm_store_si128, which requires 16-byte alignment
-+ \param inputVector The floating point complex input data buffer (never modified); read with _mm_load_ps, which requires 16-byte alignment
-+ \param num_points The number of complex samples to be converted
-+ */
-+static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
-+    const unsigned int sse_iters = num_points/4; /* each SIMD pass handles 8 floats = 4 complex samples */
-+
-+    const float* inputVectorPtr = (const float*)inputVector;
-+    int16_t* outputVectorPtr = (int16_t*)outputVector;
-+
-+    const float min_val = -32768;
-+    const float max_val = 32767;
-+
-+    __m128 inputVal1, inputVal2;
-+    __m128i intInputVal1, intInputVal2;
-+    __m128 ret1, ret2;
-+    const __m128 vmin_val = _mm_set_ps1(min_val);
-+    const __m128 vmax_val = _mm_set_ps1(max_val);
-+
-+    for(unsigned int i = 0; i < sse_iters; i++){
-+        inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-+
-+        // Clip to [min_val, max_val] before converting to integers
-+        ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
-+        ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
-+
-+        intInputVal1 = _mm_cvtps_epi32(ret1);
-+        intInputVal2 = _mm_cvtps_epi32(ret2);
-+
-+        intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-+
-+        _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
-+        outputVectorPtr += 8;
-+    }
-+
-+    // Scalar tail for the last (num_points % 4) samples; clip a local copy so the const input is never written
-+    for(unsigned int i = 0; i < (num_points%4)*2; i++){
-+        float clipped = inputVectorPtr[i];
-+        if(clipped > max_val) clipped = max_val;
-+        else if(clipped < min_val) clipped = min_val;
-+        outputVectorPtr[i] = (int16_t)rintf(clipped);
-+    }
-+}
-+#endif /* LV_HAVE_SSE2 */
-+
-+#ifdef LV_HAVE_SSE
-+#include
-+/*!
-+ \brief Converts 32-bit float complex samples into 16-bit integer complex samples, saturating values outside the int16_t range (SSE-guarded variant, aligned buffers)
-+ \param outputVector The 16-bit integer complex output data buffer; written with _mm_store_si128, which requires 16-byte alignment
-+ \param inputVector The floating point complex input data buffer (never modified); read with _mm_load_ps, which requires 16-byte alignment
-+ \param num_points The number of complex samples to be converted
-+ */
-+static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
-+    const unsigned int sse_iters = num_points/4; /* each SIMD pass handles 8 floats = 4 complex samples */
-+
-+    const float* inputVectorPtr = (const float*)inputVector;
-+    int16_t* outputVectorPtr = (int16_t*)outputVector;
-+
-+    const float min_val = -32768;
-+    const float max_val = 32767;
-+    // NOTE(review): __m128i, _mm_cvtps_epi32 and _mm_packs_epi32 are SSE2 intrinsics although the guard is LV_HAVE_SSE - confirm
-+    __m128 inputVal1, inputVal2;
-+    __m128i intInputVal1, intInputVal2;
-+    __m128 ret1, ret2;
-+    const __m128 vmin_val = _mm_set_ps1(min_val);
-+    const __m128 vmax_val = _mm_set_ps1(max_val);
-+
-+    for(unsigned int i = 0; i < sse_iters; i++){
-+        inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-+
-+        // Clip to [min_val, max_val] before converting to integers
-+        ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
-+        ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
-+
-+        intInputVal1 = _mm_cvtps_epi32(ret1);
-+        intInputVal2 = _mm_cvtps_epi32(ret2);
-+
-+        intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-+
-+        _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
-+        outputVectorPtr += 8;
-+    }
-+
-+    // Scalar tail for the last (num_points % 4) samples; clip a local copy so the const input is never written
-+    for(unsigned int i = 0; i < (num_points%4)*2; i++){
-+        float clipped = inputVectorPtr[i];
-+        if(clipped > max_val) clipped = max_val;
-+        else if(clipped < min_val) clipped = min_val;
-+        outputVectorPtr[i] = (int16_t)rintf(clipped);
-+    }
-+}
-+#endif /* LV_HAVE_SSE */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Converts 32-bit float complex samples into 16-bit integer complex samples, saturating values outside the int16_t range (portable version)
-+ \param outputVector The 16-bit integer complex output data buffer
-+ \param inputVector The floating point complex input data buffer (never modified)
-+ \param num_points The number of complex samples to be converted
-+ */
-+static inline void volk_gnsssdr_32fc_convert_16ic_a_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
-+    const float* inputVectorPtr = (const float*)inputVector;
-+    int16_t* outputVectorPtr = (int16_t*)outputVector;
-+    const float min_val = -32768;
-+    const float max_val = 32767;
-+
-+    // Two floats (I and Q) per complex sample; clip a local copy so the const input is never written
-+    for(unsigned int i = 0; i < num_points*2; i++){
-+        float clipped = inputVectorPtr[i];
-+        if(clipped > max_val) clipped = max_val;
-+        else if(clipped < min_val) clipped = min_val;
-+        outputVectorPtr[i] = (int16_t)rintf(clipped);
-+    }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,213 @@
-+/*!
-+ * \file volk_gnsssdr_32fc_convert_8ic.h
-+ * \brief Volk protokernel: converts float32 complex values to 8 integer complex values taking care of overflow
-+ * \authors
-+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
-+ *
-+ *
-+ * -------------------------------------------------------------------------
-+ *
-+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
-+ *
-+ * GNSS-SDR is a software defined Global Navigation
-+ * Satellite Systems receiver
-+ *
-+ * This file is part of GNSS-SDR.
-+ *
-+ * GNSS-SDR is free software: you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation, either version 3 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * GNSS-SDR is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
-+ *
-+ * -------------------------------------------------------------------------
-+ */
-+
-+#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H
-+#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H
-+
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE2
-+#include
-+/*!
-+ \brief Converts 32-bit float complex samples into 8-bit integer complex samples, saturating values outside the int8_t range (SSE2, unaligned buffers)
-+ \param outputVector The 8-bit integer complex output data buffer
-+ \param inputVector The floating point complex input data buffer (never modified)
-+ \param num_points The number of complex samples to be converted
-+ */
-+static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
-+    const unsigned int sse_iters = num_points/8; /* each SIMD pass handles 16 floats = 8 complex samples */
-+
-+    const float* inputVectorPtr = (const float*)inputVector;
-+    int8_t* outputVectorPtr = (int8_t*)outputVector;
-+
-+    const float min_val = -128;
-+    const float max_val = 127;
-+
-+    __m128 inputVal1, inputVal2, inputVal3, inputVal4;
-+    __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
-+    __m128i int8InputVal;
-+    __m128 ret1, ret2, ret3, ret4;
-+    const __m128 vmin_val = _mm_set_ps1(min_val);
-+    const __m128 vmax_val = _mm_set_ps1(max_val);
-+
-+    for(unsigned int i = 0; i < sse_iters; i++){
-+        inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal3 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal4 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-+
-+        // Clip to [min_val, max_val] before converting to integers
-+        ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
-+        ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
-+        ret3 = _mm_max_ps(_mm_min_ps(inputVal3, vmax_val), vmin_val);
-+        ret4 = _mm_max_ps(_mm_min_ps(inputVal4, vmax_val), vmin_val);
-+
-+        intInputVal1 = _mm_cvtps_epi32(ret1);
-+        intInputVal2 = _mm_cvtps_epi32(ret2);
-+        intInputVal3 = _mm_cvtps_epi32(ret3);
-+        intInputVal4 = _mm_cvtps_epi32(ret4);
-+
-+        intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-+        intInputVal2 = _mm_packs_epi32(intInputVal3, intInputVal4);
-+        int8InputVal = _mm_packs_epi16(intInputVal1, intInputVal2);
-+
-+        _mm_storeu_si128((__m128i*)outputVectorPtr, int8InputVal);
-+        outputVectorPtr += 16;
-+    }
-+
-+    // Scalar tail: the SIMD loop consumed 8 samples per pass, so (num_points % 8) samples of 2 floats each remain
-+    for(unsigned int i = 0; i < (num_points%8)*2; i++){
-+        float clipped = inputVectorPtr[i]; /* local copy: the const input is never written */
-+        if(clipped > max_val) clipped = max_val;
-+        else if(clipped < min_val) clipped = min_val;
-+        outputVectorPtr[i] = (int8_t)rintf(clipped);
-+    }
-+}
-+#endif /* LV_HAVE_SSE2 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Converts 32-bit float complex samples into 8-bit integer complex samples, saturating values outside the int8_t range (portable version)
-+ \param outputVector The 8-bit integer complex output data buffer
-+ \param inputVector The floating point complex input data buffer (never modified)
-+ \param num_points The number of complex samples to be converted
-+ */
-+static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
-+    const float* inputVectorPtr = (const float*)inputVector;
-+    int8_t* outputVectorPtr = (int8_t*)outputVector;
-+    const float min_val = -128;
-+    const float max_val = 127;
-+
-+    // Two floats (I and Q) per complex sample; clip a local copy so the const input is never written
-+    for(unsigned int i = 0; i < num_points*2; i++){
-+        float clipped = inputVectorPtr[i];
-+        if(clipped > max_val) clipped = max_val;
-+        else if(clipped < min_val) clipped = min_val;
-+        outputVectorPtr[i] = (int8_t)rintf(clipped);
-+    }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H */
-+
-+
-+#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H
-+#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE2
-+#include
-+/*!
-+ \brief Converts 32-bit float complex samples into 8-bit integer complex samples, saturating values outside the int8_t range (SSE2, aligned buffers)
-+ \param outputVector The 8-bit integer complex output data buffer; written with _mm_store_si128, which requires 16-byte alignment
-+ \param inputVector The floating point complex input data buffer (never modified); read with _mm_load_ps, which requires 16-byte alignment
-+ \param num_points The number of complex samples to be converted
-+ */
-+static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
-+    const unsigned int sse_iters = num_points/8; /* each SIMD pass handles 16 floats = 8 complex samples */
-+
-+    const float* inputVectorPtr = (const float*)inputVector;
-+    int8_t* outputVectorPtr = (int8_t*)outputVector;
-+
-+    const float min_val = -128;
-+    const float max_val = 127;
-+
-+    __m128 inputVal1, inputVal2, inputVal3, inputVal4;
-+    __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
-+    __m128i int8InputVal;
-+    __m128 ret1, ret2, ret3, ret4;
-+    const __m128 vmin_val = _mm_set_ps1(min_val);
-+    const __m128 vmax_val = _mm_set_ps1(max_val);
-+
-+    for(unsigned int i = 0; i < sse_iters; i++){
-+        inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal3 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal4 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-+
-+        // Clip to [min_val, max_val] before converting to integers
-+        ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
-+        ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
-+        ret3 = _mm_max_ps(_mm_min_ps(inputVal3, vmax_val), vmin_val);
-+        ret4 = _mm_max_ps(_mm_min_ps(inputVal4, vmax_val), vmin_val);
-+
-+        intInputVal1 = _mm_cvtps_epi32(ret1);
-+        intInputVal2 = _mm_cvtps_epi32(ret2);
-+        intInputVal3 = _mm_cvtps_epi32(ret3);
-+        intInputVal4 = _mm_cvtps_epi32(ret4);
-+
-+        intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-+        intInputVal2 = _mm_packs_epi32(intInputVal3, intInputVal4);
-+        int8InputVal = _mm_packs_epi16(intInputVal1, intInputVal2);
-+
-+        _mm_store_si128((__m128i*)outputVectorPtr, int8InputVal);
-+        outputVectorPtr += 16;
-+    }
-+
-+    // Scalar tail: the SIMD loop consumed 8 samples per pass, so (num_points % 8) samples of 2 floats each remain
-+    for(unsigned int i = 0; i < (num_points%8)*2; i++){
-+        float clipped = inputVectorPtr[i]; /* local copy: the const input is never written */
-+        if(clipped > max_val) clipped = max_val;
-+        else if(clipped < min_val) clipped = min_val;
-+        outputVectorPtr[i] = (int8_t)rintf(clipped);
-+    }
-+}
-+#endif /* LV_HAVE_SSE2 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Converts 32-bit float complex samples into 8-bit integer complex samples, saturating values outside the int8_t range (portable version)
-+ \param outputVector The 8-bit integer complex output data buffer
-+ \param inputVector The floating point complex input data buffer (never modified)
-+ \param num_points The number of complex samples to be converted
-+ */
-+static inline void volk_gnsssdr_32fc_convert_8ic_a_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
-+    const float* inputVectorPtr = (const float*)inputVector;
-+    int8_t* outputVectorPtr = (int8_t*)outputVector;
-+    const float min_val = -128;
-+    const float max_val = 127;
-+
-+    // Two floats (I and Q) per complex sample; clip a local copy so the const input is never written
-+    for(unsigned int i = 0; i < num_points*2; i++){
-+        float clipped = inputVectorPtr[i];
-+        if(clipped > max_val) clipped = max_val;
-+        else if(clipped < min_val) clipped = min_val;
-+        outputVectorPtr[i] = (int8_t)rintf(clipped);
-+    }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_magnitude_squared_32f.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_magnitude_squared_32f.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_magnitude_squared_32f.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_magnitude_squared_32f.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,228 @@
-+#ifndef INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_u_H
-+#define INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_u_H
-+
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE3
-+#include
-+ /*!
-+ \brief Computes the squared magnitude (re*re + im*im) of every complex input sample (SSE3, unaligned buffers)
-+ \param magnitudeVector The output buffer receiving one real value per complex sample
-+ \param complexVector The buffer holding the complex input samples
-+ \param num_points The number of complex samples in complexVector to process
-+ */
-+static inline void volk_gnsssdr_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-+    const unsigned int simdBlocks = num_points / 4;
-+    const float* src = (const float*)complexVector;
-+    float* dst = magnitudeVector;
-+    unsigned int idx;
-+
-+    // Vector part: four complex samples (eight floats) per pass
-+    for(idx = 0; idx < simdBlocks; idx++){
-+        const __m128 firstPair = _mm_loadu_ps(src);
-+        const __m128 secondPair = _mm_loadu_ps(src + 4);
-+        // Square every real and imaginary component
-+        const __m128 firstSq = _mm_mul_ps(firstPair, firstPair);
-+        const __m128 secondSq = _mm_mul_ps(secondPair, secondPair);
-+
-+        // The horizontal add sums each adjacent re^2 / im^2 pair
-+        _mm_storeu_ps(dst, _mm_hadd_ps(firstSq, secondSq));
-+
-+        src += 8;
-+        dst += 4;
-+    }
-+
-+    // Scalar part: whatever is left after the last full block of four
-+    for(idx = simdBlocks * 4; idx < num_points; idx++){
-+        const float re = src[0];
-+        const float im = src[1];
-+
-+        *dst = (re * re) + (im * im);
-+        src += 2;
-+        dst += 1;
-+    }
-+}
-+#endif /* LV_HAVE_SSE3 */
-+
-+#ifdef LV_HAVE_SSE
-+#include
-+ /*!
-+ \brief Computes the squared magnitude (re*re + im*im) of every complex input sample (SSE, unaligned buffers)
-+ \param magnitudeVector The output buffer receiving one real value per complex sample
-+ \param complexVector The buffer holding the complex input samples
-+ \param num_points The number of complex samples in complexVector to process
-+ */
-+static inline void volk_gnsssdr_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-+    const unsigned int simdBlocks = num_points / 4;
-+    const float* src = (const float*)complexVector;
-+    float* dst = magnitudeVector;
-+    unsigned int idx;
-+
-+    // Vector part: four complex samples (eight floats) per pass
-+    for(idx = 0; idx < simdBlocks; idx++){
-+        const __m128 firstPair = _mm_loadu_ps(src);
-+        const __m128 secondPair = _mm_loadu_ps(src + 4);
-+
-+        // De-interleave: gather the four real parts ...
-+        const __m128 reals = _mm_shuffle_ps(firstPair, secondPair, _MM_SHUFFLE(2,0,2,0));
-+        // ... and the four imaginary parts
-+        const __m128 imags = _mm_shuffle_ps(firstPair, secondPair, _MM_SHUFFLE(3,1,3,1));
-+
-+        // re^2 + im^2 for the four samples at once
-+        const __m128 realSq = _mm_mul_ps(reals, reals);
-+        const __m128 imagSq = _mm_mul_ps(imags, imags);
-+        const __m128 sumSq = _mm_add_ps(realSq, imagSq);
-+        _mm_storeu_ps(dst, sumSq);
-+
-+        src += 8;
-+        dst += 4;
-+    }
-+
-+    // Scalar part: whatever is left after the last full block of four
-+    for(idx = simdBlocks * 4; idx < num_points; idx++){
-+        const float re = src[0];
-+        const float im = src[1];
-+
-+        *dst = (re * re) + (im * im);
-+        src += 2;
-+        dst += 1;
-+    }
-+}
-+#endif /* LV_HAVE_SSE */
-+
-+#ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Computes the squared magnitude (re*re + im*im) of every complex input sample (portable version)
-+ \param magnitudeVector The output buffer receiving one real value per complex sample
-+ \param complexVector The buffer holding the complex input samples
-+ \param num_points The number of complex samples in complexVector to process
-+ */
-+static inline void volk_gnsssdr_32fc_magnitude_squared_32f_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-+    const float* src = (const float*)complexVector; // interleaved re, im, re, im, ...
-+    unsigned int idx;
-+    for(idx = 0; idx < num_points; idx++){
-+        const float re = src[0];
-+        const float im = src[1];
-+        magnitudeVector[idx] = (re*re) + (im*im);
-+        src += 2;
-+    }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_u_H */
-+#ifndef INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_a_H
-+#define INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_a_H
-+
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE3
-+#include
-+ /*!
-+ \brief Computes the squared magnitude (re*re + im*im) of every complex input sample (SSE3, aligned loads and stores)
-+ \param magnitudeVector The output buffer receiving one real value per complex sample
-+ \param complexVector The buffer holding the complex input samples
-+ \param num_points The number of complex samples in complexVector to process
-+ */
-+static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-+    const unsigned int simdBlocks = num_points / 4;
-+    const float* src = (const float*)complexVector;
-+    float* dst = magnitudeVector;
-+    unsigned int idx;
-+
-+    // Vector part: four complex samples (eight floats) per pass
-+    for(idx = 0; idx < simdBlocks; idx++){
-+        const __m128 firstPair = _mm_load_ps(src);
-+        const __m128 secondPair = _mm_load_ps(src + 4);
-+        // Square every real and imaginary component
-+        const __m128 firstSq = _mm_mul_ps(firstPair, firstPair);
-+        const __m128 secondSq = _mm_mul_ps(secondPair, secondPair);
-+
-+        // The horizontal add sums each adjacent re^2 / im^2 pair
-+        _mm_store_ps(dst, _mm_hadd_ps(firstSq, secondSq));
-+
-+        src += 8;
-+        dst += 4;
-+    }
-+
-+    // Scalar part: whatever is left after the last full block of four
-+    for(idx = simdBlocks * 4; idx < num_points; idx++){
-+        const float re = src[0];
-+        const float im = src[1];
-+
-+        *dst = (re * re) + (im * im);
-+        src += 2;
-+        dst += 1;
-+    }
-+}
-+#endif /* LV_HAVE_SSE3 */
-+
-+#ifdef LV_HAVE_SSE
-+#include
-+ /*!
-+ \brief Computes the squared magnitude (re*re + im*im) of every complex input sample (SSE, aligned loads and stores)
-+ \param magnitudeVector The output buffer receiving one real value per complex sample
-+ \param complexVector The buffer holding the complex input samples
-+ \param num_points The number of complex samples in complexVector to process
-+ */
-+static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-+    const unsigned int simdBlocks = num_points / 4;
-+    const float* src = (const float*)complexVector;
-+    float* dst = magnitudeVector;
-+    unsigned int idx;
-+
-+    // Vector part: four complex samples (eight floats) per pass
-+    for(idx = 0; idx < simdBlocks; idx++){
-+        const __m128 firstPair = _mm_load_ps(src);
-+        const __m128 secondPair = _mm_load_ps(src + 4);
-+
-+        // De-interleave: gather the four real parts ...
-+        const __m128 reals = _mm_shuffle_ps(firstPair, secondPair, _MM_SHUFFLE(2,0,2,0));
-+        // ... and the four imaginary parts
-+        const __m128 imags = _mm_shuffle_ps(firstPair, secondPair, _MM_SHUFFLE(3,1,3,1));
-+
-+        // re^2 + im^2 for the four samples at once
-+        const __m128 realSq = _mm_mul_ps(reals, reals);
-+        const __m128 imagSq = _mm_mul_ps(imags, imags);
-+        const __m128 sumSq = _mm_add_ps(realSq, imagSq);
-+        _mm_store_ps(dst, sumSq);
-+
-+        src += 8;
-+        dst += 4;
-+    }
-+
-+    // Scalar part: whatever is left after the last full block of four
-+    for(idx = simdBlocks * 4; idx < num_points; idx++){
-+        const float re = src[0];
-+        const float im = src[1];
-+
-+        *dst = (re * re) + (im * im);
-+        src += 2;
-+        dst += 1;
-+    }
-+}
-+#endif /* LV_HAVE_SSE */
-+
-+#ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Computes the squared magnitude (re*re + im*im) of every complex input sample (portable version)
-+ \param magnitudeVector The output buffer receiving one real value per complex sample
-+ \param complexVector The buffer holding the complex input samples
-+ \param num_points The number of complex samples in complexVector to process
-+ */
-+static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-+    const float* src = (const float*)complexVector; // interleaved re, im, re, im, ...
-+    unsigned int idx;
-+    for(idx = 0; idx < num_points; idx++){
-+        const float re = src[0];
-+        const float im = src[1];
-+        magnitudeVector[idx] = (re*re) + (im*im);
-+        src += 2;
-+    }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32f_convert_8ic.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32f_convert_8ic.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32f_convert_8ic.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32f_convert_8ic.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,231 @@
-+/*!
-+ * \file volk_gnsssdr_32fc_s32f_convert_8ic.h
-+ * \brief Volk protokernel: converts float32 complex values to 8 integer complex values taking care of overflow
-+ * \authors
-+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
-+ *
-+ *
-+ * -------------------------------------------------------------------------
-+ *
-+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
-+ *
-+ * GNSS-SDR is a software defined Global Navigation
-+ * Satellite Systems receiver
-+ *
-+ * This file is part of GNSS-SDR.
-+ *
-+ * GNSS-SDR is free software: you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation, either version 3 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * GNSS-SDR is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
-+ *
-+ * -------------------------------------------------------------------------
-+ */
-+
-+#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_u_H
-+#define INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_u_H
-+
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE2
-+#include
-+/*!
-+ \brief Divides 32-bit float complex samples by a scalar and converts them into 8-bit integer complex samples, saturating values outside the int8_t range (SSE2, unaligned buffers)
-+ \param outputVector The 8-bit integer complex output data buffer
-+ \param inputVector The floating point complex input data buffer
-+ \param scalar The value every input component is divided by before clipping
-+ \param num_points The number of complex samples to be converted
-+ */
-+static inline void volk_gnsssdr_32fc_s32f_convert_8ic_u_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points){
-+    const unsigned int sse_iters = num_points/8; /* each SIMD pass handles 16 floats = 8 complex samples */
-+
-+    const float* inputVectorPtr = (const float*)inputVector;
-+    int8_t* outputVectorPtr = (int8_t*)outputVector;
-+    const __m128 invScalar = _mm_set_ps1(1.0/scalar); /* the SIMD path multiplies by the reciprocal */
-+
-+    const float min_val = -128;
-+    const float max_val = 127;
-+
-+    __m128 inputVal1, inputVal2, inputVal3, inputVal4;
-+    __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
-+    __m128i int8InputVal;
-+    __m128 ret1, ret2, ret3, ret4;
-+    const __m128 vmin_val = _mm_set_ps1(min_val);
-+    const __m128 vmax_val = _mm_set_ps1(max_val);
-+
-+    for(unsigned int i = 0; i < sse_iters; i++){
-+        inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal3 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal4 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-+
-+        inputVal1 = _mm_mul_ps(inputVal1, invScalar);
-+        inputVal2 = _mm_mul_ps(inputVal2, invScalar);
-+        inputVal3 = _mm_mul_ps(inputVal3, invScalar);
-+        inputVal4 = _mm_mul_ps(inputVal4, invScalar);
-+        // Clip to [min_val, max_val] before converting to integers
-+        ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
-+        ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
-+        ret3 = _mm_max_ps(_mm_min_ps(inputVal3, vmax_val), vmin_val);
-+        ret4 = _mm_max_ps(_mm_min_ps(inputVal4, vmax_val), vmin_val);
-+
-+        intInputVal1 = _mm_cvtps_epi32(ret1);
-+        intInputVal2 = _mm_cvtps_epi32(ret2);
-+        intInputVal3 = _mm_cvtps_epi32(ret3);
-+        intInputVal4 = _mm_cvtps_epi32(ret4);
-+
-+        intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-+        intInputVal2 = _mm_packs_epi32(intInputVal3, intInputVal4);
-+        int8InputVal = _mm_packs_epi16(intInputVal1, intInputVal2);
-+
-+        _mm_storeu_si128((__m128i*)outputVectorPtr, int8InputVal);
-+        outputVectorPtr += 16;
-+    }
-+
-+    // Scalar tail: the SIMD loop consumed 8 samples per pass, so (num_points % 8)
-+    // samples of 2 floats each remain to be scaled, clipped and rounded
-+    for(unsigned int i = 0; i < (num_points%8)*2; i++){
-+        float scaled = inputVectorPtr[i]/scalar;
-+        if(scaled > max_val) scaled = max_val;
-+        else if(scaled < min_val) scaled = min_val;
-+        outputVectorPtr[i] = (int8_t)rintf(scaled);
-+    }
-+}
-+#endif /* LV_HAVE_SSE2 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Divides 32-bit float complex samples by a scalar and converts them into 8-bit integer complex samples, saturating values outside the int8_t range (portable version)
-+ \param outputVector The 8-bit integer complex output data buffer
-+ \param inputVector The floating point complex input data buffer
-+ \param scalar The value every input component is divided by before clipping
-+ \param num_points The number of complex samples to be converted
-+ */
-+static inline void volk_gnsssdr_32fc_s32f_convert_8ic_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points){
-+    const float* src = (const float*)inputVector;
-+    int8_t* dst = (int8_t*)outputVector;
-+    const float lowest = -128;
-+    const float highest = 127;
-+    const unsigned int total = num_points*2; // two floats (I and Q) per complex sample
-+    unsigned int k;
-+    for(k = 0; k < total; k++){
-+        float value = src[k]/scalar;
-+        // Saturate to the int8_t range before rounding
-+        if(value > highest) value = highest;
-+        else if(value < lowest) value = lowest;
-+        dst[k] = (int8_t)rintf(value);
-+    }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_u_H */
-+
-+
-+#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_a_H
-+#define INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE2
-+#include
-+/*!
-+ \brief Divides 32-bit float complex samples by a scalar and converts them into 8-bit integer complex samples, saturating values outside the int8_t range (SSE2, aligned buffers)
-+ \param outputVector The 8-bit integer complex output data buffer; written with _mm_store_si128, which requires 16-byte alignment
-+ \param inputVector The floating point complex input data buffer; read with _mm_load_ps, which requires 16-byte alignment
-+ \param scalar The value every input component is divided by before clipping
-+ \param num_points The number of complex samples to be converted
-+ */
-+static inline void volk_gnsssdr_32fc_s32f_convert_8ic_a_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points){
-+    const unsigned int sse_iters = num_points/8; /* each SIMD pass handles 16 floats = 8 complex samples */
-+
-+    const float* inputVectorPtr = (const float*)inputVector;
-+    int8_t* outputVectorPtr = (int8_t*)outputVector;
-+    const __m128 invScalar = _mm_set_ps1(1.0/scalar); /* the SIMD path multiplies by the reciprocal */
-+
-+    const float min_val = -128;
-+    const float max_val = 127;
-+
-+    __m128 inputVal1, inputVal2, inputVal3, inputVal4;
-+    __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
-+    __m128i int8InputVal;
-+    __m128 ret1, ret2, ret3, ret4;
-+    const __m128 vmin_val = _mm_set_ps1(min_val);
-+    const __m128 vmax_val = _mm_set_ps1(max_val);
-+
-+    for(unsigned int i = 0; i < sse_iters; i++){
-+        inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal3 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-+        inputVal4 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-+
-+        inputVal1 = _mm_mul_ps(inputVal1, invScalar);
-+        inputVal2 = _mm_mul_ps(inputVal2, invScalar);
-+        inputVal3 = _mm_mul_ps(inputVal3, invScalar);
-+        inputVal4 = _mm_mul_ps(inputVal4, invScalar);
-+        // Clip to [min_val, max_val] before converting to integers
-+        ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
-+        ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
-+        ret3 = _mm_max_ps(_mm_min_ps(inputVal3, vmax_val), vmin_val);
-+        ret4 = _mm_max_ps(_mm_min_ps(inputVal4, vmax_val), vmin_val);
-+
-+        intInputVal1 = _mm_cvtps_epi32(ret1);
-+        intInputVal2 = _mm_cvtps_epi32(ret2);
-+        intInputVal3 = _mm_cvtps_epi32(ret3);
-+        intInputVal4 = _mm_cvtps_epi32(ret4);
-+
-+        intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-+        intInputVal2 = _mm_packs_epi32(intInputVal3, intInputVal4);
-+        int8InputVal = _mm_packs_epi16(intInputVal1, intInputVal2);
-+
-+        _mm_store_si128((__m128i*)outputVectorPtr, int8InputVal);
-+        outputVectorPtr += 16;
-+    }
-+
-+    // Scalar tail: the SIMD loop consumed 8 samples per pass, so (num_points % 8)
-+    // samples of 2 floats each remain to be scaled, clipped and rounded
-+    for(unsigned int i = 0; i < (num_points%8)*2; i++){
-+        float scaled = inputVectorPtr[i]/scalar;
-+        if(scaled > max_val) scaled = max_val;
-+        else if(scaled < min_val) scaled = min_val;
-+        outputVectorPtr[i] = (int8_t)rintf(scaled);
-+    }
-+}
-+#endif /* LV_HAVE_SSE2 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part)
-+ \param inputVector The floating point input data buffer
-+ \param outputVector The 16 bit output data buffer
-+ \param num_points The number of data values to be converted
-+ */
-+static inline void volk_gnsssdr_32fc_s32f_convert_8ic_a_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points){
-+ float* inputVectorPtr = (float*)inputVector;
-+ int8_t* outputVectorPtr = (int8_t*)outputVector;
-+ float scaled = 0;
-+ float min_val = -128;
-+ float max_val = 127;
-+
-+ for(unsigned int i = 0; i < num_points*2; i++){
-+ scaled = inputVectorPtr[i]/scalar;
-+ if(scaled > max_val)
-+ scaled = max_val;
-+ else if(scaled < min_val)
-+ scaled = min_val;
-+ outputVectorPtr[i] = (int8_t)rintf(scaled);
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,266 @@
-+/*!
-+ * \file volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc
-+ * \brief Volk protokernel: replaces the tracking function for update_local_code
-+ * \authors
-+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
-+ *
-+ *
-+ * Volk protokernel that replaces the tracking function for update_local_code
-+ *
-+ * -------------------------------------------------------------------------
-+ *
-+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
-+ *
-+ * GNSS-SDR is a software defined Global Navigation
-+ * Satellite Systems receiver
-+ *
-+ * This file is part of GNSS-SDR.
-+ *
-+ * GNSS-SDR is free software: you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation, either version 3 of the License, or
-+ * at your option) any later version.
-+ *
-+ * GNSS-SDR is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with GNSS-SDR. If not, see .
-+ *
-+ * -------------------------------------------------------------------------
-+ */
-+
-+#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_H
-+#define INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include
-+ /*!
-+ \brief Takes the conjugate of a complex vector.
-+ \param cVector The vector where the results will be stored
-+ \param aVector Vector to be conjugated
-+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
-+ */
-+static inline void volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_sse4_1(lv_32fc_t* d_very_early_code, const float d_very_early_late_spc_chips, const float code_length_half_chips, const float code_phase_step_half_chips, const float tcode_half_chips_input, const lv_32fc_t* d_ca_code, unsigned int num_points){
-+
-+// float* pointer1 = (float*)&d_very_early_late_spc_chips;
-+// *pointer1 = 1;
-+// float* pointer2 = (float*)&code_length_half_chips;
-+// *pointer2 = 6;
-+// float* pointer3 = (float*)&code_phase_step_half_chips;
-+// *pointer3 = 7;
-+// float* pointer4 = (float*)&tcode_half_chips_input;
-+// *pointer4 = 8;
-+
-+ const unsigned int sse_iters = num_points / 4;
-+
-+ __m128 tquot, fmod_num, fmod_result, associated_chip_index_array;
-+
-+ __m128 tcode_half_chips_array = _mm_set_ps (tcode_half_chips_input+3*code_phase_step_half_chips, tcode_half_chips_input+2*code_phase_step_half_chips, tcode_half_chips_input+code_phase_step_half_chips, tcode_half_chips_input);
-+ __m128 code_phase_step_half_chips_array = _mm_set1_ps (code_phase_step_half_chips*4);
-+ __m128 d_very_early_late_spc_chips_Multiplied_by_2 = _mm_set1_ps (2*d_very_early_late_spc_chips);
-+ __m128 code_length_half_chips_array = _mm_set1_ps (code_length_half_chips);
-+ __m128 twos = _mm_set1_ps (2);
-+ __m128i associated_chip_index_array_int;
-+
-+ __VOLK_ATTR_ALIGNED(16) int32_t output[4];
-+
-+ for (unsigned int i = 0; i < sse_iters; i++)
-+ {
-+ //fmod = numer - tquot * denom; tquot = numer/denom truncated
-+ //associated_chip_index = 2 + round(fmod(tcode_half_chips - 2*d_very_early_late_spc_chips, code_length_half_chips));
-+ fmod_num = _mm_sub_ps (tcode_half_chips_array, d_very_early_late_spc_chips_Multiplied_by_2);
-+ tquot = _mm_div_ps (fmod_num, code_length_half_chips_array);
-+ tquot = _mm_round_ps (tquot, (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) );
-+ fmod_result = _mm_sub_ps (fmod_num, _mm_mul_ps (tquot, code_length_half_chips_array));
-+
-+ associated_chip_index_array = _mm_round_ps (fmod_result, (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC));
-+ associated_chip_index_array = _mm_add_ps(twos, associated_chip_index_array);
-+ associated_chip_index_array_int = _mm_cvtps_epi32 (associated_chip_index_array);
-+ _mm_storeu_si128 ((__m128i*)output, associated_chip_index_array_int);
-+
-+ //d_very_early_code[i] = d_ca_code[associated_chip_index];
-+ *d_very_early_code++ = d_ca_code[output[0]];
-+ *d_very_early_code++ = d_ca_code[output[1]];
-+ *d_very_early_code++ = d_ca_code[output[2]];
-+ *d_very_early_code++ = d_ca_code[output[3]];
-+
-+ //tcode_half_chips = tcode_half_chips + code_phase_step_half_chips;
-+ tcode_half_chips_array = _mm_add_ps (tcode_half_chips_array, code_phase_step_half_chips_array);
-+ }
-+
-+ if (num_points%4!=0)
-+ {
-+ __VOLK_ATTR_ALIGNED(16) float tcode_half_chips_stored[4];
-+ _mm_storeu_si128 ((__m128i*)tcode_half_chips_stored, tcode_half_chips_array);
-+
-+ int associated_chip_index;
-+ float tcode_half_chips = tcode_half_chips_stored[0];
-+ float d_very_early_late_spc_chips_multiplied_by_2 = 2*d_very_early_late_spc_chips;
-+
-+ for (unsigned int i = 0; i < num_points%4; i++)
-+ {
-+ associated_chip_index = 2 + round(fmod(tcode_half_chips - d_very_early_late_spc_chips_multiplied_by_2, code_length_half_chips));
-+ d_very_early_code[i] = d_ca_code[associated_chip_index];
-+ tcode_half_chips = tcode_half_chips + code_phase_step_half_chips;
-+ }
-+ }
-+}
-+#endif /* LV_HAVE_SSE4_1 */
-+
-+#ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Takes the conjugate of a complex vector.
-+ \param cVector The vector where the results will be stored
-+ \param aVector Vector to be conjugated
-+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
-+ */
-+static inline void volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_generic(lv_32fc_t* d_very_early_code, const float d_very_early_late_spc_chips, const float code_length_half_chips, const float code_phase_step_half_chips, const float tcode_half_chips_input, const lv_32fc_t* d_ca_code, unsigned int num_points){
-+
-+ float* pointer1 = (float*)&d_very_early_late_spc_chips;
-+ *pointer1 = 1;
-+ float* pointer2 = (float*)&code_length_half_chips;
-+ *pointer2 = 6;
-+ float* pointer3 = (float*)&code_phase_step_half_chips;
-+ *pointer3 = 7;
-+ float* pointer4 = (float*)&tcode_half_chips_input;
-+ *pointer4 = 8;
-+
-+ int associated_chip_index;
-+ float tcode_half_chips = tcode_half_chips_input;
-+ float d_very_early_late_spc_chips_multiplied_by_2 = 2*d_very_early_late_spc_chips;
-+
-+ for (unsigned int i = 0; i < num_points; i++)
-+ {
-+ associated_chip_index = 2 + round(fmod(tcode_half_chips - d_very_early_late_spc_chips_multiplied_by_2, code_length_half_chips));
-+ d_very_early_code[i] = d_ca_code[associated_chip_index];
-+ tcode_half_chips = tcode_half_chips + code_phase_step_half_chips;
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+
-+#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_H */
-+#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_H
-+#define INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include
-+ /*!
-+ \brief Takes the conjugate of a complex vector.
-+ \param cVector The vector where the results will be stored
-+ \param aVector Vector to be conjugated
-+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
-+ */
-+static inline void volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_sse4_1(lv_32fc_t* d_very_early_code, const float d_very_early_late_spc_chips, const float code_length_half_chips, const float code_phase_step_half_chips, const float tcode_half_chips_input, const lv_32fc_t* d_ca_code, unsigned int num_points){
-+
-+ // float* pointer1 = (float*)&d_very_early_late_spc_chips;
-+ // *pointer1 = 1;
-+ // float* pointer2 = (float*)&code_length_half_chips;
-+ // *pointer2 = 6;
-+ // float* pointer3 = (float*)&code_phase_step_half_chips;
-+ // *pointer3 = 7;
-+ // float* pointer4 = (float*)&tcode_half_chips_input;
-+ // *pointer4 = 8;
-+
-+ const unsigned int sse_iters = num_points / 4;
-+
-+ __m128 tquot, fmod_num, fmod_result, associated_chip_index_array;
-+
-+ __m128 tcode_half_chips_array = _mm_set_ps (tcode_half_chips_input+3*code_phase_step_half_chips, tcode_half_chips_input+2*code_phase_step_half_chips, tcode_half_chips_input+code_phase_step_half_chips, tcode_half_chips_input);
-+ __m128 code_phase_step_half_chips_array = _mm_set1_ps (code_phase_step_half_chips*4);
-+ __m128 d_very_early_late_spc_chips_Multiplied_by_2 = _mm_set1_ps (2*d_very_early_late_spc_chips);
-+ __m128 code_length_half_chips_array = _mm_set1_ps (code_length_half_chips);
-+ __m128 twos = _mm_set1_ps (2);
-+ __m128i associated_chip_index_array_int;
-+
-+ __VOLK_ATTR_ALIGNED(16) int32_t output[4];
-+
-+ for (unsigned int i = 0; i < sse_iters; i++)
-+ {
-+ //fmod = numer - tquot * denom; tquot = numer/denom truncated
-+ //associated_chip_index = 2 + round(fmod(tcode_half_chips - 2*d_very_early_late_spc_chips, code_length_half_chips));
-+ fmod_num = _mm_sub_ps (tcode_half_chips_array, d_very_early_late_spc_chips_Multiplied_by_2);
-+ tquot = _mm_div_ps (fmod_num, code_length_half_chips_array);
-+ tquot = _mm_round_ps (tquot, (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) );
-+ fmod_result = _mm_sub_ps (fmod_num, _mm_mul_ps (tquot, code_length_half_chips_array));
-+
-+ associated_chip_index_array = _mm_round_ps (fmod_result, (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC));
-+ associated_chip_index_array = _mm_add_ps(twos, associated_chip_index_array);
-+ associated_chip_index_array_int = _mm_cvtps_epi32 (associated_chip_index_array);
-+ _mm_store_si128 ((__m128i*)output, associated_chip_index_array_int);
-+
-+ //d_very_early_code[i] = d_ca_code[associated_chip_index];
-+ *d_very_early_code++ = d_ca_code[output[0]];
-+ *d_very_early_code++ = d_ca_code[output[1]];
-+ *d_very_early_code++ = d_ca_code[output[2]];
-+ *d_very_early_code++ = d_ca_code[output[3]];
-+
-+ //tcode_half_chips = tcode_half_chips + code_phase_step_half_chips;
-+ tcode_half_chips_array = _mm_add_ps (tcode_half_chips_array, code_phase_step_half_chips_array);
-+ }
-+
-+ if (num_points%4!=0)
-+ {
-+ __VOLK_ATTR_ALIGNED(16) float tcode_half_chips_stored[4];
-+ _mm_store_si128 ((__m128i*)tcode_half_chips_stored, tcode_half_chips_array);
-+
-+ int associated_chip_index;
-+ float tcode_half_chips = tcode_half_chips_stored[0];
-+ float d_very_early_late_spc_chips_multiplied_by_2 = 2*d_very_early_late_spc_chips;
-+
-+ for (unsigned int i = 0; i < num_points%4; i++)
-+ {
-+ associated_chip_index = 2 + round(fmod(tcode_half_chips - d_very_early_late_spc_chips_multiplied_by_2, code_length_half_chips));
-+ d_very_early_code[i] = d_ca_code[associated_chip_index];
-+ tcode_half_chips = tcode_half_chips + code_phase_step_half_chips;
-+ }
-+ }
-+
-+}
-+#endif /* LV_HAVE_SSE4_1 */
-+
-+#ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Takes the conjugate of a complex vector.
-+ \param cVector The vector where the results will be stored
-+ \param aVector Vector to be conjugated
-+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
-+ */
-+static inline void volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_generic(lv_32fc_t* d_very_early_code, const float d_very_early_late_spc_chips, const float code_length_half_chips, const float code_phase_step_half_chips, const float tcode_half_chips_input, const lv_32fc_t* d_ca_code, unsigned int num_points){
-+
-+ // float* pointer1 = (float*)&d_very_early_late_spc_chips;
-+ // *pointer1 = 1;
-+ // float* pointer2 = (float*)&code_length_half_chips;
-+ // *pointer2 = 6;
-+ // float* pointer3 = (float*)&code_phase_step_half_chips;
-+ // *pointer3 = 7;
-+ // float* pointer4 = (float*)&tcode_half_chips_input;
-+ // *pointer4 = 8;
-+
-+ int associated_chip_index;
-+ float tcode_half_chips = tcode_half_chips_input;
-+ float d_very_early_late_spc_chips_multiplied_by_2 = 2*d_very_early_late_spc_chips;
-+
-+ for (unsigned int i = 0; i < num_points; i++)
-+ {
-+ associated_chip_index = 2 + round(fmod(tcode_half_chips - d_very_early_late_spc_chips_multiplied_by_2, code_length_half_chips));
-+ d_very_early_code[i] = d_ca_code[associated_chip_index];
-+ tcode_half_chips = tcode_half_chips + code_phase_step_half_chips;
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32fc_multiply_32fc.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32fc_multiply_32fc.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32fc_multiply_32fc.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32fc_multiply_32fc.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,178 @@
-+#ifndef INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_u_H
-+#define INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_u_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE3
-+#include
-+/*!
-+ \brief Multiplies the input vector by a scalar and stores the results in the third vector
-+ \param cVector The vector where the results will be stored
-+ \param aVector The vector to be multiplied
-+ \param scalar The complex scalar to multiply aVector
-+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
-+*/
-+static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
-+ unsigned int number = 0;
-+ const unsigned int halfPoints = num_points / 2;
-+
-+ __m128 x, yl, yh, z, tmp1, tmp2;
-+ lv_32fc_t* c = cVector;
-+ const lv_32fc_t* a = aVector;
-+
-+ // Set up constant scalar vector
-+ yl = _mm_set_ps1(lv_creal(scalar));
-+ yh = _mm_set_ps1(lv_cimag(scalar));
-+
-+ for(;number < halfPoints; number++){
-+
-+ x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-+
-+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ _mm_storeu_ps((float*)c,z); // Store the results back into the C container
-+
-+ a += 2;
-+ c += 2;
-+ }
-+
-+ if((num_points % 2) != 0) {
-+ *c = (*a) * scalar;
-+ }
-+}
-+#endif /* LV_HAVE_SSE */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Multiplies the input vector by a scalar and stores the results in the third vector
-+ \param cVector The vector where the results will be stored
-+ \param aVector The vector to be multiplied
-+ \param scalar The complex scalar to multiply aVector
-+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
-+*/
-+static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
-+ lv_32fc_t* cPtr = cVector;
-+ const lv_32fc_t* aPtr = aVector;
-+ unsigned int number = num_points;
-+
-+ // unwrap loop
-+ while (number >= 8){
-+ *cPtr++ = (*aPtr++) * scalar;
-+ *cPtr++ = (*aPtr++) * scalar;
-+ *cPtr++ = (*aPtr++) * scalar;
-+ *cPtr++ = (*aPtr++) * scalar;
-+ *cPtr++ = (*aPtr++) * scalar;
-+ *cPtr++ = (*aPtr++) * scalar;
-+ *cPtr++ = (*aPtr++) * scalar;
-+ *cPtr++ = (*aPtr++) * scalar;
-+ number -= 8;
-+ }
-+
-+ // clean up any remaining
-+ while (number-- > 0)
-+ *cPtr++ = *aPtr++ * scalar;
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+
-+#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H */
-+#ifndef INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_a_H
-+#define INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE3
-+#include
-+ /*!
-+ \brief Multiplies the two input complex vectors and stores their results in the third vector
-+ \param cVector The vector where the results will be stored
-+ \param aVector One of the vectors to be multiplied
-+ \param bVector One of the vectors to be multiplied
-+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
-+ */
-+static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
-+ unsigned int number = 0;
-+ const unsigned int halfPoints = num_points / 2;
-+
-+ __m128 x, yl, yh, z, tmp1, tmp2;
-+ lv_32fc_t* c = cVector;
-+ const lv_32fc_t* a = aVector;
-+
-+ // Set up constant scalar vector
-+ yl = _mm_set_ps1(lv_creal(scalar));
-+ yh = _mm_set_ps1(lv_cimag(scalar));
-+
-+ for(;number < halfPoints; number++){
-+
-+ x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-+
-+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ _mm_store_ps((float*)c,z); // Store the results back into the C container
-+
-+ a += 2;
-+ c += 2;
-+ }
-+
-+ if((num_points % 2) != 0) {
-+ *c = (*a) * scalar;
-+ }
-+}
-+#endif /* LV_HAVE_SSE */
-+
-+
-+#ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Multiplies the two input complex vectors and stores their results in the third vector
-+ \param cVector The vector where the results will be stored
-+ \param aVector One of the vectors to be multiplied
-+ \param bVector One of the vectors to be multiplied
-+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
-+ */
-+static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
-+ lv_32fc_t* cPtr = cVector;
-+ const lv_32fc_t* aPtr = aVector;
-+ unsigned int number = num_points;
-+
-+ // unwrap loop
-+ while (number >= 8){
-+ *cPtr++ = (*aPtr++) * scalar;
-+ *cPtr++ = (*aPtr++) * scalar;
-+ *cPtr++ = (*aPtr++) * scalar;
-+ *cPtr++ = (*aPtr++) * scalar;
-+ *cPtr++ = (*aPtr++) * scalar;
-+ *cPtr++ = (*aPtr++) * scalar;
-+ *cPtr++ = (*aPtr++) * scalar;
-+ *cPtr++ = (*aPtr++) * scalar;
-+ number -= 8;
-+ }
-+
-+ // clean up any remaining
-+ while (number-- > 0)
-+ *cPtr++ = *aPtr++ * scalar;
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+
-+
-+
-+
-+#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_dot_prod_32fc.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_dot_prod_32fc.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_dot_prod_32fc.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_dot_prod_32fc.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,763 @@
-+#ifndef INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H
-+#define INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+
-+#ifdef LV_HAVE_GENERIC
-+
-+
-+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-+
-+ float * res = (float*) result;
-+ float * in = (float*) input;
-+ float * tp = (float*) taps;
-+ unsigned int n_2_ccomplex_blocks = num_points/2;
-+ unsigned int isodd = num_points & 1;
-+
-+ float sum0[2] = {0,0};
-+ float sum1[2] = {0,0};
-+ unsigned int i = 0;
-+
-+ for(i = 0; i < n_2_ccomplex_blocks; ++i) {
-+ sum0[0] += in[0] * tp[0] - in[1] * tp[1];
-+ sum0[1] += in[0] * tp[1] + in[1] * tp[0];
-+ sum1[0] += in[2] * tp[2] - in[3] * tp[3];
-+ sum1[1] += in[2] * tp[3] + in[3] * tp[2];
-+
-+ in += 4;
-+ tp += 4;
-+ }
-+
-+ res[0] = sum0[0] + sum1[0];
-+ res[1] = sum0[1] + sum1[1];
-+
-+ // Cleanup if we had an odd number of points
-+ for(i = 0; i < isodd; ++i) {
-+ *result += input[num_points - 1] * taps[num_points - 1];
-+ }
-+}
-+
-+#endif /*LV_HAVE_GENERIC*/
-+
-+
-+
-+#if LV_HAVE_SSE && LV_HAVE_64
-+
-+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-+
-+ const unsigned int num_bytes = num_points*8;
-+ unsigned int isodd = num_points & 1;
-+
-+ asm
-+ (
-+ "# ccomplex_dotprod_generic (float* result, const float *input,\n\t"
-+ "# const float *taps, unsigned num_bytes)\n\t"
-+ "# float sum0 = 0;\n\t"
-+ "# float sum1 = 0;\n\t"
-+ "# float sum2 = 0;\n\t"
-+ "# float sum3 = 0;\n\t"
-+ "# do {\n\t"
-+ "# sum0 += input[0] * taps[0] - input[1] * taps[1];\n\t"
-+ "# sum1 += input[0] * taps[1] + input[1] * taps[0];\n\t"
-+ "# sum2 += input[2] * taps[2] - input[3] * taps[3];\n\t"
-+ "# sum3 += input[2] * taps[3] + input[3] * taps[2];\n\t"
-+ "# input += 4;\n\t"
-+ "# taps += 4; \n\t"
-+ "# } while (--n_2_ccomplex_blocks != 0);\n\t"
-+ "# result[0] = sum0 + sum2;\n\t"
-+ "# result[1] = sum1 + sum3;\n\t"
-+ "# TODO: prefetch and better scheduling\n\t"
-+ " xor %%r9, %%r9\n\t"
-+ " xor %%r10, %%r10\n\t"
-+ " movq %%rcx, %%rax\n\t"
-+ " movq %%rcx, %%r8\n\t"
-+ " movq %[rsi], %%r9\n\t"
-+ " movq %[rdx], %%r10\n\t"
-+ " xorps %%xmm6, %%xmm6 # zero accumulators\n\t"
-+ " movups 0(%%r9), %%xmm0\n\t"
-+ " xorps %%xmm7, %%xmm7 # zero accumulators\n\t"
-+ " movups 0(%%r10), %%xmm2\n\t"
-+ " shr $5, %%rax # rax = n_2_ccomplex_blocks / 2\n\t"
-+ " shr $4, %%r8\n\t"
-+ " jmp .%=L1_test\n\t"
-+ " # 4 taps / loop\n\t"
-+ " # something like ?? cycles / loop\n\t"
-+ ".%=Loop1: \n\t"
-+ "# complex prod: C += A * B, w/ temp Z & Y (or B), xmmPN=$0x8000000080000000\n\t"
-+ "# movups (%%r9), %%xmmA\n\t"
-+ "# movups (%%r10), %%xmmB\n\t"
-+ "# movups %%xmmA, %%xmmZ\n\t"
-+ "# shufps $0xb1, %%xmmZ, %%xmmZ # swap internals\n\t"
-+ "# mulps %%xmmB, %%xmmA\n\t"
-+ "# mulps %%xmmZ, %%xmmB\n\t"
-+ "# # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
-+ "# xorps %%xmmPN, %%xmmA\n\t"
-+ "# movups %%xmmA, %%xmmZ\n\t"
-+ "# unpcklps %%xmmB, %%xmmA\n\t"
-+ "# unpckhps %%xmmB, %%xmmZ\n\t"
-+ "# movups %%xmmZ, %%xmmY\n\t"
-+ "# shufps $0x44, %%xmmA, %%xmmZ # b01000100\n\t"
-+ "# shufps $0xee, %%xmmY, %%xmmA # b11101110\n\t"
-+ "# addps %%xmmZ, %%xmmA\n\t"
-+ "# addps %%xmmA, %%xmmC\n\t"
-+ "# A=xmm0, B=xmm2, Z=xmm4\n\t"
-+ "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
-+ " movups 16(%%r9), %%xmm1\n\t"
-+ " movups %%xmm0, %%xmm4\n\t"
-+ " mulps %%xmm2, %%xmm0\n\t"
-+ " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
-+ " movups 16(%%r10), %%xmm3\n\t"
-+ " movups %%xmm1, %%xmm5\n\t"
-+ " addps %%xmm0, %%xmm6\n\t"
-+ " mulps %%xmm3, %%xmm1\n\t"
-+ " shufps $0xb1, %%xmm5, %%xmm5 # swap internals\n\t"
-+ " addps %%xmm1, %%xmm6\n\t"
-+ " mulps %%xmm4, %%xmm2\n\t"
-+ " movups 32(%%r9), %%xmm0\n\t"
-+ " addps %%xmm2, %%xmm7\n\t"
-+ " mulps %%xmm5, %%xmm3\n\t"
-+ " add $32, %%r9\n\t"
-+ " movups 32(%%r10), %%xmm2\n\t"
-+ " addps %%xmm3, %%xmm7\n\t"
-+ " add $32, %%r10\n\t"
-+ ".%=L1_test:\n\t"
-+ " dec %%rax\n\t"
-+ " jge .%=Loop1\n\t"
-+ " # We've handled the bulk of multiplies up to here.\n\t"
-+ " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
-+ " # If so, we've got 2 more taps to do.\n\t"
-+ " and $1, %%r8\n\t"
-+ " je .%=Leven\n\t"
-+ " # The count was odd, do 2 more taps.\n\t"
-+ " # Note that we've already got mm0/mm2 preloaded\n\t"
-+ " # from the main loop.\n\t"
-+ " movups %%xmm0, %%xmm4\n\t"
-+ " mulps %%xmm2, %%xmm0\n\t"
-+ " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
-+ " addps %%xmm0, %%xmm6\n\t"
-+ " mulps %%xmm4, %%xmm2\n\t"
-+ " addps %%xmm2, %%xmm7\n\t"
-+ ".%=Leven:\n\t"
-+ " # neg inversor\n\t"
-+ " xorps %%xmm1, %%xmm1\n\t"
-+ " mov $0x80000000, %%r9\n\t"
-+ " movd %%r9, %%xmm1\n\t"
-+ " shufps $0x11, %%xmm1, %%xmm1 # b00010001 # 0 -0 0 -0\n\t"
-+ " # pfpnacc\n\t"
-+ " xorps %%xmm1, %%xmm6\n\t"
-+ " movups %%xmm6, %%xmm2\n\t"
-+ " unpcklps %%xmm7, %%xmm6\n\t"
-+ " unpckhps %%xmm7, %%xmm2\n\t"
-+ " movups %%xmm2, %%xmm3\n\t"
-+ " shufps $0x44, %%xmm6, %%xmm2 # b01000100\n\t"
-+ " shufps $0xee, %%xmm3, %%xmm6 # b11101110\n\t"
-+ " addps %%xmm2, %%xmm6\n\t"
-+ " # xmm6 = r1 i2 r3 i4\n\t"
-+ " movhlps %%xmm6, %%xmm4 # xmm4 = r3 i4 ?? ??\n\t"
-+ " addps %%xmm4, %%xmm6 # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
-+ " movlps %%xmm6, (%[rdi]) # store low 2x32 bits (complex) to memory\n\t"
-+ :
-+ :[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" (result)
-+ :"rax", "r8", "r9", "r10"
-+ );
-+
-+
-+ if(isodd) {
-+ *result += input[num_points - 1] * taps[num_points - 1];
-+ }
-+
-+ return;
-+
-+}
-+
-+#endif /* LV_HAVE_SSE && LV_HAVE_64 */
-+
-+
-+
-+
-+#ifdef LV_HAVE_SSE3
-+
-+#include
-+
-+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-+
-+ lv_32fc_t dotProduct;
-+ memset(&dotProduct, 0x0, 2*sizeof(float));
-+
-+ unsigned int number = 0;
-+ const unsigned int halfPoints = num_points/2;
-+ unsigned int isodd = num_points & 1;
-+
-+ __m128 x, y, yl, yh, z, tmp1, tmp2, dotProdVal;
-+
-+ const lv_32fc_t* a = input;
-+ const lv_32fc_t* b = taps;
-+
-+ dotProdVal = _mm_setzero_ps();
-+
-+ for(;number < halfPoints; number++){
-+
-+ x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-+ y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ dotProdVal = _mm_add_ps(dotProdVal, z); // Add the complex multiplication results together
-+
-+ a += 2;
-+ b += 2;
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];
-+
-+ _mm_storeu_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector
-+
-+ dotProduct += ( dotProductVector[0] + dotProductVector[1] );
-+
-+ if(isodd) {
-+ dotProduct += input[num_points - 1] * taps[num_points - 1];
-+ }
-+
-+ *result = dotProduct;
-+}
-+
-+#endif /*LV_HAVE_SSE3*/
-+
-+#ifdef LV_HAVE_SSE4_1
-+
-+#include
-+
-+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-+
-+ unsigned int i = 0;
-+ const unsigned int qtr_points = num_points/4;
-+ const unsigned int isodd = num_points & 3;
-+
-+ __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1;
-+ float *p_input, *p_taps;
-+ __m64 *p_result;
-+
-+ p_result = (__m64*)result;
-+ p_input = (float*)input;
-+ p_taps = (float*)taps;
-+
-+ static const __m128i neg = {0x000000000000000080000000};
-+
-+ real0 = _mm_setzero_ps();
-+ real1 = _mm_setzero_ps();
-+ im0 = _mm_setzero_ps();
-+ im1 = _mm_setzero_ps();
-+
-+ for(; i < qtr_points; ++i) {
-+ xmm0 = _mm_loadu_ps(p_input);
-+ xmm1 = _mm_loadu_ps(p_taps);
-+
-+ p_input += 4;
-+ p_taps += 4;
-+
-+ xmm2 = _mm_loadu_ps(p_input);
-+ xmm3 = _mm_loadu_ps(p_taps);
-+
-+ p_input += 4;
-+ p_taps += 4;
-+
-+ xmm4 = _mm_unpackhi_ps(xmm0, xmm2);
-+ xmm5 = _mm_unpackhi_ps(xmm1, xmm3);
-+ xmm0 = _mm_unpacklo_ps(xmm0, xmm2);
-+ xmm2 = _mm_unpacklo_ps(xmm1, xmm3);
-+
-+ //imaginary vector from input
-+ xmm1 = _mm_unpackhi_ps(xmm0, xmm4);
-+ //real vector from input
-+ xmm3 = _mm_unpacklo_ps(xmm0, xmm4);
-+ //imaginary vector from taps
-+ xmm0 = _mm_unpackhi_ps(xmm2, xmm5);
-+ //real vector from taps
-+ xmm2 = _mm_unpacklo_ps(xmm2, xmm5);
-+
-+ xmm4 = _mm_dp_ps(xmm3, xmm2, 0xf1);
-+ xmm5 = _mm_dp_ps(xmm1, xmm0, 0xf1);
-+
-+ xmm6 = _mm_dp_ps(xmm3, xmm0, 0xf2);
-+ xmm7 = _mm_dp_ps(xmm1, xmm2, 0xf2);
-+
-+ real0 = _mm_add_ps(xmm4, real0);
-+ real1 = _mm_add_ps(xmm5, real1);
-+ im0 = _mm_add_ps(xmm6, im0);
-+ im1 = _mm_add_ps(xmm7, im1);
-+ }
-+
-+ real1 = _mm_xor_ps(real1, bit128_p(&neg)->float_vec);
-+
-+ im0 = _mm_add_ps(im0, im1);
-+ real0 = _mm_add_ps(real0, real1);
-+
-+ im0 = _mm_add_ps(im0, real0);
-+
-+ _mm_storel_pi(p_result, im0);
-+
-+ for(i = num_points-isodd; i < num_points; i++) {
-+ *result += input[i] * taps[i];
-+ }
-+}
-+
-+#endif /*LV_HAVE_SSE4_1*/
-+
-+
-+
-+
-+#endif /*INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H*/
-+#ifndef INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H
-+#define INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+
-+#ifdef LV_HAVE_GENERIC
-+
-+
-+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-+ /* Complex dot product without conjugation: *result = sum of input[i] * taps[i] for i in [0, num_points). */
-+ /* The pair count is taken straight from num_points; the former (num_points*8) >> 4 wrapped for num_points >= 2^29. */
-+ const unsigned int n_2_ccomplex_blocks = num_points >> 1;
-+ const unsigned int isodd = num_points & 1;
-+ /* View the complex buffers as interleaved (re, im) floats; the inputs stay const. */
-+ float * res = (float*) result;
-+ const float * in = (const float*) input;
-+ const float * tp = (const float*) taps;
-+
-+ /* Two independent accumulators, one for each complex sample of a pair. */
-+ float sum0[2] = {0,0};
-+ float sum1[2] = {0,0};
-+ unsigned int i = 0;
-+
-+ for(i = 0; i < n_2_ccomplex_blocks; ++i) {
-+ sum0[0] += in[0] * tp[0] - in[1] * tp[1];
-+ sum0[1] += in[0] * tp[1] + in[1] * tp[0];
-+ sum1[0] += in[2] * tp[2] - in[3] * tp[3];
-+ sum1[1] += in[2] * tp[3] + in[3] * tp[2];
-+ in += 4;
-+ tp += 4;
-+ }
-+
-+ res[0] = sum0[0] + sum1[0];
-+ res[1] = sum0[1] + sum1[1];
-+
-+ /* An odd num_points leaves one trailing sample outside the paired loop. */
-+ if(isodd) {
-+ *result += input[num_points - 1] * taps[num_points - 1];
-+ }
-+}
-+
-+#endif /*LV_HAVE_GENERIC*/
-+
-+
-+#if LV_HAVE_SSE && LV_HAVE_64
-+
-+
-+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-+ /* Complex dot product (*result = sum of input[i] * taps[i]) via hand-written x86-64 SSE assembly; movaps requires 16-byte aligned input/taps. */
-+ const unsigned long long num_bytes = (unsigned long long)num_points*8; /* 64-bit: the asm reads all of %rcx, and a 32-bit product could wrap */
-+ unsigned int isodd = num_points & 1;
-+
-+ asm
-+ (
-+ "# ccomplex_dotprod_generic (float* result, const float *input,\n\t"
-+ "# const float *taps, unsigned num_bytes)\n\t"
-+ "# float sum0 = 0;\n\t"
-+ "# float sum1 = 0;\n\t"
-+ "# float sum2 = 0;\n\t"
-+ "# float sum3 = 0;\n\t"
-+ "# do {\n\t"
-+ "# sum0 += input[0] * taps[0] - input[1] * taps[1];\n\t"
-+ "# sum1 += input[0] * taps[1] + input[1] * taps[0];\n\t"
-+ "# sum2 += input[2] * taps[2] - input[3] * taps[3];\n\t"
-+ "# sum3 += input[2] * taps[3] + input[3] * taps[2];\n\t"
-+ "# input += 4;\n\t"
-+ "# taps += 4; \n\t"
-+ "# } while (--n_2_ccomplex_blocks != 0);\n\t"
-+ "# result[0] = sum0 + sum2;\n\t"
-+ "# result[1] = sum1 + sum3;\n\t"
-+ "# TODO: prefetch and better scheduling\n\t"
-+ " xor %%r9, %%r9\n\t"
-+ " xor %%r10, %%r10\n\t"
-+ " movq %%rcx, %%rax\n\t"
-+ " movq %%rcx, %%r8\n\t"
-+ " movq %[rsi], %%r9\n\t"
-+ " movq %[rdx], %%r10\n\t"
-+ " xorps %%xmm6, %%xmm6 # zero accumulators\n\t"
-+ " movaps 0(%%r9), %%xmm0\n\t"
-+ " xorps %%xmm7, %%xmm7 # zero accumulators\n\t"
-+ " movaps 0(%%r10), %%xmm2\n\t"
-+ " shr $5, %%rax # rax = n_2_ccomplex_blocks / 2\n\t"
-+ " shr $4, %%r8\n\t"
-+ " jmp .%=L1_test\n\t"
-+ " # 4 taps / loop\n\t"
-+ " # something like ?? cycles / loop\n\t"
-+ ".%=Loop1: \n\t"
-+ "# complex prod: C += A * B, w/ temp Z & Y (or B), xmmPN=$0x8000000080000000\n\t"
-+ "# movaps (%%r9), %%xmmA\n\t"
-+ "# movaps (%%r10), %%xmmB\n\t"
-+ "# movaps %%xmmA, %%xmmZ\n\t"
-+ "# shufps $0xb1, %%xmmZ, %%xmmZ # swap internals\n\t"
-+ "# mulps %%xmmB, %%xmmA\n\t"
-+ "# mulps %%xmmZ, %%xmmB\n\t"
-+ "# # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
-+ "# xorps %%xmmPN, %%xmmA\n\t"
-+ "# movaps %%xmmA, %%xmmZ\n\t"
-+ "# unpcklps %%xmmB, %%xmmA\n\t"
-+ "# unpckhps %%xmmB, %%xmmZ\n\t"
-+ "# movaps %%xmmZ, %%xmmY\n\t"
-+ "# shufps $0x44, %%xmmA, %%xmmZ # b01000100\n\t"
-+ "# shufps $0xee, %%xmmY, %%xmmA # b11101110\n\t"
-+ "# addps %%xmmZ, %%xmmA\n\t"
-+ "# addps %%xmmA, %%xmmC\n\t"
-+ "# A=xmm0, B=xmm2, Z=xmm4\n\t"
-+ "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
-+ " movaps 16(%%r9), %%xmm1\n\t"
-+ " movaps %%xmm0, %%xmm4\n\t"
-+ " mulps %%xmm2, %%xmm0\n\t"
-+ " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
-+ " movaps 16(%%r10), %%xmm3\n\t"
-+ " movaps %%xmm1, %%xmm5\n\t"
-+ " addps %%xmm0, %%xmm6\n\t"
-+ " mulps %%xmm3, %%xmm1\n\t"
-+ " shufps $0xb1, %%xmm5, %%xmm5 # swap internals\n\t"
-+ " addps %%xmm1, %%xmm6\n\t"
-+ " mulps %%xmm4, %%xmm2\n\t"
-+ " movaps 32(%%r9), %%xmm0\n\t"
-+ " addps %%xmm2, %%xmm7\n\t"
-+ " mulps %%xmm5, %%xmm3\n\t"
-+ " add $32, %%r9\n\t"
-+ " movaps 32(%%r10), %%xmm2\n\t"
-+ " addps %%xmm3, %%xmm7\n\t"
-+ " add $32, %%r10\n\t"
-+ ".%=L1_test:\n\t"
-+ " dec %%rax\n\t"
-+ " jge .%=Loop1\n\t"
-+ " # We've handled the bulk of multiplies up to here.\n\t"
-+ " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
-+ " # If so, we've got 2 more taps to do.\n\t"
-+ " and $1, %%r8\n\t"
-+ " je .%=Leven\n\t"
-+ " # The count was odd, do 2 more taps.\n\t"
-+ " # Note that we've already got mm0/mm2 preloaded\n\t"
-+ " # from the main loop.\n\t"
-+ " movaps %%xmm0, %%xmm4\n\t"
-+ " mulps %%xmm2, %%xmm0\n\t"
-+ " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
-+ " addps %%xmm0, %%xmm6\n\t"
-+ " mulps %%xmm4, %%xmm2\n\t"
-+ " addps %%xmm2, %%xmm7\n\t"
-+ ".%=Leven:\n\t"
-+ " # neg inversor\n\t"
-+ " xorps %%xmm1, %%xmm1\n\t"
-+ " mov $0x80000000, %%r9\n\t"
-+ " movd %%r9, %%xmm1\n\t"
-+ " shufps $0x11, %%xmm1, %%xmm1 # b00010001 # 0 -0 0 -0\n\t"
-+ " # pfpnacc\n\t"
-+ " xorps %%xmm1, %%xmm6\n\t"
-+ " movaps %%xmm6, %%xmm2\n\t"
-+ " unpcklps %%xmm7, %%xmm6\n\t"
-+ " unpckhps %%xmm7, %%xmm2\n\t"
-+ " movaps %%xmm2, %%xmm3\n\t"
-+ " shufps $0x44, %%xmm6, %%xmm2 # b01000100\n\t"
-+ " shufps $0xee, %%xmm3, %%xmm6 # b11101110\n\t"
-+ " addps %%xmm2, %%xmm6\n\t"
-+ " # xmm6 = r1 i2 r3 i4\n\t"
-+ " movhlps %%xmm6, %%xmm4 # xmm4 = r3 i4 ?? ??\n\t"
-+ " addps %%xmm4, %%xmm6 # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
-+ " movlps %%xmm6, (%[rdi]) # store low 2x32 bits (complex) to memory\n\t"
-+ : /* no register outputs: the result is stored through %[rdi] */
-+ :[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" (result)
-+ :"rax", "r8", "r9", "r10", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory", "cc" /* every register the asm overwrites, plus its memory accesses and flags */
-+ );
-+
-+ /* The asm only covers whole pairs of complex samples; fold in the last sample of an odd-length input. */
-+ if(isodd) {
-+ *result += input[num_points - 1] * taps[num_points - 1];
-+ }
-+
-+ return;
-+
-+}
-+
-+#endif /*LV_HAVE_SSE && LV_HAVE_64*/
-+
-+#if LV_HAVE_SSE && LV_HAVE_32
-+
-+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-+ /* 32-bit SSE entry point for the complex dot product; the only live statement forwards to the generic kernel. */
-+ volk_gnsssdr_32fc_x2_dot_prod_32fc_a_generic(result, input, taps, num_points);
-+
-+#if 0 /* the hand-written 32-bit assembly below is compiled out; it addresses its arguments through fixed %ebp offsets */
-+ const unsigned int num_bytes = num_points*8;
-+ unsigned int isodd = num_points & 1;
-+
-+ asm volatile
-+ (
-+ " #pushl %%ebp\n\t"
-+ " #movl %%esp, %%ebp\n\t"
-+ " movl 12(%%ebp), %%eax # input\n\t"
-+ " movl 16(%%ebp), %%edx # taps\n\t"
-+ " movl 20(%%ebp), %%ecx # n_bytes\n\t"
-+ " xorps %%xmm6, %%xmm6 # zero accumulators\n\t"
-+ " movaps 0(%%eax), %%xmm0\n\t"
-+ " xorps %%xmm7, %%xmm7 # zero accumulators\n\t"
-+ " movaps 0(%%edx), %%xmm2\n\t"
-+ " shrl $5, %%ecx # ecx = n_2_ccomplex_blocks / 2\n\t"
-+ " jmp .%=L1_test\n\t"
-+ " # 4 taps / loop\n\t"
-+ " # something like ?? cycles / loop\n\t"
-+ ".%=Loop1: \n\t"
-+ "# complex prod: C += A * B, w/ temp Z & Y (or B), xmmPN=$0x8000000080000000\n\t"
-+ "# movaps (%%eax), %%xmmA\n\t"
-+ "# movaps (%%edx), %%xmmB\n\t"
-+ "# movaps %%xmmA, %%xmmZ\n\t"
-+ "# shufps $0xb1, %%xmmZ, %%xmmZ # swap internals\n\t"
-+ "# mulps %%xmmB, %%xmmA\n\t"
-+ "# mulps %%xmmZ, %%xmmB\n\t"
-+ "# # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
-+ "# xorps %%xmmPN, %%xmmA\n\t"
-+ "# movaps %%xmmA, %%xmmZ\n\t"
-+ "# unpcklps %%xmmB, %%xmmA\n\t"
-+ "# unpckhps %%xmmB, %%xmmZ\n\t"
-+ "# movaps %%xmmZ, %%xmmY\n\t"
-+ "# shufps $0x44, %%xmmA, %%xmmZ # b01000100\n\t"
-+ "# shufps $0xee, %%xmmY, %%xmmA # b11101110\n\t"
-+ "# addps %%xmmZ, %%xmmA\n\t"
-+ "# addps %%xmmA, %%xmmC\n\t"
-+ "# A=xmm0, B=xmm2, Z=xmm4\n\t"
-+ "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
-+ " movaps 16(%%eax), %%xmm1\n\t"
-+ " movaps %%xmm0, %%xmm4\n\t"
-+ " mulps %%xmm2, %%xmm0\n\t"
-+ " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
-+ " movaps 16(%%edx), %%xmm3\n\t"
-+ " movaps %%xmm1, %%xmm5\n\t"
-+ " addps %%xmm0, %%xmm6\n\t"
-+ " mulps %%xmm3, %%xmm1\n\t"
-+ " shufps $0xb1, %%xmm5, %%xmm5 # swap internals\n\t"
-+ " addps %%xmm1, %%xmm6\n\t"
-+ " mulps %%xmm4, %%xmm2\n\t"
-+ " movaps 32(%%eax), %%xmm0\n\t"
-+ " addps %%xmm2, %%xmm7\n\t"
-+ " mulps %%xmm5, %%xmm3\n\t"
-+ " addl $32, %%eax\n\t"
-+ " movaps 32(%%edx), %%xmm2\n\t"
-+ " addps %%xmm3, %%xmm7\n\t"
-+ " addl $32, %%edx\n\t"
-+ ".%=L1_test:\n\t"
-+ " decl %%ecx\n\t"
-+ " jge .%=Loop1\n\t"
-+ " # We've handled the bulk of multiplies up to here.\n\t"
-+ " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
-+ " # If so, we've got 2 more taps to do.\n\t"
-+ " movl 20(%%ebp), %%ecx # n_2_ccomplex_blocks\n\t"
-+ " shrl $4, %%ecx\n\t"
-+ " andl $1, %%ecx\n\t"
-+ " je .%=Leven\n\t"
-+ " # The count was odd, do 2 more taps.\n\t"
-+ " # Note that we've already got mm0/mm2 preloaded\n\t"
-+ " # from the main loop.\n\t"
-+ " movaps %%xmm0, %%xmm4\n\t"
-+ " mulps %%xmm2, %%xmm0\n\t"
-+ " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
-+ " addps %%xmm0, %%xmm6\n\t"
-+ " mulps %%xmm4, %%xmm2\n\t"
-+ " addps %%xmm2, %%xmm7\n\t"
-+ ".%=Leven:\n\t"
-+ " # neg inversor\n\t"
-+ " movl 8(%%ebp), %%eax \n\t"
-+ " xorps %%xmm1, %%xmm1\n\t"
-+ " movl $0x80000000, (%%eax)\n\t"
-+ " movss (%%eax), %%xmm1\n\t"
-+ " shufps $0x11, %%xmm1, %%xmm1 # b00010001 # 0 -0 0 -0\n\t"
-+ " # pfpnacc\n\t"
-+ " xorps %%xmm1, %%xmm6\n\t"
-+ " movaps %%xmm6, %%xmm2\n\t"
-+ " unpcklps %%xmm7, %%xmm6\n\t"
-+ " unpckhps %%xmm7, %%xmm2\n\t"
-+ " movaps %%xmm2, %%xmm3\n\t"
-+ " shufps $0x44, %%xmm6, %%xmm2 # b01000100\n\t"
-+ " shufps $0xee, %%xmm3, %%xmm6 # b11101110\n\t"
-+ " addps %%xmm2, %%xmm6\n\t"
-+ " # xmm6 = r1 i2 r3 i4\n\t"
-+ " #movl 8(%%ebp), %%eax # @result\n\t"
-+ " movhlps %%xmm6, %%xmm4 # xmm4 = r3 i4 ?? ??\n\t"
-+ " addps %%xmm4, %%xmm6 # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
-+ " movlps %%xmm6, (%%eax) # store low 2x32 bits (complex) to memory\n\t"
-+ " #popl %%ebp\n\t"
-+ :
-+ :
-+ : "eax", "ecx", "edx"
-+ );
-+
-+
-+ int getem = num_bytes % 16; /* assigned but never read in this block */
-+
-+ if(isodd) {
-+ *result += (input[num_points - 1] * taps[num_points - 1]);
-+ }
-+
-+ return;
-+#endif /* 0 */
-+}
-+
-+#endif /*LV_HAVE_SSE && LV_HAVE_32*/
-+
-+#ifdef LV_HAVE_SSE3
-+
-+#include
-+
-+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-+ /* Complex dot product (*result = sum of input[i] * taps[i]) using SSE3; input and taps must be 16-byte aligned (_mm_load_ps). */
-+ /* The pair count comes straight from num_points: the former (num_points*8) >> 4 wrapped for num_points >= 2^29. */
-+ unsigned int isodd = num_points & 1;
-+
-+ lv_32fc_t dotProduct;
-+ memset(&dotProduct, 0x0, 2*sizeof(float)); // zero both the real and imaginary float
-+
-+ unsigned int number = 0;
-+ const unsigned int halfPoints = num_points / 2; // two complex samples are consumed per iteration
-+
-+ __m128 x, y, yl, yh, z, tmp1, tmp2, dotProdVal;
-+
-+ const lv_32fc_t* a = input;
-+ const lv_32fc_t* b = taps;
-+
-+ dotProdVal = _mm_setzero_ps(); // running sum of two complex partial results (re,im,re,im)
-+
-+ for(;number < halfPoints; number++){
-+
-+ x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-+ y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ dotProdVal = _mm_add_ps(dotProdVal, z); // Add the complex multiplication results together
-+
-+ a += 2;
-+ b += 2;
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];
-+
-+ _mm_store_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector
-+
-+ dotProduct += ( dotProductVector[0] + dotProductVector[1] ); // collapse the two partial sums into one complex value
-+
-+ if(isodd) { // odd length: the last sample was not covered by the paired loop
-+ dotProduct += input[num_points - 1] * taps[num_points - 1];
-+ }
-+
-+ *result = dotProduct;
-+}
-+
-+#endif /*LV_HAVE_SSE3*/
-+
-+#ifdef LV_HAVE_SSE4_1
-+
-+#include
-+
-+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-+ /* Complex dot product (*result = sum of input[i] * taps[i]) using SSE4.1 _mm_dp_ps; aligned loads are used on input and taps. */
-+ unsigned int i = 0;
-+ const unsigned int qtr_points = num_points/4;
-+ const unsigned int isodd = num_points & 3; /* despite the name: number of leftover samples (num_points mod 4) */
-+
-+ __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1;
-+ float *p_input, *p_taps;
-+ __m64 *p_result;
-+
-+ static const __m128i neg = {0x000000000000000080000000}; /* presumably a sign-bit mask for float lane 0 - confirm against the __m128i initializer layout */
-+
-+ p_result = (__m64*)result;
-+ p_input = (float*)input;
-+ p_taps = (float*)taps;
-+
-+ real0 = _mm_setzero_ps();
-+ real1 = _mm_setzero_ps();
-+ im0 = _mm_setzero_ps();
-+ im1 = _mm_setzero_ps();
-+
-+ for(; i < qtr_points; ++i) { /* each pass consumes 8 floats (4 complex samples) from each operand */
-+ xmm0 = _mm_load_ps(p_input);
-+ xmm1 = _mm_load_ps(p_taps);
-+
-+ p_input += 4;
-+ p_taps += 4;
-+
-+ xmm2 = _mm_load_ps(p_input);
-+ xmm3 = _mm_load_ps(p_taps);
-+
-+ p_input += 4;
-+ p_taps += 4;
-+
-+ xmm4 = _mm_unpackhi_ps(xmm0, xmm2); /* first de-interleave step: r1,r3,i1,i3 of the input */
-+ xmm5 = _mm_unpackhi_ps(xmm1, xmm3); /* same for the taps */
-+ xmm0 = _mm_unpacklo_ps(xmm0, xmm2); /* r0,r2,i0,i2 of the input */
-+ xmm2 = _mm_unpacklo_ps(xmm1, xmm3); /* same for the taps */
-+
-+ //imaginary parts of the four input samples (i0,i1,i2,i3)
-+ xmm1 = _mm_unpackhi_ps(xmm0, xmm4);
-+ //real parts of the four input samples (r0,r1,r2,r3)
-+ xmm3 = _mm_unpacklo_ps(xmm0, xmm4);
-+ //imaginary parts of the four taps
-+ xmm0 = _mm_unpackhi_ps(xmm2, xmm5);
-+ //real parts of the four taps
-+ xmm2 = _mm_unpacklo_ps(xmm2, xmm5);
-+
-+ xmm4 = _mm_dp_ps(xmm3, xmm2, 0xf1); /* sum(re_in*re_tap), written to lane 0 */
-+ xmm5 = _mm_dp_ps(xmm1, xmm0, 0xf1); /* sum(im_in*im_tap), written to lane 0 */
-+
-+ xmm6 = _mm_dp_ps(xmm3, xmm0, 0xf2); /* sum(re_in*im_tap), written to lane 1 */
-+ xmm7 = _mm_dp_ps(xmm1, xmm2, 0xf2); /* sum(im_in*re_tap), written to lane 1 */
-+
-+ real0 = _mm_add_ps(xmm4, real0);
-+ real1 = _mm_add_ps(xmm5, real1);
-+ im0 = _mm_add_ps(xmm6, im0);
-+ im1 = _mm_add_ps(xmm7, im1);
-+ }
-+
-+ real1 = _mm_xor_ps(real1, bit128_p(&neg)->float_vec); /* NOTE(review): looks like this negates the im*im sum so the add below yields re*re - im*im; bit128_p is defined elsewhere - confirm */
-+
-+ im0 = _mm_add_ps(im0, im1);
-+ real0 = _mm_add_ps(real0, real1);
-+
-+ im0 = _mm_add_ps(im0, real0); /* merge: real part sits in lane 0, imaginary part in lane 1 */
-+
-+ _mm_storel_pi(p_result, im0); /* write the low two floats to *result */
-+
-+ for(i = num_points-isodd; i < num_points; i++) { /* scalar tail for the samples not covered by the 4-wide loop */
-+ *result += input[i] * taps[i];
-+ }
-+}
-+
-+#endif /*LV_HAVE_SSE4_1*/
-+
-+#endif /*INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H*/
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_multiply_32fc.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_multiply_32fc.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_multiply_32fc.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_multiply_32fc.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,170 @@
-+#ifndef INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H
-+#define INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE3
-+#include
-+ /*!
-+ \brief Multiplies the two input complex vectors and stores their results in the third vector
-+ \param cVector The vector where the results will be stored
-+ \param aVector One of the vectors to be multiplied
-+ \param bVector One of the vectors to be multiplied
-+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
-+ */
-+static inline void volk_gnsssdr_32fc_x2_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
-+ const unsigned int pairCount = num_points / 2;
-+ unsigned int pairIdx;
-+
-+ __m128 aPair, bPair, bRe, bIm, aSwapped, prodRe, prodIm, prod;
-+ lv_32fc_t* out = cVector;
-+ const lv_32fc_t* lhs = aVector;
-+ const lv_32fc_t* rhs = bVector;
-+
-+ for(pairIdx = 0; pairIdx < pairCount; pairIdx++){
-+ /* Two complex samples per operand and iteration; unaligned loads and stores throughout. */
-+ aPair = _mm_loadu_ps((float*)lhs); // ar,ai,br,bi
-+ bPair = _mm_loadu_ps((float*)rhs); // cr,ci,dr,di
-+
-+ bRe = _mm_moveldup_ps(bPair); // cr,cr,dr,dr
-+ bIm = _mm_movehdup_ps(bPair); // ci,ci,di,di
-+
-+ prodRe = _mm_mul_ps(aPair,bRe); // ar*cr,ai*cr,br*dr,bi*dr
-+
-+ aSwapped = _mm_shuffle_ps(aPair,aPair,0xB1); // ai,ar,bi,br
-+
-+ prodIm = _mm_mul_ps(aSwapped,bIm); // ai*ci,ar*ci,bi*di,br*di
-+
-+ prod = _mm_addsub_ps(prodRe,prodIm); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ _mm_storeu_ps((float*)out,prod); // write both products to the output
-+
-+ lhs += 2;
-+ rhs += 2;
-+ out += 2;
-+ }
-+
-+ if(num_points & 1) { // odd count: one sample remains after the paired loop
-+ *out = (*lhs) * (*rhs);
-+ }
-+}
-+#endif /* LV_HAVE_SSE3 */
-+
-+#ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Multiplies the two input complex vectors and stores their results in the third vector
-+ \param cVector The vector where the results will be stored
-+ \param aVector One of the vectors to be multiplied
-+ \param bVector One of the vectors to be multiplied
-+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
-+ */
-+static inline void volk_gnsssdr_32fc_x2_multiply_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
-+ /* Portable reference kernel: element-wise complex product of two vectors. */
-+ /* Writes cVector[idx] = aVector[idx] * bVector[idx] for every idx below num_points. */
-+ unsigned int idx;
-+
-+ for(idx = 0; idx < num_points; idx++){
-+ /* plain indexing replaces three walking pointers */
-+ cVector[idx] = aVector[idx] * bVector[idx];
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+
-+#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H */
-+#ifndef INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H
-+#define INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE3
-+#include
-+ /*!
-+ \brief Multiplies the two input complex vectors and stores their results in the third vector
-+ \param cVector The vector where the results will be stored
-+ \param aVector One of the vectors to be multiplied
-+ \param bVector One of the vectors to be multiplied
-+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
-+ */
-+static inline void volk_gnsssdr_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
-+ const unsigned int pairCount = num_points / 2;
-+ unsigned int pairIdx;
-+
-+ __m128 aPair, bPair, bRe, bIm, aSwapped, prodRe, prodIm, prod;
-+ lv_32fc_t* out = cVector;
-+ const lv_32fc_t* lhs = aVector;
-+ const lv_32fc_t* rhs = bVector;
-+ for(pairIdx = 0; pairIdx < pairCount; pairIdx++){
-+ /* Two complex samples per operand and iteration; aligned loads and stores throughout. */
-+ aPair = _mm_load_ps((float*)lhs); // ar,ai,br,bi
-+ bPair = _mm_load_ps((float*)rhs); // cr,ci,dr,di
-+
-+ bRe = _mm_moveldup_ps(bPair); // cr,cr,dr,dr
-+ bIm = _mm_movehdup_ps(bPair); // ci,ci,di,di
-+
-+ prodRe = _mm_mul_ps(aPair,bRe); // ar*cr,ai*cr,br*dr,bi*dr
-+
-+ aSwapped = _mm_shuffle_ps(aPair,aPair,0xB1); // ai,ar,bi,br
-+
-+ prodIm = _mm_mul_ps(aSwapped,bIm); // ai*ci,ar*ci,bi*di,br*di
-+
-+ prod = _mm_addsub_ps(prodRe,prodIm); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ _mm_store_ps((float*)out,prod); // write both products to the output
-+
-+ lhs += 2;
-+ rhs += 2;
-+ out += 2;
-+ }
-+
-+ if(num_points & 1) { // odd count: one sample remains after the paired loop
-+ *out = (*lhs) * (*rhs);
-+ }
-+}
-+#endif /* LV_HAVE_SSE3 */
-+
-+#ifdef LV_HAVE_GENERIC
-+ /*!
-+ \brief Multiplies the two input complex vectors and stores their results in the third vector
-+ \param cVector The vector where the results will be stored
-+ \param aVector One of the vectors to be multiplied
-+ \param bVector One of the vectors to be multiplied
-+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
-+ */
-+static inline void volk_gnsssdr_32fc_x2_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
-+ /* Portable reference kernel: element-wise complex product of two vectors. */
-+ /* Writes cVector[idx] = aVector[idx] * bVector[idx] for every idx below num_points. */
-+ unsigned int idx;
-+
-+ for(idx = 0; idx < num_points; idx++){
-+ /* plain indexing replaces three walking pointers */
-+ cVector[idx] = aVector[idx] * bVector[idx];
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+#ifdef LV_HAVE_ORC
-+ /*!
-+ \brief Multiplies the two input complex vectors and stores their results in the third vector
-+ \param cVector The vector where the results will be stored
-+ \param aVector One of the vectors to be multiplied
-+ \param bVector One of the vectors to be multiplied
-+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
-+ */
-+extern void volk_gnsssdr_32fc_x2_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points); /* defined outside this header - presumably generated from an ORC source; confirm */
-+static inline void volk_gnsssdr_32fc_x2_multiply_32fc_u_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ /* NOTE(review): named _u_orc but forwards to the _a_ implementation - confirm this is intended */
-+ volk_gnsssdr_32fc_x2_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points); /* thin pass-through, arguments unchanged */
-+}
-+#endif /* LV_HAVE_ORC */
-+
-+
-+
-+
-+
-+#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,409 @@
-+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H
-+#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+/*!
-+ * TODO: Code the SSE4 version and benchmark it
-+ */
-+#ifdef LV_HAVE_SSE3
-+#include
-+
-+
-+ /*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Prompt PRN code replica input
-+ \param L_code Late PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Prompt correlation output
-+ \param L_out Late correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_sse3(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
-+{
-+ unsigned int number = 0;
-+ const unsigned int halfPoints = num_points / 2; // two complex samples are consumed per iteration
-+
-+ lv_32fc_t dotProduct_E;
-+ memset(&dotProduct_E, 0x0, 2*sizeof(float));
-+ lv_32fc_t dotProduct_P;
-+ memset(&dotProduct_P, 0x0, 2*sizeof(float));
-+ lv_32fc_t dotProduct_L;
-+ memset(&dotProduct_L, 0x0, 2*sizeof(float));
-+
-+ // SIMD temporaries plus one accumulator per correlator arm
-+ __m128 x, y, yl, yh, z, tmp1, tmp2, z_E, z_P, z_L;
-+
-+ z_E = _mm_setzero_ps();
-+ z_P = _mm_setzero_ps();
-+ z_L = _mm_setzero_ps();
-+
-+ // Walking pointers over the five input vectors.
-+ // This is the unaligned (_u_) kernel: every vector is read with _mm_loadu_ps.
-+ const lv_32fc_t* _input = input;
-+ const lv_32fc_t* _carrier = carrier;
-+ const lv_32fc_t* _E_code = E_code;
-+ const lv_32fc_t* _P_code = P_code;
-+ const lv_32fc_t* _L_code = L_code;
-+
-+ for(;number < halfPoints; number++)
-+ {
-+ // carrier wipe-off (vector point-to-point product)
-+ x = _mm_loadu_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
-+ y = _mm_loadu_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ // The wiped-off samples stay in a register; no baseband buffer is written.
-+
-+ // correlation E,P,L (3x vector scalar product)
-+ // Early
-+ // From here on x holds the two wiped-off samples (ar,ai,br,bi).
-+ x = z;
-+
-+ y = _mm_loadu_ps((float*)_E_code); // Unaligned load of the code replica as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together
-+
-+ // Prompt
-+ // x is still swapped (ai,ar,bi,br) from the Early stage at this point.
-+ y = _mm_loadu_ps((float*)_P_code); // Unaligned load of the code replica as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Undo the previous swap: x is ar,ai,br,bi again
-+
-+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together
-+
-+ // Late
-+ // x is still swapped (ai,ar,bi,br) from the Prompt stage at this point.
-+ y = _mm_loadu_ps((float*)_L_code); // Unaligned load of the code replica as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Undo the previous swap: x is ar,ai,br,bi again
-+
-+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together
-+
-+ /*pointer increment*/
-+ _carrier += 2;
-+ _input += 2;
-+ // (no baseband buffer pointer to advance)
-+ _E_code += 2;
-+ _P_code += 2;
-+ _L_code +=2;
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
-+ // These local spill buffers are declared 16-byte aligned, so the aligned stores below are safe.
-+
-+ _mm_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
-+
-+ dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
-+ dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
-+ dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );
-+
-+ if((num_points % 2) != 0)
-+ {
-+ // Odd count: fold in the single sample left after the paired loop.
-+ dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
-+ dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
-+ dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
-+ }
-+
-+ *E_out = dotProduct_E;
-+ *P_out = dotProduct_P;
-+ *L_out = dotProduct_L;
-+}
-+
-+#endif /* LV_HAVE_SSE3 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Prompt PRN code replica input
-+ \param L_code Late PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Prompt correlation output
-+ \param L_out Late correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
-+{
-+ lv_32fc_t bb_signal_sample;
-+
-+ bb_signal_sample = lv_cmake(0, 0);
-+
-+ *E_out = 0;
-+ *P_out = 0;
-+ *L_out = 0;
-+ // perform Early, Prompt and Late correlation; the index is unsigned to match num_points (a signed int overflowed for counts above INT_MAX)
-+ for(unsigned int i=0; i < num_points; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = input[i] * carrier[i];
-+ // Accumulate the wiped-off sample against each of the three code replicas
-+ *E_out += bb_signal_sample * E_code[i];
-+ *P_out += bb_signal_sample * P_code[i];
-+ *L_out += bb_signal_sample * L_code[i];
-+ }
-+}
-+
-+#endif /* LV_HAVE_GENERIC */
-+
-+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H */
-+
-+
-+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H
-+#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE3
-+#include
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Prompt PRN code replica input
-+ \param L_code Late PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Prompt correlation output
-+ \param L_out Late correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_sse3(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
-+{
-+ unsigned int number = 0;
-+ const unsigned int halfPoints = num_points / 2;
-+
-+ lv_32fc_t dotProduct_E;
-+ memset(&dotProduct_E, 0x0, 2*sizeof(float));
-+ lv_32fc_t dotProduct_P;
-+ memset(&dotProduct_P, 0x0, 2*sizeof(float));
-+ lv_32fc_t dotProduct_L;
-+ memset(&dotProduct_L, 0x0, 2*sizeof(float));
-+
-+ // Aux vars
-+ __m128 x, y, yl, yh, z, tmp1, tmp2, z_E, z_P, z_L;
-+
-+ z_E = _mm_setzero_ps();
-+ z_P = _mm_setzero_ps();
-+ z_L = _mm_setzero_ps();
-+
-+ //input and output vectors
-+ //lv_32fc_t* _input_BB = input_BB;
-+ const lv_32fc_t* _input = input;
-+ const lv_32fc_t* _carrier = carrier;
-+ const lv_32fc_t* _E_code = E_code;
-+ const lv_32fc_t* _P_code = P_code;
-+ const lv_32fc_t* _L_code = L_code;
-+
-+ for(;number < halfPoints; number++)
-+ {
-+ // carrier wipe-off (vector point-to-point product)
-+ x = _mm_load_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
-+ y = _mm_load_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ //_mm_storeu_ps((float*)_input_BB,z); // Store the results back into the _input_BB container
-+
-+ // correlation E,P,L (3x vector scalar product)
-+ // Early
-+ //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
-+ x = z;
-+
-+ y = _mm_load_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together
-+
-+ // Prompt
-+ //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
-+ y = _mm_load_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together
-+
-+ // Late
-+ //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
-+ y = _mm_load_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+
-+ z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together
-+
-+ /*pointer increment*/
-+ _carrier += 2;
-+ _input += 2;
-+ //_input_BB += 2;
-+ _E_code += 2;
-+ _P_code += 2;
-+ _L_code +=2;
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
-+ //__VOLK_ATTR_ALIGNED(16) lv_32fc_t _input_BB;
-+
-+ _mm_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
-+
-+ dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
-+ dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
-+ dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );
-+
-+ if((num_points % 2) != 0)
-+ {
-+ //_input_BB = (*_input) * (*_carrier);
-+ dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
-+ dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
-+ dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
-+ }
-+
-+ *E_out = dotProduct_E;
-+ *P_out = dotProduct_P;
-+ *L_out = dotProduct_L;
-+}
-+
-+#endif /* LV_HAVE_SSE3 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param E_code Early PRN code replica input
-+ \param P_code Prompt PRN code replica input
-+ \param L_code Late PRN code replica input
-+ \param E_out Early correlation output
-+ \param P_out Prompt correlation output
-+ \param L_out Late correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
-+{
-+    lv_32fc_t bb_signal_sample;
-+
-+    bb_signal_sample = lv_cmake(0, 0);
-+
-+    *E_out = 0;
-+    *P_out = 0;
-+    *L_out = 0;
-+    // perform Early, Prompt and Late correlation; the index is unsigned to match num_points
-+    for(unsigned int i = 0; i < num_points; ++i)
-+        {
-+            // Perform the carrier wipe-off: one baseband sample shared by the three correlators
-+            bb_signal_sample = input[i] * carrier[i];
-+            // Accumulate the product of the baseband sample with each code replica
-+            *E_out += bb_signal_sample * E_code[i];
-+            *P_out += bb_signal_sample * P_code[i];
-+            *L_out += bb_signal_sample * L_code[i];
-+        }
-+}
-+
-+#endif /* LV_HAVE_GENERIC */
-+
-+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,848 @@
-+/*!
-+ * \file volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5
-+ * \brief Volk protokernel: performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation with 64 bits vectors
-+ * \authors
-+ * - Javier Arribas, 2011. jarribas(at)cttc.es
-+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
-+ *
-+ *
-+ * Volk protokernel that performs the carrier wipe-off mixing and the
-+ * VE, Early, Prompt, Late and VL correlation with 64 bits vectors (32 bits the
-+ * real part and 32 bits the imaginary part):
-+ * - The carrier wipe-off is done by multiplying the input signal by the
-+ * carrier (multiplication of 64 bits vectors) It returns the input
-+ * signal in base band (BB)
-+ * - VE values are calculated by multiplying the input signal in BB by the
-+ * VE code (multiplication of 64 bits vectors), accumulating the results
-+ * - Early values are calculated by multiplying the input signal in BB by the
-+ * early code (multiplication of 64 bits vectors), accumulating the results
-+ * - Prompt values are calculated by multiplying the input signal in BB by the
-+ * prompt code (multiplication of 64 bits vectors), accumulating the results
-+ * - Late values are calculated by multiplying the input signal in BB by the
-+ * late code (multiplication of 64 bits vectors), accumulating the results
-+ * - VL values are calculated by multiplying the input signal in BB by the
-+ * VL code (multiplication of 64 bits vectors), accumulating the results
-+ *
-+ * -------------------------------------------------------------------------
-+ *
-+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
-+ *
-+ * GNSS-SDR is a software defined Global Navigation
-+ * Satellite Systems receiver
-+ *
-+ * This file is part of GNSS-SDR.
-+ *
-+ * GNSS-SDR is free software: you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation, either version 3 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * GNSS-SDR is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
-+ *
-+ * -------------------------------------------------------------------------
-+ */
-+
-+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H
-+#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_AVX
-+#include <immintrin.h>
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation (AVX, unaligned loads)
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param VE_code VE PRN code replica input
-+ \param E_code Early PRN code replica input
-+ \param P_code Prompt PRN code replica input
-+ \param L_code Late PRN code replica input
-+ \param VL_code VL PRN code replica input
-+ \param VE_out VE correlation output
-+ \param E_out Early correlation output
-+ \param P_out Prompt correlation output
-+ \param L_out Late correlation output
-+ \param VL_out VL correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_avx(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
-+{
-+    unsigned int number = 0;
-+    const unsigned int quarterPoints = num_points / 4; // one 256-bit register holds 4 complex samples
-+
-+    lv_32fc_t dotProduct_VE;
-+    lv_32fc_t dotProduct_E;
-+    lv_32fc_t dotProduct_P;
-+    lv_32fc_t dotProduct_L;
-+    lv_32fc_t dotProduct_VL;
-+
-+    // Aux vars: z_* are the per-correlator vector accumulators
-+    __m256 x, y, yl, yh, z, tmp1, tmp2, z_VE, z_E, z_P, z_L, z_VL;
-+    __m256 bb_signal_sample, bb_signal_sample_shuffled;
-+
-+    z_VE = _mm256_setzero_ps();
-+    z_E = _mm256_setzero_ps();
-+    z_P = _mm256_setzero_ps();
-+    z_L = _mm256_setzero_ps();
-+    z_VL = _mm256_setzero_ps();
-+
-+    // working copies of the input pointers, advanced as samples are consumed
-+    const lv_32fc_t* _input = input;
-+    const lv_32fc_t* _carrier = carrier;
-+    const lv_32fc_t* _VE_code = VE_code;
-+    const lv_32fc_t* _E_code = E_code;
-+    const lv_32fc_t* _P_code = P_code;
-+    const lv_32fc_t* _L_code = L_code;
-+    const lv_32fc_t* _VL_code = VL_code;
-+
-+    for(; number < quarterPoints; number++)
-+        {
-+            // carrier wipe-off (vector point-to-point product)
-+            x = _mm256_loadu_ps((float*)_input); // Load 4 complex input samples as ar,ai,br,bi,...
-+            y = _mm256_loadu_ps((float*)_carrier); // Load 4 complex carrier samples as cr,ci,dr,di,...
-+
-+            yl = _mm256_moveldup_ps(y); // yl = cr,cr,dr,dr,... (real parts duplicated)
-+            yh = _mm256_movehdup_ps(y); // yh = ci,ci,di,di,... (imaginary parts duplicated)
-+
-+            tmp1 = _mm256_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr,...
-+
-+            x = _mm256_shuffle_ps(x, x, 0xB1); // Swap real/imag of each sample: ai,ar,bi,br,...
-+
-+            tmp2 = _mm256_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di,...
-+
-+            bb_signal_sample = _mm256_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di,...
-+            bb_signal_sample_shuffled = _mm256_shuffle_ps(bb_signal_sample, bb_signal_sample, 0xB1); // Baseband samples with real/imag swapped, reused by all five correlators
-+
-+            // correlation VE,E,P,L,VL (5x vector scalar product)
-+            // VE
-+            y = _mm256_loadu_ps((float*)_VE_code); // Load 4 complex VE code samples as cr,ci,dr,di,...
-+
-+            yl = _mm256_moveldup_ps(y); // yl = cr,cr,dr,dr,...
-+            yh = _mm256_movehdup_ps(y); // yh = ci,ci,di,di,...
-+
-+            tmp1 = _mm256_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr,...
-+            tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di,...
-+
-+            z = _mm256_addsub_ps(tmp1, tmp2); // Complex product of baseband and VE code samples
-+            z_VE = _mm256_add_ps(z_VE, z); // Accumulate the 4 partial VE sums
-+
-+            // Early
-+            y = _mm256_loadu_ps((float*)_E_code); // Load 4 complex Early code samples as cr,ci,dr,di,...
-+
-+            yl = _mm256_moveldup_ps(y); // yl = cr,cr,dr,dr,...
-+            yh = _mm256_movehdup_ps(y); // yh = ci,ci,di,di,...
-+
-+            tmp1 = _mm256_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr,...
-+            tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di,...
-+
-+            z = _mm256_addsub_ps(tmp1, tmp2); // Complex product of baseband and Early code samples
-+            z_E = _mm256_add_ps(z_E, z); // Accumulate the 4 partial Early sums
-+
-+            // Prompt
-+            y = _mm256_loadu_ps((float*)_P_code); // Load 4 complex Prompt code samples as cr,ci,dr,di,...
-+
-+            yl = _mm256_moveldup_ps(y); // yl = cr,cr,dr,dr,...
-+            yh = _mm256_movehdup_ps(y); // yh = ci,ci,di,di,...
-+
-+            tmp1 = _mm256_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr,...
-+            tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di,...
-+
-+            z = _mm256_addsub_ps(tmp1, tmp2); // Complex product of baseband and Prompt code samples
-+            z_P = _mm256_add_ps(z_P, z); // Accumulate the 4 partial Prompt sums
-+
-+            // Late
-+            y = _mm256_loadu_ps((float*)_L_code); // Load 4 complex Late code samples as cr,ci,dr,di,...
-+
-+            yl = _mm256_moveldup_ps(y); // yl = cr,cr,dr,dr,...
-+            yh = _mm256_movehdup_ps(y); // yh = ci,ci,di,di,...
-+
-+            tmp1 = _mm256_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr,...
-+            tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di,...
-+
-+            z = _mm256_addsub_ps(tmp1, tmp2); // Complex product of baseband and Late code samples
-+            z_L = _mm256_add_ps(z_L, z); // Accumulate the 4 partial Late sums
-+
-+            // VL
-+            y = _mm256_loadu_ps((float*)_VL_code); // Load 4 complex VL code samples as cr,ci,dr,di,...
-+
-+            yl = _mm256_moveldup_ps(y); // yl = cr,cr,dr,dr,...
-+            yh = _mm256_movehdup_ps(y); // yh = ci,ci,di,di,...
-+
-+            tmp1 = _mm256_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr,...
-+            tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di,...
-+
-+            z = _mm256_addsub_ps(tmp1, tmp2); // Complex product of baseband and VL code samples
-+            z_VL = _mm256_add_ps(z_VL, z); // Accumulate the 4 partial VL sums
-+
-+            /* pointer increment: 4 complex samples were consumed from every stream */
-+            _carrier += 4;
-+            _input += 4;
-+            _VE_code += 4;
-+            _E_code += 4;
-+            _P_code += 4;
-+            _L_code += 4;
-+            _VL_code += 4;
-+        }
-+
-+    __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_VE[4];
-+    __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_E[4];
-+    __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_P[4];
-+    __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_L[4];
-+    __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_VL[4];
-+
-+    _mm256_storeu_ps((float*)dotProductVector_VE, z_VE); // Spill the 4 partial VE sums
-+    _mm256_storeu_ps((float*)dotProductVector_E, z_E); // Spill the 4 partial Early sums
-+    _mm256_storeu_ps((float*)dotProductVector_P, z_P); // Spill the 4 partial Prompt sums
-+    _mm256_storeu_ps((float*)dotProductVector_L, z_L); // Spill the 4 partial Late sums
-+    _mm256_storeu_ps((float*)dotProductVector_VL, z_VL); // Spill the 4 partial VL sums
-+
-+    dotProduct_VE = ( dotProductVector_VE[0] + dotProductVector_VE[1] + dotProductVector_VE[2] + dotProductVector_VE[3] );
-+    dotProduct_E = ( dotProductVector_E[0] + dotProductVector_E[1] + dotProductVector_E[2] + dotProductVector_E[3] );
-+    dotProduct_P = ( dotProductVector_P[0] + dotProductVector_P[1] + dotProductVector_P[2] + dotProductVector_P[3] );
-+    dotProduct_L = ( dotProductVector_L[0] + dotProductVector_L[1] + dotProductVector_L[2] + dotProductVector_L[3] );
-+    dotProduct_VL = ( dotProductVector_VL[0] + dotProductVector_VL[1] + dotProductVector_VL[2] + dotProductVector_VL[3] );
-+
-+    for (unsigned int i = 0; i < (num_points % 4); ++i) // scalar tail: the samples left over after the 4-wide loop
-+        {
-+            dotProduct_VE += (*_input) * (*_VE_code++) * (*_carrier);
-+            dotProduct_E += (*_input) * (*_E_code++) * (*_carrier);
-+            dotProduct_P += (*_input) * (*_P_code++) * (*_carrier);
-+            dotProduct_L += (*_input) * (*_L_code++) * (*_carrier);
-+            dotProduct_VL += (*_input++) * (*_VL_code++) * (*_carrier++); // input and carrier advance only here, after all five products used them
-+        }
-+
-+    *VE_out = dotProduct_VE;
-+    *E_out = dotProduct_E;
-+    *P_out = dotProduct_P;
-+    *L_out = dotProduct_L;
-+    *VL_out = dotProduct_VL;
-+}
-+#endif /* LV_HAVE_AVX */
-+
-+#ifdef LV_HAVE_SSE3
-+#include <pmmintrin.h>
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation (SSE3, unaligned loads)
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param VE_code VE PRN code replica input
-+ \param E_code Early PRN code replica input
-+ \param P_code Prompt PRN code replica input
-+ \param L_code Late PRN code replica input
-+ \param VL_code VL PRN code replica input
-+ \param VE_out VE correlation output
-+ \param E_out Early correlation output
-+ \param P_out Prompt correlation output
-+ \param L_out Late correlation output
-+ \param VL_out VL correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_sse3(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
-+{
-+    unsigned int number = 0;
-+    const unsigned int halfPoints = num_points / 2; // one 128-bit register holds 2 complex samples
-+
-+    lv_32fc_t dotProduct_VE;
-+    lv_32fc_t dotProduct_E;
-+    lv_32fc_t dotProduct_P;
-+    lv_32fc_t dotProduct_L;
-+    lv_32fc_t dotProduct_VL;
-+
-+    // Aux vars: z_* are the per-correlator vector accumulators
-+    __m128 x, y, yl, yh, z, tmp1, tmp2, z_VE, z_E, z_P, z_L, z_VL;
-+    __m128 bb_signal_sample, bb_signal_sample_shuffled;
-+
-+    z_VE = _mm_setzero_ps();
-+    z_E = _mm_setzero_ps();
-+    z_P = _mm_setzero_ps();
-+    z_L = _mm_setzero_ps();
-+    z_VL = _mm_setzero_ps();
-+
-+    // working copies of the input pointers, advanced as samples are consumed
-+    const lv_32fc_t* _input = input;
-+    const lv_32fc_t* _carrier = carrier;
-+    const lv_32fc_t* _VE_code = VE_code;
-+    const lv_32fc_t* _E_code = E_code;
-+    const lv_32fc_t* _P_code = P_code;
-+    const lv_32fc_t* _L_code = L_code;
-+    const lv_32fc_t* _VL_code = VL_code;
-+
-+    for(; number < halfPoints; number++)
-+        {
-+            // carrier wipe-off (vector point-to-point product)
-+            x = _mm_loadu_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
-+            y = _mm_loadu_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+            yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+            yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+            tmp1 = _mm_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+            x = _mm_shuffle_ps(x, x, 0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+            tmp2 = _mm_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+            bb_signal_sample = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+            bb_signal_sample_shuffled = _mm_shuffle_ps(bb_signal_sample, bb_signal_sample, 0xB1); // Baseband samples with real/imag swapped, reused by all five correlators
-+
-+            // correlation VE,E,P,L,VL (5x vector scalar product)
-+            // VE
-+            y = _mm_loadu_ps((float*)_VE_code); // Load 2 complex VE code samples as cr,ci,dr,di
-+
-+            yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+            yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+            tmp1 = _mm_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+            tmp2 = _mm_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+            z = _mm_addsub_ps(tmp1, tmp2); // Complex product of baseband and VE code samples
-+            z_VE = _mm_add_ps(z_VE, z); // Accumulate the 2 partial VE sums
-+
-+            // Early
-+            y = _mm_loadu_ps((float*)_E_code); // Load 2 complex Early code samples as cr,ci,dr,di
-+
-+            yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+            yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+            tmp1 = _mm_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+            tmp2 = _mm_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+            z = _mm_addsub_ps(tmp1, tmp2); // Complex product of baseband and Early code samples
-+            z_E = _mm_add_ps(z_E, z); // Accumulate the 2 partial Early sums
-+
-+            // Prompt
-+            y = _mm_loadu_ps((float*)_P_code); // Load 2 complex Prompt code samples as cr,ci,dr,di
-+
-+            yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+            yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+            tmp1 = _mm_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+            tmp2 = _mm_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+            z = _mm_addsub_ps(tmp1, tmp2); // Complex product of baseband and Prompt code samples
-+            z_P = _mm_add_ps(z_P, z); // Accumulate the 2 partial Prompt sums
-+
-+            // Late
-+            y = _mm_loadu_ps((float*)_L_code); // Load 2 complex Late code samples as cr,ci,dr,di
-+
-+            yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+            yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+            tmp1 = _mm_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+            tmp2 = _mm_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+            z = _mm_addsub_ps(tmp1, tmp2); // Complex product of baseband and Late code samples
-+            z_L = _mm_add_ps(z_L, z); // Accumulate the 2 partial Late sums
-+
-+            // VL
-+            y = _mm_loadu_ps((float*)_VL_code); // Load 2 complex VL code samples as cr,ci,dr,di
-+
-+            yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+            yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+            tmp1 = _mm_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+            tmp2 = _mm_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+            z = _mm_addsub_ps(tmp1, tmp2); // Complex product of baseband and VL code samples
-+            z_VL = _mm_add_ps(z_VL, z); // Accumulate the 2 partial VL sums
-+
-+            /* pointer increment: 2 complex samples were consumed from every stream */
-+            _carrier += 2;
-+            _input += 2;
-+            _VE_code += 2;
-+            _E_code += 2;
-+            _P_code += 2;
-+            _L_code += 2;
-+            _VL_code += 2;
-+        }
-+
-+    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VE[2];
-+    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
-+    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
-+    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
-+    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VL[2];
-+
-+    _mm_storeu_ps((float*)dotProductVector_VE, z_VE); // Spill the 2 partial VE sums
-+    _mm_storeu_ps((float*)dotProductVector_E, z_E); // Spill the 2 partial Early sums
-+    _mm_storeu_ps((float*)dotProductVector_P, z_P); // Spill the 2 partial Prompt sums
-+    _mm_storeu_ps((float*)dotProductVector_L, z_L); // Spill the 2 partial Late sums
-+    _mm_storeu_ps((float*)dotProductVector_VL, z_VL); // Spill the 2 partial VL sums
-+
-+    dotProduct_VE = ( dotProductVector_VE[0] + dotProductVector_VE[1] );
-+    dotProduct_E = ( dotProductVector_E[0] + dotProductVector_E[1] );
-+    dotProduct_P = ( dotProductVector_P[0] + dotProductVector_P[1] );
-+    dotProduct_L = ( dotProductVector_L[0] + dotProductVector_L[1] );
-+    dotProduct_VL = ( dotProductVector_VL[0] + dotProductVector_VL[1] );
-+
-+    // scalar tail: when num_points is odd, one sample is left after the 2-wide loop
-+    if((num_points % 2) != 0)
-+        {
-+            dotProduct_VE += (*_input) * (*_VE_code)*(*_carrier);
-+            dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
-+            dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
-+            dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
-+            dotProduct_VL += (*_input) * (*_VL_code)*(*_carrier);
-+        }
-+
-+    *VE_out = dotProduct_VE;
-+    *E_out = dotProduct_E;
-+    *P_out = dotProduct_P;
-+    *L_out = dotProduct_L;
-+    *VL_out = dotProduct_VL;
-+}
-+#endif /* LV_HAVE_SSE3 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param VE_code VE PRN code replica input
-+ \param E_code Early PRN code replica input
-+ \param P_code Prompt PRN code replica input
-+ \param L_code Late PRN code replica input
-+ \param VL_code VL PRN code replica input
-+ \param VE_out VE correlation output
-+ \param E_out Early correlation output
-+ \param P_out Prompt correlation output
-+ \param L_out Late correlation output
-+ \param VL_out VL correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
-+{
-+    lv_32fc_t bb_signal_sample;
-+
-+    bb_signal_sample = lv_cmake(0, 0);
-+
-+    *VE_out = 0;
-+    *E_out = 0;
-+    *P_out = 0;
-+    *L_out = 0;
-+    *VL_out = 0;
-+    // perform VE, Early, Prompt, Late and VL correlation; the index is unsigned to match num_points
-+    for(unsigned int i = 0; i < num_points; ++i)
-+        {
-+            // Perform the carrier wipe-off: one baseband sample shared by the five correlators
-+            bb_signal_sample = input[i] * carrier[i];
-+            // Accumulate the product of the baseband sample with each code replica
-+            *VE_out += bb_signal_sample * VE_code[i];
-+            *E_out += bb_signal_sample * E_code[i];
-+            *P_out += bb_signal_sample * P_code[i];
-+            *L_out += bb_signal_sample * L_code[i];
-+            *VL_out += bb_signal_sample * VL_code[i];
-+        }
-+}
-+
-+#endif /* LV_HAVE_GENERIC */
-+
-+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H */
-+
-+
-+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H
-+#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H
-+
-+#include
-+#include
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_AVX
-+#include <immintrin.h>
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation (AVX, aligned loads)
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param VE_code VE PRN code replica input
-+ \param E_code Early PRN code replica input
-+ \param P_code Prompt PRN code replica input
-+ \param L_code Late PRN code replica input
-+ \param VL_code VL PRN code replica input
-+ \param VE_out VE correlation output
-+ \param E_out Early correlation output
-+ \param P_out Prompt correlation output
-+ \param L_out Late correlation output
-+ \param VL_out VL correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_avx(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
-+{
-+    unsigned int number = 0;
-+    const unsigned int quarterPoints = num_points / 4; // one 256-bit register holds 4 complex samples
-+
-+    lv_32fc_t dotProduct_VE;
-+    lv_32fc_t dotProduct_E;
-+    lv_32fc_t dotProduct_P;
-+    lv_32fc_t dotProduct_L;
-+    lv_32fc_t dotProduct_VL;
-+
-+    // Aux vars: z_* are the per-correlator vector accumulators
-+    __m256 x, y, yl, yh, z, tmp1, tmp2, z_VE, z_E, z_P, z_L, z_VL;
-+    __m256 bb_signal_sample, bb_signal_sample_shuffled;
-+
-+    z_VE = _mm256_setzero_ps();
-+    z_E = _mm256_setzero_ps();
-+    z_P = _mm256_setzero_ps();
-+    z_L = _mm256_setzero_ps();
-+    z_VL = _mm256_setzero_ps();
-+
-+    // working copies of the input pointers, advanced as samples are consumed
-+    const lv_32fc_t* _input = input;
-+    const lv_32fc_t* _carrier = carrier;
-+    const lv_32fc_t* _VE_code = VE_code;
-+    const lv_32fc_t* _E_code = E_code;
-+    const lv_32fc_t* _P_code = P_code;
-+    const lv_32fc_t* _L_code = L_code;
-+    const lv_32fc_t* _VL_code = VL_code;
-+
-+    for(; number < quarterPoints; number++)
-+        {
-+            // carrier wipe-off (vector point-to-point product)
-+            x = _mm256_load_ps((float*)_input); // Load 4 complex input samples as ar,ai,br,bi,...
-+            y = _mm256_load_ps((float*)_carrier); // Load 4 complex carrier samples as cr,ci,dr,di,...
-+
-+            yl = _mm256_moveldup_ps(y); // yl = cr,cr,dr,dr,... (real parts duplicated)
-+            yh = _mm256_movehdup_ps(y); // yh = ci,ci,di,di,... (imaginary parts duplicated)
-+
-+            tmp1 = _mm256_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr,...
-+
-+            x = _mm256_shuffle_ps(x, x, 0xB1); // Swap real/imag of each sample: ai,ar,bi,br,...
-+
-+            tmp2 = _mm256_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di,...
-+
-+            bb_signal_sample = _mm256_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di,...
-+            bb_signal_sample_shuffled = _mm256_shuffle_ps(bb_signal_sample, bb_signal_sample, 0xB1); // Baseband samples with real/imag swapped, reused by all five correlators
-+
-+            // correlation VE,E,P,L,VL (5x vector scalar product)
-+            // VE
-+            y = _mm256_load_ps((float*)_VE_code); // Load 4 complex VE code samples as cr,ci,dr,di,...
-+
-+            yl = _mm256_moveldup_ps(y); // yl = cr,cr,dr,dr,...
-+            yh = _mm256_movehdup_ps(y); // yh = ci,ci,di,di,...
-+
-+            tmp1 = _mm256_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr,...
-+            tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di,...
-+
-+            z = _mm256_addsub_ps(tmp1, tmp2); // Complex product of baseband and VE code samples
-+            z_VE = _mm256_add_ps(z_VE, z); // Accumulate the 4 partial VE sums
-+
-+            // Early
-+            y = _mm256_load_ps((float*)_E_code); // Load 4 complex Early code samples as cr,ci,dr,di,...
-+
-+            yl = _mm256_moveldup_ps(y); // yl = cr,cr,dr,dr,...
-+            yh = _mm256_movehdup_ps(y); // yh = ci,ci,di,di,...
-+
-+            tmp1 = _mm256_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr,...
-+            tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di,...
-+
-+            z = _mm256_addsub_ps(tmp1, tmp2); // Complex product of baseband and Early code samples
-+            z_E = _mm256_add_ps(z_E, z); // Accumulate the 4 partial Early sums
-+
-+            // Prompt
-+            y = _mm256_load_ps((float*)_P_code); // Load 4 complex Prompt code samples as cr,ci,dr,di,...
-+
-+            yl = _mm256_moveldup_ps(y); // yl = cr,cr,dr,dr,...
-+            yh = _mm256_movehdup_ps(y); // yh = ci,ci,di,di,...
-+
-+            tmp1 = _mm256_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr,...
-+            tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di,...
-+
-+            z = _mm256_addsub_ps(tmp1, tmp2); // Complex product of baseband and Prompt code samples
-+            z_P = _mm256_add_ps(z_P, z); // Accumulate the 4 partial Prompt sums
-+
-+            // Late
-+            y = _mm256_load_ps((float*)_L_code); // Load 4 complex Late code samples as cr,ci,dr,di,...
-+
-+            yl = _mm256_moveldup_ps(y); // yl = cr,cr,dr,dr,...
-+            yh = _mm256_movehdup_ps(y); // yh = ci,ci,di,di,...
-+
-+            tmp1 = _mm256_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr,...
-+            tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di,...
-+
-+            z = _mm256_addsub_ps(tmp1, tmp2); // Complex product of baseband and Late code samples
-+            z_L = _mm256_add_ps(z_L, z); // Accumulate the 4 partial Late sums
-+
-+            // VL
-+            y = _mm256_load_ps((float*)_VL_code); // Load 4 complex VL code samples as cr,ci,dr,di,...
-+
-+            yl = _mm256_moveldup_ps(y); // yl = cr,cr,dr,dr,...
-+            yh = _mm256_movehdup_ps(y); // yh = ci,ci,di,di,...
-+
-+            tmp1 = _mm256_mul_ps(bb_signal_sample, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr,...
-+            tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di,...
-+
-+            z = _mm256_addsub_ps(tmp1, tmp2); // Complex product of baseband and VL code samples
-+            z_VL = _mm256_add_ps(z_VL, z); // Accumulate the 4 partial VL sums
-+
-+            /* pointer increment: 4 complex samples were consumed from every stream */
-+            _carrier += 4;
-+            _input += 4;
-+            _VE_code += 4;
-+            _E_code += 4;
-+            _P_code += 4;
-+            _L_code += 4;
-+            _VL_code += 4;
-+        }
-+
-+    __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_VE[4];
-+    __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_E[4];
-+    __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_P[4];
-+    __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_L[4];
-+    __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_VL[4];
-+
-+    _mm256_store_ps((float*)dotProductVector_VE, z_VE); // Spill the 4 partial VE sums
-+    _mm256_store_ps((float*)dotProductVector_E, z_E); // Spill the 4 partial Early sums
-+    _mm256_store_ps((float*)dotProductVector_P, z_P); // Spill the 4 partial Prompt sums
-+    _mm256_store_ps((float*)dotProductVector_L, z_L); // Spill the 4 partial Late sums
-+    _mm256_store_ps((float*)dotProductVector_VL, z_VL); // Spill the 4 partial VL sums
-+
-+    dotProduct_VE = ( dotProductVector_VE[0] + dotProductVector_VE[1] + dotProductVector_VE[2] + dotProductVector_VE[3] );
-+    dotProduct_E = ( dotProductVector_E[0] + dotProductVector_E[1] + dotProductVector_E[2] + dotProductVector_E[3] );
-+    dotProduct_P = ( dotProductVector_P[0] + dotProductVector_P[1] + dotProductVector_P[2] + dotProductVector_P[3] );
-+    dotProduct_L = ( dotProductVector_L[0] + dotProductVector_L[1] + dotProductVector_L[2] + dotProductVector_L[3] );
-+    dotProduct_VL = ( dotProductVector_VL[0] + dotProductVector_VL[1] + dotProductVector_VL[2] + dotProductVector_VL[3] );
-+
-+    for (unsigned int i = 0; i < (num_points % 4); ++i) // scalar tail: the samples left over after the 4-wide loop
-+        {
-+            dotProduct_VE += (*_input) * (*_VE_code++) * (*_carrier);
-+            dotProduct_E += (*_input) * (*_E_code++) * (*_carrier);
-+            dotProduct_P += (*_input) * (*_P_code++) * (*_carrier);
-+            dotProduct_L += (*_input) * (*_L_code++) * (*_carrier);
-+            dotProduct_VL += (*_input++) * (*_VL_code++) * (*_carrier++); // input and carrier advance only here, after all five products used them
-+        }
-+
-+    *VE_out = dotProduct_VE;
-+    *E_out = dotProduct_E;
-+    *P_out = dotProduct_P;
-+    *L_out = dotProduct_L;
-+    *VL_out = dotProduct_VL;
-+}
-+#endif /* LV_HAVE_AVX */
-+
-+#ifdef LV_HAVE_SSE3
-+#include
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param VE_code VE PRN code replica input
-+ \param E_code Early PRN code replica input
-+ \param P_code Early PRN code replica input
-+ \param L_code Early PRN code replica input
-+ \param VL_code VL PRN code replica input
-+ \param VE_out VE correlation output
-+ \param E_out Early correlation output
-+ \param P_out Early correlation output
-+ \param L_out Early correlation output
-+ \param VL_out VL correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_sse3(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
-+{
-+ unsigned int number = 0;
-+ const unsigned int halfPoints = num_points / 2;
-+
-+ lv_32fc_t dotProduct_VE;
-+ lv_32fc_t dotProduct_E;
-+ lv_32fc_t dotProduct_P;
-+ lv_32fc_t dotProduct_L;
-+ lv_32fc_t dotProduct_VL;
-+
-+ // Aux vars
-+ __m128 x, y, yl, yh, z, tmp1, tmp2, z_VE, z_E, z_P, z_L, z_VL;
-+ __m128 bb_signal_sample, bb_signal_sample_shuffled;
-+
-+ z_VE = _mm_setzero_ps();
-+ z_E = _mm_setzero_ps();
-+ z_P = _mm_setzero_ps();
-+ z_L = _mm_setzero_ps();
-+ z_VL = _mm_setzero_ps();
-+
-+ //input and output vectors
-+ const lv_32fc_t* _input = input;
-+ const lv_32fc_t* _carrier = carrier;
-+ const lv_32fc_t* _VE_code = VE_code;
-+ const lv_32fc_t* _E_code = E_code;
-+ const lv_32fc_t* _P_code = P_code;
-+ const lv_32fc_t* _L_code = L_code;
-+ const lv_32fc_t* _VL_code = VL_code;
-+
-+ for(;number < halfPoints; number++)
-+ {
-+ // carrier wipe-off (vector point-to-point product)
-+ x = _mm_load_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
-+ y = _mm_load_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+
-+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-+
-+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ bb_signal_sample = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+ bb_signal_sample_shuffled = _mm_shuffle_ps(bb_signal_sample,bb_signal_sample,0xB1); // Re-arrange bb_signal_sample to be ai,ar,bi,br
-+
-+ // correlation VE,E,P,L,VL (5x vector scalar product)
-+ // VE
-+ y = _mm_load_ps((float*)_VE_code); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+ tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+ z_VE = _mm_add_ps(z_VE, z); // Add the complex multiplication results together
-+
-+ // Early
-+ y = _mm_load_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+ tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+ z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together
-+
-+ // Prompt
-+ y = _mm_load_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+ tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+ z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together
-+
-+ // Late
-+ y = _mm_load_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+ tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+ z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together
-+
-+ // VL
-+ //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
-+ y = _mm_load_ps((float*)_VL_code); // Load the cr + ci, dr + di as cr,ci,dr,di
-+
-+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-+
-+ tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-+ tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-+
-+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
-+ z_VL = _mm_add_ps(z_VL, z); // Add the complex multiplication results together
-+
-+ /*pointer increment*/
-+ _carrier += 2;
-+ _input += 2;
-+ _VE_code += 2;
-+ _E_code += 2;
-+ _P_code += 2;
-+ _L_code +=2;
-+ _VL_code +=2;
-+ }
-+
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VE[2];
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
-+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VL[2];
-+
-+ _mm_store_ps((float*)dotProductVector_VE,z_VE); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
-+ _mm_store_ps((float*)dotProductVector_VL,z_VL); // Store the results back into the dot product vector
-+
-+ dotProduct_VE = ( dotProductVector_VE[0] + dotProductVector_VE[1] );
-+ dotProduct_E = ( dotProductVector_E[0] + dotProductVector_E[1] );
-+ dotProduct_P = ( dotProductVector_P[0] + dotProductVector_P[1] );
-+ dotProduct_L = ( dotProductVector_L[0] + dotProductVector_L[1] );
-+ dotProduct_VL = ( dotProductVector_VL[0] + dotProductVector_VL[1] );
-+
-+ if((num_points % 2) != 0)
-+ {
-+ dotProduct_VE += (*_input) * (*_VE_code)*(*_carrier);
-+ dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
-+ dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
-+ dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
-+ dotProduct_VL += (*_input) * (*_VL_code)*(*_carrier);
-+ }
-+
-+ *VE_out = dotProduct_VE;
-+ *E_out = dotProduct_E;
-+ *P_out = dotProduct_P;
-+ *L_out = dotProduct_L;
-+ *VL_out = dotProduct_VL;
-+}
-+#endif /* LV_HAVE_SSE3 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
-+ \param input The input signal input
-+ \param carrier The carrier signal input
-+ \param VE_code VE PRN code replica input
-+ \param E_code Early PRN code replica input
-+ \param P_code Early PRN code replica input
-+ \param L_code Early PRN code replica input
-+ \param VL_code VL PRN code replica input
-+ \param VE_out VE correlation output
-+ \param E_out Early correlation output
-+ \param P_out Early correlation output
-+ \param L_out Early correlation output
-+ \param VL_out VL correlation output
-+ \param num_points The number of complex values in vectors
-+ */
-+static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
-+{
-+ lv_32fc_t bb_signal_sample;
-+
-+ bb_signal_sample = lv_cmake(0, 0);
-+
-+ *VE_out = 0;
-+ *E_out = 0;
-+ *P_out = 0;
-+ *L_out = 0;
-+ *VL_out = 0;
-+ // perform Early, Prompt and Late correlation
-+ for(int i=0; i < num_points; ++i)
-+ {
-+ //Perform the carrier wipe-off
-+ bb_signal_sample = input[i] * carrier[i];
-+ // Now get early, late, and prompt values for each
-+ *VE_out += bb_signal_sample * VE_code[i];
-+ *E_out += bb_signal_sample * E_code[i];
-+ *P_out += bb_signal_sample * P_code[i];
-+ *L_out += bb_signal_sample * L_code[i];
-+ *VL_out += bb_signal_sample * VL_code[i];
-+ }
-+}
-+#endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H */
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,243 @@
-+/*!
-+ * \file volk_gnsssdr_64f_accumulator_64f.h
-+ * \brief Volk protokernel: 64 bits (double) scalar accumulator
-+ * \authors
-+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
-+ *
-+ *
-+ * Volk protokernel that implements an accumulator of char values
-+ *
-+ * -------------------------------------------------------------------------
-+ *
-+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
-+ *
-+ * GNSS-SDR is a software defined Global Navigation
-+ * Satellite Systems receiver
-+ *
-+ * This file is part of GNSS-SDR.
-+ *
-+ * GNSS-SDR is free software: you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation, either version 3 of the License, or
-+ * at your option) any later version.
-+ *
-+ * GNSS-SDR is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with GNSS-SDR. If not, see .
-+ *
-+ * -------------------------------------------------------------------------
-+ */
-+
-+#ifndef INCLUDED_volk_gnsssdr_64f_accumulator_64f_u_H
-+#define INCLUDED_volk_gnsssdr_64f_accumulator_64f_u_H
-+
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_AVX
-+#include
-+/*!
-+ \brief Accumulates the values in the input buffer
-+ \param result The accumulated result
-+ \param inputBuffer The buffer of data to be accumulated
-+ \param num_points The number of values in inputBuffer to be accumulated
-+ */
-+static inline void volk_gnsssdr_64f_accumulator_64f_u_avx(double* result,const double* inputBuffer, unsigned int num_points){
-+ double returnValue = 0;
-+ const unsigned int sse_iters = num_points / 4;
-+
-+ const double* aPtr = inputBuffer;
-+
-+ __VOLK_ATTR_ALIGNED(32) double tempBuffer[4];
-+ __m256d accumulator = _mm256_setzero_pd();
-+ __m256d aVal = _mm256_setzero_pd();
-+
-+ for(unsigned int number = 0; number < sse_iters; number++)
-+ {
-+ aVal = _mm256_loadu_pd(aPtr);
-+ accumulator = _mm256_add_pd(accumulator, aVal);
-+ aPtr += 4;
-+ }
-+
-+ _mm256_storeu_pd((double*)tempBuffer,accumulator);
-+
-+ for(int i = 0; i<4; ++i){
-+ returnValue += tempBuffer[i];
-+ }
-+
-+ for(int i = 0; i<(num_points % 4); ++i){
-+ returnValue += (*aPtr++);
-+ }
-+
-+ *result = returnValue;
-+}
-+#endif /* LV_HAVE_AVX */
-+
-+#ifdef LV_HAVE_SSE3
-+#include
-+/*!
-+ \brief Accumulates the values in the input buffer
-+ \param result The accumulated result
-+ \param inputBuffer The buffer of data to be accumulated
-+ \param num_points The number of values in inputBuffer to be accumulated
-+ */
-+static inline void volk_gnsssdr_64f_accumulator_64f_u_sse3(double* result,const double* inputBuffer, unsigned int num_points){
-+ double returnValue = 0;
-+ const unsigned int sse_iters = num_points / 2;
-+
-+ const double* aPtr = inputBuffer;
-+
-+ __VOLK_ATTR_ALIGNED(16) double tempBuffer[2];
-+ __m128d accumulator = _mm_setzero_pd();
-+ __m128d aVal = _mm_setzero_pd();
-+
-+ for(unsigned int number = 0; number < sse_iters; number++)
-+ {
-+ aVal = _mm_loadu_pd(aPtr);
-+ accumulator = _mm_add_pd(accumulator, aVal);
-+ aPtr += 2;
-+ }
-+
-+ _mm_storeu_pd((double*)tempBuffer,accumulator);
-+
-+ for(int i = 0; i<2; ++i){
-+ returnValue += tempBuffer[i];
-+ }
-+
-+ for(int i = 0; i<(num_points % 2); ++i){
-+ returnValue += (*aPtr++);
-+ }
-+
-+ *result = returnValue;
-+}
-+#endif /* LV_HAVE_SSE3 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Accumulates the values in the input buffer
-+ \param result The accumulated result
-+ \param inputBuffer The buffer of data to be accumulated
-+ \param num_points The number of values in inputBuffer to be accumulated
-+ */
-+static inline void volk_gnsssdr_64f_accumulator_64f_generic(double* result,const double* inputBuffer, unsigned int num_points){
-+ const double* aPtr = inputBuffer;
-+ double returnValue = 0;
-+
-+ for(unsigned int number = 0;number < num_points; number++){
-+ returnValue += (*aPtr++);
-+ }
-+ *result = returnValue;
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+#endif /* INCLUDED_volk_gnsssdr_64f_accumulator_64f_u_H */
-+
-+
-+#ifndef INCLUDED_volk_gnsssdr_64f_accumulator_64f_a_H
-+#define INCLUDED_volk_gnsssdr_64f_accumulator_64f_a_H
-+
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_AVX
-+#include
-+/*!
-+ \brief Accumulates the values in the input buffer
-+ \param result The accumulated result
-+ \param inputBuffer The buffer of data to be accumulated
-+ \param num_points The number of values in inputBuffer to be accumulated
-+ */
-+static inline void volk_gnsssdr_64f_accumulator_64f_a_avx(double* result,const double* inputBuffer, unsigned int num_points){
-+ double returnValue = 0;
-+ const unsigned int sse_iters = num_points / 4;
-+
-+ const double* aPtr = inputBuffer;
-+
-+ __VOLK_ATTR_ALIGNED(32) double tempBuffer[4];
-+ __m256d accumulator = _mm256_setzero_pd();
-+ __m256d aVal = _mm256_setzero_pd();
-+
-+ for(unsigned int number = 0; number < sse_iters; number++)
-+ {
-+ aVal = _mm256_load_pd(aPtr);
-+ accumulator = _mm256_add_pd(accumulator, aVal);
-+ aPtr += 4;
-+ }
-+
-+ _mm256_store_pd((double*)tempBuffer,accumulator);
-+
-+ for(int i = 0; i<4; ++i){
-+ returnValue += tempBuffer[i];
-+ }
-+
-+ for(int i = 0; i<(num_points % 4); ++i){
-+ returnValue += (*aPtr++);
-+ }
-+
-+ *result = returnValue;
-+}
-+#endif /* LV_HAVE_AVX */
-+
-+#ifdef LV_HAVE_SSE3
-+#include
-+/*!
-+ \brief Accumulates the values in the input buffer
-+ \param result The accumulated result
-+ \param inputBuffer The buffer of data to be accumulated
-+ \param num_points The number of values in inputBuffer to be accumulated
-+ */
-+static inline void volk_gnsssdr_64f_accumulator_64f_a_sse3(double* result,const double* inputBuffer, unsigned int num_points){
-+ double returnValue = 0;
-+ const unsigned int sse_iters = num_points / 2;
-+
-+ const double* aPtr = inputBuffer;
-+
-+ __VOLK_ATTR_ALIGNED(16) double tempBuffer[2];
-+ __m128d accumulator = _mm_setzero_pd();
-+ __m128d aVal = _mm_setzero_pd();
-+
-+ for(unsigned int number = 0; number < sse_iters; number++)
-+ {
-+ aVal = _mm_load_pd(aPtr);
-+ accumulator = _mm_add_pd(accumulator, aVal);
-+ aPtr += 2;
-+ }
-+
-+ _mm_store_pd((double*)tempBuffer,accumulator);
-+
-+ for(int i = 0; i<2; ++i){
-+ returnValue += tempBuffer[i];
-+ }
-+
-+ for(int i = 0; i<(num_points % 2); ++i){
-+ returnValue += (*aPtr++);
-+ }
-+
-+ *result = returnValue;
-+}
-+#endif /* LV_HAVE_SSE3 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Accumulates the values in the input buffer
-+ \param result The accumulated result
-+ \param inputBuffer The buffer of data to be accumulated
-+ \param num_points The number of values in inputBuffer to be accumulated
-+ */
-+static inline void volk_gnsssdr_64f_accumulator_64f_a_generic(double* result,const double* inputBuffer, unsigned int num_points){
-+ const double* aPtr = inputBuffer;
-+ double returnValue = 0;
-+
-+ for(unsigned int number = 0;number < num_points; number++){
-+ returnValue += (*aPtr++);
-+ }
-+ *result = returnValue;
-+}
-+#endif /* LV_HAVE_GENERIC */
-+#endif /* INCLUDED_volk_gnsssdr_64f_accumulator_64f_a_H */
-\ No newline at end of file
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,183 @@
-+/*!
-+ * \file volk_gnsssdr_8i_accumulator_s8i.h
-+ * \brief Volk protokernel: 8 bits (char) scalar accumulator
-+ * \authors
-+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
-+ *
-+ *
-+ * Volk protokernel that implements an accumulator of char values
-+ *
-+ * -------------------------------------------------------------------------
-+ *
-+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
-+ *
-+ * GNSS-SDR is a software defined Global Navigation
-+ * Satellite Systems receiver
-+ *
-+ * This file is part of GNSS-SDR.
-+ *
-+ * GNSS-SDR is free software: you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation, either version 3 of the License, or
-+ * at your option) any later version.
-+ *
-+ * GNSS-SDR is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with GNSS-SDR. If not, see .
-+ *
-+ * -------------------------------------------------------------------------
-+ */
-+
-+#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H
-+#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H
-+
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE3
-+#include
-+/*!
-+ \brief Accumulates the values in the input buffer
-+ \param result The accumulated result
-+ \param inputBuffer The buffer of data to be accumulated
-+ \param num_points The number of values in inputBuffer to be accumulated
-+ */
-+static inline void volk_gnsssdr_8i_accumulator_s8i_u_sse3(char* result, const char* inputBuffer, unsigned int num_points){
-+ char returnValue = 0;
-+ const unsigned int sse_iters = num_points / 16;
-+
-+ const char* aPtr = inputBuffer;
-+
-+ __VOLK_ATTR_ALIGNED(16) char tempBuffer[16];
-+ __m128i accumulator = _mm_setzero_si128();
-+ __m128i aVal = _mm_setzero_si128();
-+
-+ for(unsigned int number = 0; number < sse_iters; number++){
-+ aVal = _mm_lddqu_si128((__m128i*)aPtr);
-+ accumulator = _mm_add_epi8(accumulator, aVal);
-+ aPtr += 16;
-+ }
-+ _mm_storeu_si128((__m128i*)tempBuffer,accumulator);
-+
-+ for(int i = 0; i<16; ++i){
-+ returnValue += tempBuffer[i];
-+ }
-+
-+ for(int i = 0; i<(num_points % 16); ++i){
-+ returnValue += (*aPtr++);
-+ }
-+
-+ *result = returnValue;
-+}
-+#endif /* LV_HAVE_SSE3 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Accumulates the values in the input buffer
-+ \param result The accumulated result
-+ \param inputBuffer The buffer of data to be accumulated
-+ \param num_points The number of values in inputBuffer to be accumulated
-+ */
-+static inline void volk_gnsssdr_8i_accumulator_s8i_generic(char* result, const char* inputBuffer, unsigned int num_points){
-+ const char* aPtr = inputBuffer;
-+ char returnValue = 0;
-+
-+ for(unsigned int number = 0;number < num_points; number++){
-+ returnValue += (*aPtr++);
-+ }
-+ *result = returnValue;
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H */
-+
-+
-+#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H
-+#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H
-+
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_SSE3
-+#include
-+/*!
-+ \brief Accumulates the values in the input buffer
-+ \param result The accumulated result
-+ \param inputBuffer The buffer of data to be accumulated
-+ \param num_points The number of values in inputBuffer to be accumulated
-+ */
-+static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const char* inputBuffer, unsigned int num_points){
-+ char returnValue = 0;
-+ const unsigned int sse_iters = num_points / 16;
-+
-+ const char* aPtr = inputBuffer;
-+
-+ __VOLK_ATTR_ALIGNED(16) char tempBuffer[16];
-+ __m128i accumulator = _mm_setzero_si128();
-+ __m128i aVal = _mm_setzero_si128();
-+
-+ for(unsigned int number = 0; number < sse_iters; number++){
-+ aVal = _mm_load_si128((__m128i*)aPtr);
-+ accumulator = _mm_add_epi8(accumulator, aVal);
-+ aPtr += 16;
-+ }
-+ _mm_store_si128((__m128i*)tempBuffer,accumulator);
-+
-+ for(int i = 0; i<16; ++i){
-+ returnValue += tempBuffer[i];
-+ }
-+
-+ for(int i = 0; i<(num_points % 16); ++i){
-+ returnValue += (*aPtr++);
-+ }
-+
-+ *result = returnValue;
-+}
-+#endif /* LV_HAVE_SSE3 */
-+
-+#ifdef LV_HAVE_GENERIC
-+/*!
-+ \brief Accumulates the values in the input buffer
-+ \param result The accumulated result
-+ \param inputBuffer The buffer of data to be accumulated
-+ \param num_points The number of values in inputBuffer to be accumulated
-+ */
-+static inline void volk_gnsssdr_8i_accumulator_s8i_a_generic(char* result, const char* inputBuffer, unsigned int num_points){
-+ const char* aPtr = inputBuffer;
-+ char returnValue = 0;
-+
-+ for(unsigned int number = 0;number < num_points; number++){
-+ returnValue += (*aPtr++);
-+ }
-+ *result = returnValue;
-+}
-+#endif /* LV_HAVE_GENERIC */
-+
-+#ifdef LV_HAVE_ORC
-+/*!
-+ \brief Accumulates the values in the input buffer
-+ \param result The accumulated result
-+ \param inputBuffer The buffer of data to be accumulated
-+ \param num_points The number of values in inputBuffer to be accumulated
-+ */
-+extern void volk_gnsssdr_8i_accumulator_s8i_a_orc_impl(short* result, const char* inputBuffer, unsigned int num_points);
-+static inline void volk_gnsssdr_8i_accumulator_s8i_u_orc(char* result, const char* inputBuffer, unsigned int num_points){
-+
-+ short res = 0;
-+ char* resc = (char*)&res;
-+ resc++;
-+
-+ volk_gnsssdr_8i_accumulator_s8i_a_orc_impl(&res, inputBuffer, num_points);
-+
-+ *result = *resc;
-+}
-+#endif /* LV_HAVE_ORC */
-+
-+#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H */
-+
-diff -rupN /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h
---- /Users/andres/Desktop/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h 1970-01-01 01:00:00.000000000 +0100
-+++ /Users/andres/Desktop/volk_gnsssdr_original/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h 2014-10-15 01:55:08.000000000 +0200
-@@ -0,0 +1,493 @@
-+/*!
-+ * \file volk_gnsssdr_8i_index_max_16u.h
-+ * \brief Volk protokernel: calculates the index of the maximum value in a group of 8 bits (char) scalars
-+ * \authors
-+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
-+ *
-+ *
-+ * Volk protokernel that returns the index of the maximum value of a group of 8 bits (char) scalars
-+ *
-+ * -------------------------------------------------------------------------
-+ *
-+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
-+ *
-+ * GNSS-SDR is a software defined Global Navigation
-+ * Satellite Systems receiver
-+ *
-+ * This file is part of GNSS-SDR.
-+ *
-+ * GNSS-SDR is free software: you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation, either version 3 of the License, or
-+ * at your option) any later version.
-+ *
-+ * GNSS-SDR is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with GNSS-SDR. If not, see .
-+ *
-+ * -------------------------------------------------------------------------
-+ */
-+
-+#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H
-+#define INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H
-+
-+#include
-+#include
-+#include
-+
-+#ifdef LV_HAVE_AVX
-+#include "immintrin.h"
-+/*!
-+ \brief Returns the index of the max value in src0
-+ \param target The index of the max value in src0
-+ \param src0 The buffer of data to be analysed
-+ \param num_points The number of values in src0 to be analysed
-+ */
-+static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, const char* src0, unsigned int num_points) {
-+ if(num_points > 0){
-+ const unsigned int sse_iters = num_points / 32;
-+
-+ char* basePtr = (char*)src0;
-+ char* inputPtr = (char*)src0;
-+ char max = src0[0];
-+ unsigned int index = 0;
-+ __VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
-+ __m256i ones, compareResults, currentValues;
-+ __m128i compareResultslo, compareResultshi, maxValues, lo, hi;
-+
-+ ones = _mm256_set1_epi8(0xFF);
-+ maxValues = _mm_set1_epi8(max);
-+
-+ for(unsigned int number = 0; number < sse_iters; number++)
-+ {
-+ currentValues = _mm256_lddqu_si256((__m256i*)inputPtr);
-+
-+ lo = _mm256_castsi256_si128(currentValues);
-+ hi = _mm256_extractf128_si256(currentValues,1);
-+
-+ compareResultslo = _mm_cmpgt_epi8(maxValues, lo);
-+ compareResultshi = _mm_cmpgt_epi8(maxValues, hi);
-+
-+ //compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h
-+ compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1);
-+
-+ if (!_mm256_testc_si256(compareResults, ones))
-+ {
-+ _mm256_storeu_si256((__m256i*)¤tValuesBuffer, currentValues);
-+
-+ for(int i = 0; i < 32; i++)
-+ {
-+ if(currentValuesBuffer[i] > max)
-+ {
-+ index = inputPtr - basePtr + i;
-+ max = currentValuesBuffer[i];
-+ }
-+ }
-+ maxValues = _mm_set1_epi8(max);
-+ }
-+
-+ inputPtr += 32;
-+ }
-+
-+ for(int i = 0; i<(num_points % 32); ++i)
-+ {
-+ if(src0[i] > max)
-+ {
-+ index = i;
-+ max = src0[i];
-+ }
-+ }
-+ target[0] = index;
-+ }
-+}
-+
-+#endif /*LV_HAVE_AVX*/
-+
-+#ifdef LV_HAVE_SSE4_1
-+#include