diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7abf997dd..033cee5bf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -333,6 +333,40 @@ if(NOT GNURADIO_TRELLIS_FOUND)
endif()
+###############################################################################
+# Volk_gnsssdr module
+# In order to use the volk_gnsssdr module it is necessary to add:
+# 1) include_directories(..${VOLK_GNSSSDR_INCLUDE_DIRS}..)
+# 2) target_link_libraries(..${VOLK_GNSSSDR_LIBRARIES}..)
+###############################################################################
+
+if(ENABLE_VOLK_GNSSSDR)
+ message(STATUS "The volk_gnsssdr module with custom protokernels coded by gnss-sdr will be compiled.")
+ message(STATUS "You can disable it with 'cmake -DENABLE_VOLK_GNSSSDR=OFF ../'" )
+else(ENABLE_VOLK_GNSSSDR)
+ message(STATUS "The volk_gnsssdr module with custom protokernels coded by gnss-sdr is not enabled. Some configurations that use custom protokernels will not work." )
+ message(STATUS "Enable it with 'cmake -D ENABLE_VOLK_GNSSSDR=ON ../'." )
+endif(ENABLE_VOLK_GNSSSDR)
+
+if(ENABLE_VOLK_GNSSSDR)
+ set(VOLK_GNSSSDR_BASE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/src/algorithms/libs/volk_gnsssdr)
+ add_subdirectory(${VOLK_GNSSSDR_BASE_PATH})
+
+ set(VOLK_GNSSSDR_INCLUDE_DIRS
+ ${VOLK_GNSSSDR_BASE_PATH}/include
+ ${CMAKE_CURRENT_BINARY_DIR}/src/algorithms/libs/volk_gnsssdr/include
+ )
+
+ set(VOLK_GNSSSDR_LIBRARIES
+ #Path to libs of volk_gnsssdr target: ${VOLK_GNSSSDR_BASE_PATH}/lib/Debug/libvolk_gnsssdr.dylib
+ volk_gnsssdr
+ )
+
+ message(" * INCLUDES: ${VOLK_GNSSSDR_INCLUDE_DIRS} ")
+ message(" * LIBS: ${VOLK_GNSSSDR_LIBRARIES} ")
+ message("-- END OF: Setup volk_gnsssdr as a subproject.")
+endif(ENABLE_VOLK_GNSSSDR)
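+
+# Illustrative only (not part of the build): a downstream target elsewhere in
+# gnss-sdr would consume the two variables defined above roughly like this,
+# with hypothetical target/source names:
+#
+#   include_directories(${VOLK_GNSSSDR_INCLUDE_DIRS})
+#   add_executable(my_app main.cc)
+#   target_link_libraries(my_app ${VOLK_GNSSSDR_LIBRARIES})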
+
################################################################################
# gflags - http://code.google.com/p/gflags/
diff --git a/src/algorithms/libs/volk_gnsssdr/CMakeLists.txt b/src/algorithms/libs/volk_gnsssdr/CMakeLists.txt
new file mode 100644
index 000000000..77481beda
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/CMakeLists.txt
@@ -0,0 +1,183 @@
+#
+# Copyright 2011 Free Software Foundation, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+########################################################################
+# Project setup
+########################################################################
+cmake_minimum_required(VERSION 2.6)
+if(NOT DEFINED CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "Choose build type: None Debug Release RelWithDebInfo MinSizeRel")
+project(volk_gnsssdr)
+enable_language(CXX)
+enable_language(C)
+enable_testing()
+set(VERSION 0.1)
+set(LIBVER 0.0.0)
+
+set(CMAKE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) #allows this to be a sub-project
+set(CMAKE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) #allows this to be a sub-project
+set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) #location for custom "Modules"
+
+########################################################################
+# Environment setup
+########################################################################
+IF(NOT DEFINED BOOST_ROOT)
+ SET(BOOST_ROOT ${CMAKE_INSTALL_PREFIX})
+ENDIF()
+
+IF(NOT DEFINED CROSSCOMPILE_MULTILIB)
+ SET(CROSSCOMPILE_MULTILIB "")
+ENDIF()
+SET(CROSSCOMPILE_MULTILIB ${CROSSCOMPILE_MULTILIB} CACHE STRING "Define \"true\" if you have and want to use multiple C development libs installed for cross compile")
+
+
+########################################################################
+# Dependencies setup
+########################################################################
+include(GrPython) #sets PYTHON_EXECUTABLE and PYTHON_DASH_B
+VOLK_PYTHON_CHECK_MODULE("python >= 2.5" sys "sys.version.split()[0] >= '2.5'" PYTHON_MIN_VER_FOUND)
+VOLK_PYTHON_CHECK_MODULE("Cheetah >= 2.0.0" Cheetah "Cheetah.Version >= '2.0.0'" CHEETAH_FOUND)
+
+if(NOT PYTHON_MIN_VER_FOUND)
+ message(FATAL_ERROR "Python 2.5 or greater required to build VOLK")
+endif()
+
+if(NOT CHEETAH_FOUND)
+ message(FATAL_ERROR "Cheetah templates required to build VOLK")
+endif()
+
+if(MSVC)
+ if (NOT DEFINED BOOST_ALL_DYN_LINK)
+ set(BOOST_ALL_DYN_LINK TRUE)
+ endif()
+ set(BOOST_ALL_DYN_LINK "${BOOST_ALL_DYN_LINK}" CACHE BOOL "boost enable dynamic linking")
+ if(BOOST_ALL_DYN_LINK)
+ add_definitions(-DBOOST_ALL_DYN_LINK) #setup boost auto-linking in msvc
+ else(BOOST_ALL_DYN_LINK)
+ unset(BOOST_REQUIRED_COMPONENTS) #empty components list for static link
+ endif(BOOST_ALL_DYN_LINK)
+endif(MSVC)
+include(VolkBoost)
+
+if(NOT Boost_FOUND)
+ message(FATAL_ERROR "VOLK Requires boost to build")
+endif()
+
+option(ENABLE_ORC "Enable Orc" True)
+if(ENABLE_ORC)
+ find_package(ORC)
+else(ENABLE_ORC)
+ message(STATUS "Disabling use of ORC")
+endif(ENABLE_ORC)
+
+########################################################################
+# Setup the package config file
+########################################################################
+#set variables found in the pc.in file
+set(prefix ${CMAKE_INSTALL_PREFIX})
+set(exec_prefix "\${prefix}")
+set(libdir "\${exec_prefix}/lib${LIB_SUFFIX}")
+set(includedir "\${prefix}/include")
+
+configure_file(
+ ${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr.pc.in
+ ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr.pc
+@ONLY)
+
+install(
+ FILES ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr.pc
+ DESTINATION lib${LIB_SUFFIX}/pkgconfig
+ COMPONENT "volk_gnsssdr_devel"
+)
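+
+# For reference, a minimal volk_gnsssdr.pc.in consuming the variables set above
+# (prefix, exec_prefix, libdir, includedir) could look roughly like the sketch
+# below; the actual template shipped with the module may differ:
+#
+#   prefix=@prefix@
+#   exec_prefix=@exec_prefix@
+#   libdir=@libdir@
+#   includedir=@includedir@
+#
+#   Name: volk_gnsssdr
+#   Description: GNSS-SDR library of hand-optimized kernels
+#   Version: @VERSION@
+#   Libs: -L${libdir} -lvolk_gnsssdr
+#   Cflags: -I${includedir}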
+
+########################################################################
+# Install all headers in the include directories
+########################################################################
+set(VOLK_RUNTIME_DIR bin)
+set(VOLK_LIBRARY_DIR lib${LIB_SUFFIX})
+set(VOLK_INCLUDE_DIR include)
+
+install(
+ DIRECTORY ${CMAKE_SOURCE_DIR}/kernels/volk_gnsssdr
+ DESTINATION include COMPONENT "volk_gnsssdr_devel"
+ FILES_MATCHING PATTERN "*.h"
+)
+
+install(FILES
+ ${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_prefs.h
+ ${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_complex.h
+ ${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_common.h
+ ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr.h
+ ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_cpu.h
+ ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_config_fixed.h
+ ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_typedefs.h
+ ${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_malloc.h
+ DESTINATION include/volk_gnsssdr
+ COMPONENT "volk_gnsssdr_devel"
+)
+
+########################################################################
+# Install cmake search routine for external use
+########################################################################
+
+if(NOT CMAKE_MODULES_DIR)
+ set(CMAKE_MODULES_DIR lib${LIB_SUFFIX}/cmake)
+endif(NOT CMAKE_MODULES_DIR)
+
+install(
+ FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/VolkConfig.cmake
+ DESTINATION ${CMAKE_MODULES_DIR}/volk_gnsssdr
+ COMPONENT "volk_gnsssdr_devel"
+)
+
+########################################################################
+# On Apple only, set install name and use rpath correctly, if not already set
+########################################################################
+if(APPLE)
+ if(NOT CMAKE_INSTALL_NAME_DIR)
+ set(CMAKE_INSTALL_NAME_DIR
+ ${CMAKE_INSTALL_PREFIX}/${GR_LIBRARY_DIR} CACHE
+ PATH "Library Install Name Destination Directory" FORCE)
+ endif(NOT CMAKE_INSTALL_NAME_DIR)
+ if(NOT CMAKE_INSTALL_RPATH)
+ set(CMAKE_INSTALL_RPATH
+ ${CMAKE_INSTALL_PREFIX}/${GR_LIBRARY_DIR} CACHE
+ PATH "Library Install RPath" FORCE)
+ endif(NOT CMAKE_INSTALL_RPATH)
+ if(NOT CMAKE_BUILD_WITH_INSTALL_RPATH)
+ set(CMAKE_BUILD_WITH_INSTALL_RPATH ON CACHE
+ BOOL "Do Build Using Library Install RPath" FORCE)
+ endif(NOT CMAKE_BUILD_WITH_INSTALL_RPATH)
+endif(APPLE)
+
+########################################################################
+# Setup the library
+########################################################################
+add_subdirectory(lib)
+
+########################################################################
+# And the utility apps
+########################################################################
+add_subdirectory(apps)
+add_subdirectory(python/volk_gnsssdr_modtool)
+
+########################################################################
+# Print summary
+########################################################################
+message(STATUS "Using install prefix: ${CMAKE_INSTALL_PREFIX}")
diff --git a/src/algorithms/libs/volk_gnsssdr/apps/CMakeLists.txt b/src/algorithms/libs/volk_gnsssdr/apps/CMakeLists.txt
new file mode 100644
index 000000000..3158c4280
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/apps/CMakeLists.txt
@@ -0,0 +1,61 @@
+#
+# Copyright 2011-2013 Free Software Foundation, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+########################################################################
+# Setup profiler
+########################################################################
+if(Boost_FOUND)
+
+if(MSVC)
+ include_directories(${CMAKE_SOURCE_DIR}/cmake/msvc)
+endif(MSVC)
+
+include_directories(
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ ${CMAKE_CURRENT_BINARY_DIR}
+ ${CMAKE_SOURCE_DIR}/include
+ ${CMAKE_BINARY_DIR}/include
+ ${CMAKE_SOURCE_DIR}/lib
+ ${CMAKE_BINARY_DIR}/lib
+ ${Boost_INCLUDE_DIRS}
+)
+
+# MAKE volk_gnsssdr_profile
+add_executable(volk_gnsssdr_profile
+ ${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr_profile.cc
+ ${CMAKE_SOURCE_DIR}/lib/qa_utils.cc
+)
+
+target_link_libraries(volk_gnsssdr_profile volk_gnsssdr ${Boost_LIBRARIES})
+
+install(
+ TARGETS volk_gnsssdr_profile
+ DESTINATION bin
+ COMPONENT "volk_gnsssdr"
+)
+
+# MAKE volk_gnsssdr-config-info
+add_executable(volk_gnsssdr-config-info volk_gnsssdr-config-info.cc)
+target_link_libraries(volk_gnsssdr-config-info volk_gnsssdr ${Boost_LIBRARIES})
+
+install(
+ TARGETS volk_gnsssdr-config-info
+ DESTINATION bin
+ COMPONENT "volk_gnsssdr"
+)
+
+endif(Boost_FOUND)
diff --git a/src/algorithms/libs/volk_gnsssdr/apps/volk_gnsssdr-config-info.cc b/src/algorithms/libs/volk_gnsssdr/apps/volk_gnsssdr-config-info.cc
new file mode 100644
index 000000000..ec8c09525
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/apps/volk_gnsssdr-config-info.cc
@@ -0,0 +1,96 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2013 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <volk_gnsssdr/constants.h> // declares volk_gnsssdr_prefix(), volk_gnsssdr_version(), etc.
+#include "volk_gnsssdr/volk_gnsssdr.h"
+#include <boost/program_options.hpp>
+#include <iostream>
+
+namespace po = boost::program_options;
+
+int
+main(int argc, char **argv)
+{
+ po::options_description desc("Program options: volk_gnsssdr-config-info [options]");
+ po::variables_map vm;
+
+ desc.add_options()
+ ("help,h", "print help message")
+ ("prefix", "print VOLK installation prefix")
+ ("builddate", "print VOLK build date (RFC2822 format)")
+ ("cc", "print VOLK C compiler version")
+ ("cflags", "print VOLK CFLAGS")
+ ("all-machines", "print VOLK machines built into library")
+ ("avail-machines", "print VOLK machines the current platform can use")
+ ("machine", "print the VOLK machine that will be used")
+ ("version,v", "print VOLK version")
+ ;
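+
+    // Usage sketch (flags taken from the option table above; output values
+    // depend on the local build, so the results are illustrative only):
+    //   $ volk_gnsssdr-config-info --version
+    //   $ volk_gnsssdr-config-info --avail-machines
+    //   $ volk_gnsssdr-config-info --cc --cflags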
+
+ try {
+ po::store(po::parse_command_line(argc, argv, desc), vm);
+ po::notify(vm);
+ }
+ catch (po::error& error){
+ std::cerr << "Error: " << error.what() << std::endl << std::endl;
+ std::cerr << desc << std::endl;
+ return 1;
+ }
+
+ if(vm.size() == 0 || vm.count("help")) {
+ std::cout << desc << std::endl;
+ return 1;
+ }
+
+ if(vm.count("prefix"))
+ std::cout << volk_gnsssdr_prefix() << std::endl;
+
+ if(vm.count("builddate"))
+ std::cout << volk_gnsssdr_build_date() << std::endl;
+
+ if(vm.count("version"))
+ std::cout << volk_gnsssdr_version() << std::endl;
+
+ if(vm.count("cc"))
+ std::cout << volk_gnsssdr_c_compiler() << std::endl;
+
+ if(vm.count("cflags"))
+ std::cout << volk_gnsssdr_compiler_flags() << std::endl;
+
+ // stick an extra ';' to make output of this and avail-machines the
+ // same structure for easier parsing
+ if(vm.count("all-machines"))
+ std::cout << volk_gnsssdr_available_machines() << ";" << std::endl;
+
+ if(vm.count("avail-machines")) {
+ volk_gnsssdr_list_machines();
+ }
+
+ if(vm.count("machine")) {
+ std::cout << volk_gnsssdr_get_machine() << std::endl;
+ }
+
+ return 0;
+}
diff --git a/src/algorithms/libs/volk_gnsssdr/apps/volk_gnsssdr_profile.cc b/src/algorithms/libs/volk_gnsssdr/apps/volk_gnsssdr_profile.cc
new file mode 100644
index 000000000..ed5c0d53d
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/apps/volk_gnsssdr_profile.cc
@@ -0,0 +1,239 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2012-2014 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "qa_utils.h"
+
+#include <volk_gnsssdr/volk_gnsssdr.h>
+#include <volk_gnsssdr/volk_gnsssdr_prefs.h>
+
+#include <vector>
+#include <iostream>
+#include <fstream>
+#include <boost/foreach.hpp>
+#include <boost/filesystem.hpp>
+#include <boost/program_options.hpp>
+
+namespace fs = boost::filesystem;
+
+void write_json(std::ofstream &json_file, std::vector<volk_gnsssdr_test_results_t> results) {
+ json_file << "{" << std::endl;
+ json_file << " \"volk_tests\": [" << std::endl;
+ size_t len = results.size();
+ size_t i = 0;
+ BOOST_FOREACH(volk_gnsssdr_test_results_t &result, results) {
+ json_file << " {" << std::endl;
+ json_file << " \"name\": \"" << result.name << "\"," << std::endl;
+ json_file << " \"vlen\": " << result.vlen << "," << std::endl;
+ json_file << " \"iter\": " << result.iter << "," << std::endl;
+ json_file << " \"best_arch_a\": \"" << result.best_arch_a
+ << "\"," << std::endl;
+ json_file << " \"best_arch_u\": \"" << result.best_arch_u
+ << "\"," << std::endl;
+ json_file << " \"results\": {" << std::endl;
+ size_t results_len = result.results.size();
+ size_t ri = 0;
+ typedef std::pair<std::string, volk_gnsssdr_test_time_t> tpair;
+ BOOST_FOREACH(tpair pair, result.results) {
+ volk_gnsssdr_test_time_t time = pair.second;
+ json_file << " \"" << time.name << "\": {" << std::endl;
+ json_file << " \"name\": \"" << time.name << "\"," << std::endl;
+ json_file << " \"time\": " << time.time << "," << std::endl;
+ json_file << " \"units\": \"" << time.units << "\"" << std::endl;
+ json_file << " }" ;
+ if(ri+1 != results_len) {
+ json_file << ",";
+ }
+ json_file << std::endl;
+ ri++;
+ }
+ json_file << " }" << std::endl;
+ json_file << " }";
+ if(i+1 != len) {
+ json_file << ",";
+ }
+ json_file << std::endl;
+ i++;
+ }
+ json_file << " ]" << std::endl;
+ json_file << "}" << std::endl;
+}
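+
+/* For orientation, write_json() above emits a document shaped roughly like the
+ * following (kernel names, architectures and numbers are placeholders, not
+ * measured results):
+ *
+ * {
+ *   "volk_tests": [
+ *     { "name": "volk_gnsssdr_..._example", "vlen": 16000, "iter": 250,
+ *       "best_arch_a": "...", "best_arch_u": "...",
+ *       "results": { "<arch>": { "name": "<arch>", "time": 0.0, "units": "..." } } }
+ *   ]
+ * }
+ */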
+
+int main(int argc, char *argv[]) {
+ // Adding program options
+ boost::program_options::options_description desc("Options");
+ desc.add_options()
+ ("help,h", "Print help messages")
+ ("benchmark,b",
+ boost::program_options::value<bool>()->default_value( false )
+ ->implicit_value( true ),
+ "Run all kernels (benchmark mode)")
+ ("tests-regex,R",
+ boost::program_options::value<std::string>(),
+ "Run tests matching regular expression.")
+ ("json,j",
+ boost::program_options::value<std::string>(),
+ "JSON output file")
+ ;
+
+ // Handle the options that were given
+ boost::program_options::variables_map vm;
+ bool benchmark_mode;
+ std::string kernel_regex;
+ bool store_results = true;
+ std::ofstream json_file;
+
+ try {
+ boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
+ boost::program_options::notify(vm);
+ benchmark_mode = vm.count("benchmark")?vm["benchmark"].as<bool>():false;
+ if ( vm.count("tests-regex" ) ) {
+ kernel_regex = vm["tests-regex"].as<std::string>();
+ store_results = false;
+ std::cout << "Warning: using a regexp will not save results to a config" << std::endl;
+ }
+ else {
+ kernel_regex = ".*";
+ store_results = true;
+ }
+ } catch (boost::program_options::error& error) {
+ std::cerr << "Error: " << error.what() << std::endl << std::endl;
+ std::cerr << desc << std::endl;
+ return 1;
+ }
+ /** --help option
+ */
+ if ( vm.count("help") )
+ {
+ std::cout << "The VOLK profiler." << std::endl
+ << desc << std::endl;
+ return 0;
+ }
+
+ if ( vm.count("json") )
+ {
+ json_file.open( vm["json"].as<std::string>().c_str() );
+ }
+
+
+ // Run tests
+ std::vector<volk_gnsssdr_test_results_t> results;
+
+ //VOLK_PROFILE(volk_gnsssdr_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
+ //VOLK_PROFILE(volk_gnsssdr_16i_branch_4_state_8, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
+ //VOLK_PROFILE(volk_gnsssdr_16i_max_star_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
+ //VOLK_PROFILE(volk_gnsssdr_16i_max_star_horizontal_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
+ //VOLK_PROFILE(volk_gnsssdr_16i_permute_and_scalar_add, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
+ //VOLK_PROFILE(volk_gnsssdr_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
+ //VOLK_PROFILE(volk_gnsssdr_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
+ //VOLK_PROFILE(volk_gnsssdr_32fc_s32f_x2_power_spectral_density_32f, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
+ //VOLK_PROFILE(volk_gnsssdr_32f_s32f_32f_fm_detect_32f, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
+ //VOLK_PROFILE(volk_gnsssdr_32u_popcnt, 0, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
+ //VOLK_PROFILE(volk_gnsssdr_64u_popcnt, 0, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
+ //VOLK_PROFILE(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, lv_32fc_t(1.0, 0.5), 204602, 1000, &results, benchmark_mode, kernel_regex);
+
+ //GNSS-SDR PROTO-KERNELS
+ //lv_32fc_t sfv = lv_cmake((float)1, (float)2);
+ //example: VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, sfv, 204602, 1000, &results, benchmark_mode, kernel_regex);
+
+ //CAN NOT BE TESTED YET BECAUSE VOLK MODULE DOES NOT SUPPORT IT:
+ //VOLK_PROFILE(volk_gnsssdr_s32f_x2_update_local_carrier_32fc, 1e-4, 0, 16007, 1, &results, benchmark_mode, kernel_regex);
+ //VOLK_PROFILE(volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc, 1e-4, 0, 7, 1, &results, benchmark_mode, kernel_regex);
+
+ VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_TEST_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+
+ VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+
+ VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+
+ VOLK_PROFILE(volk_gnsssdr_32fc_convert_16ic, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32fc_convert_8ic, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32fc_s32f_convert_8ic, 1e-4, 5, 16000, 250, &results, benchmark_mode, kernel_regex);
+
+ /*VOLK_PROFILE(volk_gnsssdr_32f_accumulator_s32f, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8i_accumulator_s8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32f_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8i_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8i_max_s8i, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32f_x2_add_32f, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8i_x2_add_8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32fc_conjugate_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_conjugate_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32fc_magnitude_squared_32f, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_magnitude_squared_8i, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32fc_x2_dot_prod_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_x2_dot_prod_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32fc_x2_multiply_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8ic_x2_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_8u_x2_multiply_8u, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_64f_accumulator_64f, 1e-4, 0, 16000, 1000, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_32f_s32f_convert_16i, 1e-4, 1, 204602, 250, &results, benchmark_mode, kernel_regex);
+ VOLK_PROFILE(volk_gnsssdr_16i_s32f_convert_32f, 1e-4, 1, 204602, 250, &results, benchmark_mode, kernel_regex);*/
+
+ // Until we can update the config on a kernel by kernel basis
+ // do not overwrite volk_config when using a regex.
+ if(store_results) {
+ char path[1024];
+ volk_gnsssdr_get_config_path(path);
+
+ const fs::path config_path(path);
+
+ if (not fs::exists(config_path.branch_path()))
+ {
+ std::cout << "Creating " << config_path.branch_path() << "..." << std::endl;
+ fs::create_directories(config_path.branch_path());
+ }
+
+ std::cout << "Writing " << config_path << "..." << std::endl;
+ std::ofstream config(config_path.string().c_str());
+ if(!config.is_open()) { //either we don't have write access or we don't have the dir yet
+ std::cout << "Error opening file " << config_path << std::endl;
+ }
+
+ config << "\
+ #this file is generated by volk_profile.\n\
+ #the function name is followed by the preferred architecture.\n\
+ ";
+
+ BOOST_FOREACH(volk_gnsssdr_test_results_t result, results) {
+ config << result.config_name << " "
+ << result.best_arch_a << " "
+ << result.best_arch_u << std::endl;
+ }
+ config.close();
+ }
+ else {
+ std::cout << "Warning: config not generated" << std::endl;
+ }
+}
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/CMakeParseArgumentsCopy.cmake b/src/algorithms/libs/volk_gnsssdr/cmake/CMakeParseArgumentsCopy.cmake
new file mode 100644
index 000000000..7ce4c49ae
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/CMakeParseArgumentsCopy.cmake
@@ -0,0 +1,138 @@
+# CMAKE_PARSE_ARGUMENTS(<prefix> <options> <one_value_keywords> <multi_value_keywords> args...)
+#
+# CMAKE_PARSE_ARGUMENTS() is intended to be used in macros or functions for
+# parsing the arguments given to that macro or function.
+# It processes the arguments and defines a set of variables which hold the
+# values of the respective options.
+#
+# The <options> argument contains all options for the respective macro,
+# i.e. keywords which can be used when calling the macro without any value
+# following, like e.g. the OPTIONAL keyword of the install() command.
+#
+# The <one_value_keywords> argument contains all keywords for this macro
+# which are followed by one value, like e.g. DESTINATION keyword of the
+# install() command.
+#
+# The <multi_value_keywords> argument contains all keywords for this macro
+# which can be followed by more than one value, like e.g. the TARGETS or
+# FILES keywords of the install() command.
+#
+# When done, CMAKE_PARSE_ARGUMENTS() will have defined for each of the
+# keywords listed in <options>, <one_value_keywords> and
+# <multi_value_keywords> a variable composed of the given <prefix>
+# followed by "_" and the name of the respective keyword.
+# These variables will then hold the respective value from the argument list.
+# For the <options> keywords this will be TRUE or FALSE.
+#
+# All remaining arguments are collected in a variable
+# <prefix>_UNPARSED_ARGUMENTS, this can be checked afterwards to see whether
+# your macro was called with unrecognized parameters.
+#
+# As an example, here is a my_install() macro, which takes similar arguments to the
+# real install() command:
+#
+# function(MY_INSTALL)
+# set(options OPTIONAL FAST)
+# set(oneValueArgs DESTINATION RENAME)
+# set(multiValueArgs TARGETS CONFIGURATIONS)
+# cmake_parse_arguments(MY_INSTALL "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )
+# ...
+#
+# Assume my_install() has been called like this:
+# my_install(TARGETS foo bar DESTINATION bin OPTIONAL blub)
+#
+# After the cmake_parse_arguments() call the macro will have set the following
+# variables:
+# MY_INSTALL_OPTIONAL = TRUE
+# MY_INSTALL_FAST = FALSE (this option was not used when calling my_install())
+# MY_INSTALL_DESTINATION = "bin"
+# MY_INSTALL_RENAME = "" (was not used)
+# MY_INSTALL_TARGETS = "foo;bar"
+# MY_INSTALL_CONFIGURATIONS = "" (was not used)
+# MY_INSTALL_UNPARSED_ARGUMENTS = "blub" (no value expected after "OPTIONAL")
+#
+# You can then continue and process these variables.
+#
+# Keywords terminate lists of values, e.g. if directly after a one_value_keyword
+# another recognized keyword follows, this is interpreted as the beginning of
+# the new option.
+# E.g. my_install(TARGETS foo DESTINATION OPTIONAL) would result in
+# MY_INSTALL_DESTINATION being empty, since OPTIONAL is itself a keyword and
+# therefore terminates the DESTINATION value, while MY_INSTALL_OPTIONAL would be set to TRUE.
+
+#=============================================================================
+# Copyright 2010 Alexander Neundorf
+#
+# Distributed under the OSI-approved BSD License (the "License");
+# see accompanying file Copyright.txt for details.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the License for more information.
+#=============================================================================
+# (To distribute this file outside of CMake, substitute the full
+# License text for the above reference.)
+
+
+if(__CMAKE_PARSE_ARGUMENTS_INCLUDED)
+ return()
+endif()
+set(__CMAKE_PARSE_ARGUMENTS_INCLUDED TRUE)
+
+
+function(CMAKE_PARSE_ARGUMENTS prefix _optionNames _singleArgNames _multiArgNames)
+ # first set all result variables to empty/FALSE
+ foreach(arg_name ${_singleArgNames} ${_multiArgNames})
+ set(${prefix}_${arg_name})
+ endforeach(arg_name)
+
+ foreach(option ${_optionNames})
+ set(${prefix}_${option} FALSE)
+ endforeach(option)
+
+ set(${prefix}_UNPARSED_ARGUMENTS)
+
+ set(insideValues FALSE)
+ set(currentArgName)
+
+ # now iterate over all arguments and fill the result variables
+ foreach(currentArg ${ARGN})
+ list(FIND _optionNames "${currentArg}" optionIndex) # ... then this marks the end of the arguments belonging to this keyword
+ list(FIND _singleArgNames "${currentArg}" singleArgIndex) # ... then this marks the end of the arguments belonging to this keyword
+ list(FIND _multiArgNames "${currentArg}" multiArgIndex) # ... then this marks the end of the arguments belonging to this keyword
+
+ if(${optionIndex} EQUAL -1 AND ${singleArgIndex} EQUAL -1 AND ${multiArgIndex} EQUAL -1)
+ if(insideValues)
+ if("${insideValues}" STREQUAL "SINGLE")
+ set(${prefix}_${currentArgName} ${currentArg})
+ set(insideValues FALSE)
+ elseif("${insideValues}" STREQUAL "MULTI")
+ list(APPEND ${prefix}_${currentArgName} ${currentArg})
+ endif()
+ else(insideValues)
+ list(APPEND ${prefix}_UNPARSED_ARGUMENTS ${currentArg})
+ endif(insideValues)
+ else()
+ if(NOT ${optionIndex} EQUAL -1)
+ set(${prefix}_${currentArg} TRUE)
+ set(insideValues FALSE)
+ elseif(NOT ${singleArgIndex} EQUAL -1)
+ set(currentArgName ${currentArg})
+ set(${prefix}_${currentArgName})
+ set(insideValues "SINGLE")
+ elseif(NOT ${multiArgIndex} EQUAL -1)
+ set(currentArgName ${currentArg})
+ set(${prefix}_${currentArgName})
+ set(insideValues "MULTI")
+ endif()
+ endif()
+
+ endforeach(currentArg)
+
+ # propagate the result variables to the caller:
+ foreach(arg_name ${_singleArgNames} ${_multiArgNames} ${_optionNames})
+ set(${prefix}_${arg_name} ${${prefix}_${arg_name}} PARENT_SCOPE)
+ endforeach(arg_name)
+ set(${prefix}_UNPARSED_ARGUMENTS ${${prefix}_UNPARSED_ARGUMENTS} PARENT_SCOPE)
+
+endfunction(CMAKE_PARSE_ARGUMENTS _options _singleArgs _multiArgs)
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/FindORC.cmake b/src/algorithms/libs/volk_gnsssdr/cmake/FindORC.cmake
new file mode 100644
index 000000000..f21513f72
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/FindORC.cmake
@@ -0,0 +1,36 @@
+FIND_PACKAGE(PkgConfig)
+PKG_CHECK_MODULES(PC_ORC "orc-0.4 > 0.4.11")
+
+
+
+
+FIND_PROGRAM(ORCC_EXECUTABLE orcc
+ HINTS ${PC_ORC_TOOLSDIR}
+ PATHS ${ORC_ROOT}/bin ${CMAKE_INSTALL_PREFIX}/bin)
+
+FIND_PATH(ORC_INCLUDE_DIR NAMES orc/orc.h
+ HINTS ${PC_ORC_INCLUDEDIR}
+ PATHS ${ORC_ROOT}/include/orc-0.4 ${CMAKE_INSTALL_PREFIX}/include/orc-0.4)
+
+
+FIND_PATH(ORC_LIBRARY_DIR NAMES ${CMAKE_SHARED_LIBRARY_PREFIX}orc-0.4${CMAKE_SHARED_LIBRARY_SUFFIX}
+ HINTS ${PC_ORC_LIBDIR}
+ PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} ${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX})
+
+FIND_LIBRARY(ORC_LIB orc-0.4
+ HINTS ${PC_ORC_LIBRARY_DIRS}
+ PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} ${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX})
+
+LIST(APPEND ORC_LIBRARY
+ ${ORC_LIB}
+)
+
+
+SET(ORC_INCLUDE_DIRS ${ORC_INCLUDE_DIR})
+SET(ORC_LIBRARIES ${ORC_LIBRARY})
+SET(ORC_LIBRARY_DIRS ${ORC_LIBRARY_DIR})
+
+INCLUDE(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(ORC "orc files" ORC_LIBRARY ORC_INCLUDE_DIR ORCC_EXECUTABLE)
+
+mark_as_advanced(ORC_INCLUDE_DIR ORC_LIBRARY ORCC_EXECUTABLE)
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/GrPython.cmake b/src/algorithms/libs/volk_gnsssdr/cmake/GrPython.cmake
new file mode 100644
index 000000000..b7b561b7b
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/GrPython.cmake
@@ -0,0 +1,234 @@
+# Copyright 2010-2011,2013 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+
+if(DEFINED __INCLUDED_VOLK_PYTHON_CMAKE)
+ return()
+endif()
+set(__INCLUDED_VOLK_PYTHON_CMAKE TRUE)
+
+########################################################################
+# Setup the python interpreter:
+# This allows the user to specify a specific interpreter,
+# or finds the interpreter via the built-in cmake module.
+########################################################################
+#this allows the user to override PYTHON_EXECUTABLE
+if(PYTHON_EXECUTABLE)
+
+ set(PYTHONINTERP_FOUND TRUE)
+
+#otherwise if not set, try to automatically find it
+else(PYTHON_EXECUTABLE)
+
+ #use the built-in find script
+ find_package(PythonInterp 2)
+
+ #and if that fails use the find program routine
+ if(NOT PYTHONINTERP_FOUND)
+ find_program(PYTHON_EXECUTABLE NAMES python python2 python2.7 python2.6 python2.5)
+ if(PYTHON_EXECUTABLE)
+ set(PYTHONINTERP_FOUND TRUE)
+ endif(PYTHON_EXECUTABLE)
+ endif(NOT PYTHONINTERP_FOUND)
+
+endif(PYTHON_EXECUTABLE)
+
+#make the path to the executable appear in the cmake gui
+set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE} CACHE FILEPATH "python interpreter")
+
+#make sure we can use -B with python (introduced in 2.6)
+if(PYTHON_EXECUTABLE)
+ execute_process(
+ COMMAND ${PYTHON_EXECUTABLE} -B -c ""
+ OUTPUT_QUIET ERROR_QUIET
+ RESULT_VARIABLE PYTHON_HAS_DASH_B_RESULT
+ )
+ if(PYTHON_HAS_DASH_B_RESULT EQUAL 0)
+ set(PYTHON_DASH_B "-B")
+ endif()
+endif(PYTHON_EXECUTABLE)
+
+########################################################################
+# Check for the existence of a python module:
+# - desc a string description of the check
+# - mod the name of the module to import
+# - cmd an additional command to run
+# - have the result variable to set
+########################################################################
+macro(VOLK_PYTHON_CHECK_MODULE desc mod cmd have)
+ message(STATUS "")
+ message(STATUS "Python checking for ${desc}")
+ execute_process(
+ COMMAND ${PYTHON_EXECUTABLE} -c "
+#########################################
+try: import ${mod}
+except:
+ try: ${mod}
+ except: exit(-1)
+try: assert ${cmd}
+except: exit(-1)
+#########################################"
+ RESULT_VARIABLE ${have}
+ )
+ if(${have} EQUAL 0)
+ message(STATUS "Python checking for ${desc} - found")
+ set(${have} TRUE)
+ else(${have} EQUAL 0)
+ message(STATUS "Python checking for ${desc} - not found")
+ set(${have} FALSE)
+ endif(${have} EQUAL 0)
+endmacro(VOLK_PYTHON_CHECK_MODULE)
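+
+# Example use of the macro above (mirrors the checks performed in the
+# top-level CMakeLists.txt of this module):
+#
+#   VOLK_PYTHON_CHECK_MODULE("Cheetah >= 2.0.0" Cheetah "Cheetah.Version >= '2.0.0'" CHEETAH_FOUND)
+#   if(NOT CHEETAH_FOUND)
+#       message(FATAL_ERROR "Cheetah templates required to build VOLK")
+#   endif()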
+
+########################################################################
+# Sets the python installation directory VOLK_PYTHON_DIR
+########################################################################
+execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "
+from distutils import sysconfig
+print sysconfig.get_python_lib(plat_specific=True, prefix='')
+" OUTPUT_VARIABLE VOLK_PYTHON_DIR OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+file(TO_CMAKE_PATH ${VOLK_PYTHON_DIR} VOLK_PYTHON_DIR)
+
+########################################################################
+# Create an always-built target with a unique name
+# Usage: VOLK_UNIQUE_TARGET(<description> <dependencies list>)
+########################################################################
+function(VOLK_UNIQUE_TARGET desc)
+ file(RELATIVE_PATH reldir ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR})
+ execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import re, hashlib
+unique = hashlib.md5('${reldir}${ARGN}').hexdigest()[:5]
+print(re.sub('\\W', '_', '${desc} ${reldir} ' + unique))"
+ OUTPUT_VARIABLE _target OUTPUT_STRIP_TRAILING_WHITESPACE)
+ add_custom_target(${_target} ALL DEPENDS ${ARGN})
+endfunction(VOLK_UNIQUE_TARGET)
+
+########################################################################
+# Install python sources (also builds and installs byte-compiled python)
+########################################################################
+function(VOLK_PYTHON_INSTALL)
+ include(CMakeParseArgumentsCopy)
+ CMAKE_PARSE_ARGUMENTS(VOLK_PYTHON_INSTALL "" "DESTINATION;COMPONENT" "FILES;PROGRAMS" ${ARGN})
+
+ ####################################################################
+ if(VOLK_PYTHON_INSTALL_FILES)
+ ####################################################################
+ install(${ARGN}) #installs regular python files
+
+ #create a list of all generated files
+ unset(pysrcfiles)
+ unset(pycfiles)
+ unset(pyofiles)
+ foreach(pyfile ${VOLK_PYTHON_INSTALL_FILES})
+ get_filename_component(pyfile ${pyfile} ABSOLUTE)
+ list(APPEND pysrcfiles ${pyfile})
+
+ #determine if this file is in the source or binary directory
+ file(RELATIVE_PATH source_rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${pyfile})
+ string(LENGTH "${source_rel_path}" source_rel_path_len)
+ file(RELATIVE_PATH binary_rel_path ${CMAKE_CURRENT_BINARY_DIR} ${pyfile})
+ string(LENGTH "${binary_rel_path}" binary_rel_path_len)
+
+ #and set the generated path appropriately
+ if(${source_rel_path_len} GREATER ${binary_rel_path_len})
+ set(pygenfile ${CMAKE_CURRENT_BINARY_DIR}/${binary_rel_path})
+ else()
+ set(pygenfile ${CMAKE_CURRENT_BINARY_DIR}/${source_rel_path})
+ endif()
+ list(APPEND pycfiles ${pygenfile}c)
+ list(APPEND pyofiles ${pygenfile}o)
+
+ #ensure generation path exists
+ get_filename_component(pygen_path ${pygenfile} PATH)
+ file(MAKE_DIRECTORY ${pygen_path})
+
+ endforeach(pyfile)
+
+ #the command to generate the pyc files
+ add_custom_command(
+ DEPENDS ${pysrcfiles} OUTPUT ${pycfiles}
+ COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_BINARY_DIR}/python_compile_helper.py ${pysrcfiles} ${pycfiles}
+ )
+
+ #the command to generate the pyo files
+ add_custom_command(
+ DEPENDS ${pysrcfiles} OUTPUT ${pyofiles}
+ COMMAND ${PYTHON_EXECUTABLE} -O ${CMAKE_BINARY_DIR}/python_compile_helper.py ${pysrcfiles} ${pyofiles}
+ )
+
+ #create install rule and add generated files to target list
+ set(python_install_gen_targets ${pycfiles} ${pyofiles})
+ install(FILES ${python_install_gen_targets}
+ DESTINATION ${VOLK_PYTHON_INSTALL_DESTINATION}
+ COMPONENT ${VOLK_PYTHON_INSTALL_COMPONENT}
+ )
+
+
+ ####################################################################
+ elseif(VOLK_PYTHON_INSTALL_PROGRAMS)
+ ####################################################################
+ file(TO_NATIVE_PATH ${PYTHON_EXECUTABLE} pyexe_native)
+
+ if (CMAKE_CROSSCOMPILING)
+ set(pyexe_native "/usr/bin/env python")
+ endif()
+
+ foreach(pyfile ${VOLK_PYTHON_INSTALL_PROGRAMS})
+ get_filename_component(pyfile_name ${pyfile} NAME)
+ get_filename_component(pyfile ${pyfile} ABSOLUTE)
+ string(REPLACE "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" pyexefile "${pyfile}.exe")
+ list(APPEND python_install_gen_targets ${pyexefile})
+
+ get_filename_component(pyexefile_path ${pyexefile} PATH)
+ file(MAKE_DIRECTORY ${pyexefile_path})
+
+ add_custom_command(
+ OUTPUT ${pyexefile} DEPENDS ${pyfile}
+ COMMAND ${PYTHON_EXECUTABLE} -c
+ "open('${pyexefile}','w').write('\#!${pyexe_native}\\n'+open('${pyfile}').read())"
+ COMMENT "Shebangin ${pyfile_name}"
+ VERBATIM
+ )
+
+ #on windows, python files need an extension to execute
+ get_filename_component(pyfile_ext ${pyfile} EXT)
+ if(WIN32 AND NOT pyfile_ext)
+ set(pyfile_name "${pyfile_name}.py")
+ endif()
+
+ install(PROGRAMS ${pyexefile} RENAME ${pyfile_name}
+ DESTINATION ${VOLK_PYTHON_INSTALL_DESTINATION}
+ COMPONENT ${VOLK_PYTHON_INSTALL_COMPONENT}
+ )
+ endforeach(pyfile)
+
+ endif()
+
+ VOLK_UNIQUE_TARGET("pygen" ${python_install_gen_targets})
+
+endfunction(VOLK_PYTHON_INSTALL)
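+
+# Typical call sites pass either FILES or PROGRAMS. A sketch with hypothetical
+# file names (DESTINATION/COMPONENT follow the pattern used elsewhere in this
+# module):
+#
+#   VOLK_PYTHON_INSTALL(
+#       FILES __init__.py cfg.py
+#       DESTINATION ${VOLK_PYTHON_DIR}/volk_gnsssdr_modtool
+#       COMPONENT "volk_gnsssdr"
+#   )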
+
+########################################################################
+# Write the python helper script that generates byte code files
+########################################################################
+file(WRITE ${CMAKE_BINARY_DIR}/python_compile_helper.py "
+import sys, py_compile
+files = sys.argv[1:]
+srcs, gens = files[:len(files)/2], files[len(files)/2:]
+for src, gen in zip(srcs, gens):
+ py_compile.compile(file=src, cfile=gen, doraise=True)
+")
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/VolkBoost.cmake b/src/algorithms/libs/volk_gnsssdr/cmake/VolkBoost.cmake
new file mode 100644
index 000000000..318820e10
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/VolkBoost.cmake
@@ -0,0 +1,98 @@
+# Copyright 2010-2011 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+
+if(DEFINED __INCLUDED_VOLK_BOOST_CMAKE)
+ return()
+endif()
+set(__INCLUDED_VOLK_BOOST_CMAKE TRUE)
+
+########################################################################
+# Setup Boost and handle some system specific things
+########################################################################
+
+set(BOOST_REQUIRED_COMPONENTS
+ filesystem
+ system
+ unit_test_framework
+ program_options
+)
+
+if(UNIX AND NOT BOOST_ROOT AND EXISTS "/usr/lib64")
+ list(APPEND BOOST_LIBRARYDIR "/usr/lib64") #fedora 64-bit fix
+endif(UNIX AND NOT BOOST_ROOT AND EXISTS "/usr/lib64")
+
+if(MSVC)
+ set(BOOST_REQUIRED_COMPONENTS ${BOOST_REQUIRED_COMPONENTS} chrono)
+
+ if (NOT DEFINED BOOST_ALL_DYN_LINK)
+ set(BOOST_ALL_DYN_LINK TRUE)
+ endif()
+ set(BOOST_ALL_DYN_LINK "${BOOST_ALL_DYN_LINK}" CACHE BOOL "boost enable dynamic linking")
+ if(BOOST_ALL_DYN_LINK)
+ add_definitions(-DBOOST_ALL_DYN_LINK) #setup boost auto-linking in msvc
+ else(BOOST_ALL_DYN_LINK)
+ unset(BOOST_REQUIRED_COMPONENTS) #empty components list for static link
+ endif(BOOST_ALL_DYN_LINK)
+endif(MSVC)
+
+find_package(Boost "1.35" COMPONENTS ${BOOST_REQUIRED_COMPONENTS})
+
+# This does not allow us to disable specific versions. It is used
+# internally by cmake to know the format of newer versions. As newer
+# Boost versions beyond what is shown here are produced, we must extend
+# this list. To disable Boost versions, see below.
+set(Boost_ADDITIONAL_VERSIONS
+ "1.35.0" "1.35" "1.36.0" "1.36" "1.37.0" "1.37" "1.38.0" "1.38" "1.39.0" "1.39"
+ "1.40.0" "1.40" "1.41.0" "1.41" "1.42.0" "1.42" "1.43.0" "1.43" "1.44.0" "1.44"
+ "1.45.0" "1.45" "1.46.0" "1.46" "1.47.0" "1.47" "1.48.0" "1.48" "1.49.0" "1.49"
+ "1.50.0" "1.50" "1.51.0" "1.51" "1.52.0" "1.52" "1.53.0" "1.53" "1.54.0" "1.54"
+ "1.55.0" "1.55" "1.56.0" "1.56" "1.57.0" "1.57" "1.58.0" "1.58" "1.59.0" "1.59"
+ "1.60.0" "1.60" "1.61.0" "1.61" "1.62.0" "1.62" "1.63.0" "1.63" "1.64.0" "1.64"
+ "1.65.0" "1.65" "1.66.0" "1.66" "1.67.0" "1.67" "1.68.0" "1.68" "1.69.0" "1.69"
+)
+
+# Boost 1.52 disabled, see https://svn.boost.org/trac/boost/ticket/7669
+# Similar problems with Boost 1.46 and 1.47.
+
+OPTION(ENABLE_BAD_BOOST "Enable known bad versions of Boost" OFF)
+if(ENABLE_BAD_BOOST)
+ MESSAGE(STATUS "Enabling use of known bad versions of Boost.")
+endif(ENABLE_BAD_BOOST)
+
+# For any unsuitable Boost version, add the version number below in
+# the following format: XXYYZZ
+# Where:
+# XX is the major version ('10' for version 1)
+# YY is the minor version number ('46' for 1.46)
+# ZZ is the patch version number (typically just '00')
+set(Boost_NOGO_VERSIONS
+ 104600 104601 104700 105200
+ )
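+
+# Example of the encoding above: Boost 1.53.0 would be entered as 105300.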
+
+foreach(ver ${Boost_NOGO_VERSIONS})
+ if(${Boost_VERSION} EQUAL ${ver})
+ if(NOT ENABLE_BAD_BOOST)
+ MESSAGE(STATUS "WARNING: Found a known bad version of Boost (v${Boost_VERSION}). Disabling.")
+ set(Boost_FOUND FALSE)
+ else(NOT ENABLE_BAD_BOOST)
+ MESSAGE(STATUS "WARNING: Found a known bad version of Boost (v${Boost_VERSION}). Continuing anyway.")
+ set(Boost_FOUND TRUE)
+ endif(NOT ENABLE_BAD_BOOST)
+ endif(${Boost_VERSION} EQUAL ${ver})
+endforeach(ver)
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/VolkConfig.cmake b/src/algorithms/libs/volk_gnsssdr/cmake/VolkConfig.cmake
new file mode 100644
index 000000000..7d58b1923
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/VolkConfig.cmake
@@ -0,0 +1,26 @@
+INCLUDE(FindPkgConfig)
+PKG_CHECK_MODULES(PC_VOLK volk_gnsssdr)
+
+FIND_PATH(
+ VOLK_INCLUDE_DIRS
+ NAMES volk_gnsssdr/volk_gnsssdr.h
+ HINTS $ENV{VOLK_DIR}/include
+ ${PC_VOLK_INCLUDEDIR}
+ PATHS /usr/local/include
+ /usr/include
+)
+
+FIND_LIBRARY(
+ VOLK_LIBRARIES
+ NAMES volk_gnsssdr
+ HINTS $ENV{VOLK_DIR}/lib
+ ${PC_VOLK_LIBDIR}
+ PATHS /usr/local/lib
+ /usr/local/lib64
+ /usr/lib
+ /usr/lib64
+)
+
+INCLUDE(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(VOLK DEFAULT_MSG VOLK_LIBRARIES VOLK_INCLUDE_DIRS)
+MARK_AS_ADVANCED(VOLK_LIBRARIES VOLK_INCLUDE_DIRS)
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/msvc/config.h b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/config.h
new file mode 100644
index 000000000..43792c783
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/config.h
@@ -0,0 +1,58 @@
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_CONFIG_H_ // [
+#define _MSC_CONFIG_H_
+
+////////////////////////////////////////////////////////////////////////
+// enable inline functions for C code
+////////////////////////////////////////////////////////////////////////
+#ifndef __cplusplus
+# define inline __inline
+#endif
+
+////////////////////////////////////////////////////////////////////////
+// signed size_t
+////////////////////////////////////////////////////////////////////////
+#include <stddef.h>
+typedef ptrdiff_t ssize_t;
+
+////////////////////////////////////////////////////////////////////////
+// rint functions
+////////////////////////////////////////////////////////////////////////
+#include <math.h>
+static inline long lrint(double x){return (long)(x > 0.0 ? x + 0.5 : x - 0.5);}
+static inline long lrintf(float x){return (long)(x > 0.0f ? x + 0.5f : x - 0.5f);}
+static inline long long llrint(double x){return (long long)(x > 0.0 ? x + 0.5 : x - 0.5);}
+static inline long long llrintf(float x){return (long long)(x > 0.0f ? x + 0.5f : x - 0.5f);}
+static inline double rint(double x){return (x > 0.0)? floor(x + 0.5) : ceil(x - 0.5);}
+static inline float rintf(float x){return (x > 0.0f)? floorf(x + 0.5f) : ceilf(x - 0.5f);}
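+
+// Note: these fallbacks round halfway cases away from zero (e.g. rint(2.5) -> 3.0),
+// whereas C99 rint() follows the current rounding mode (round-half-to-even by default).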
+
+////////////////////////////////////////////////////////////////////////
+// math constants
+////////////////////////////////////////////////////////////////////////
+#define INFINITY HUGE_VAL
+
+# define M_E 2.7182818284590452354 /* e */
+# define M_LOG2E 1.4426950408889634074 /* log_2 e */
+# define M_LOG10E 0.43429448190325182765 /* log_10 e */
+# define M_LN2 0.69314718055994530942 /* log_e 2 */
+# define M_LN10 2.30258509299404568402 /* log_e 10 */
+# define M_PI 3.14159265358979323846 /* pi */
+# define M_PI_2 1.57079632679489661923 /* pi/2 */
+# define M_PI_4 0.78539816339744830962 /* pi/4 */
+# define M_1_PI 0.31830988618379067154 /* 1/pi */
+# define M_2_PI 0.63661977236758134308 /* 2/pi */
+# define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */
+# define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
+# define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
+
+////////////////////////////////////////////////////////////////////////
+// random and srandom
+////////////////////////////////////////////////////////////////////////
+#include <stdlib.h>
+static inline long int random (void) { return rand(); }
+static inline void srandom (unsigned int seed) { srand(seed); }
+
+#endif // _MSC_CONFIG_H_ ]
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/msvc/inttypes.h b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/inttypes.h
new file mode 100644
index 000000000..0a1b60fc1
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/inttypes.h
@@ -0,0 +1,301 @@
+// ISO C9x compliant inttypes.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
+//
+// Copyright (c) 2006 Alexander Chemeris
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. The name of the author may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_INTTYPES_H_ // [
+#define _MSC_INTTYPES_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include "stdint.h"
+
+// 7.8 Format conversion of integer types
+
+typedef struct {
+ intmax_t quot;
+ intmax_t rem;
+} imaxdiv_t;
+
+// 7.8.1 Macros for format specifiers
+
+// The fprintf macros for signed integers are:
+#define PRId8 "d"
+#define PRIi8 "i"
+#define PRIdLEAST8 "d"
+#define PRIiLEAST8 "i"
+#define PRIdFAST8 "d"
+#define PRIiFAST8 "i"
+
+#define PRId16 "hd"
+#define PRIi16 "hi"
+#define PRIdLEAST16 "hd"
+#define PRIiLEAST16 "hi"
+#define PRIdFAST16 "hd"
+#define PRIiFAST16 "hi"
+
+#define PRId32 "I32d"
+#define PRIi32 "I32i"
+#define PRIdLEAST32 "I32d"
+#define PRIiLEAST32 "I32i"
+#define PRIdFAST32 "I32d"
+#define PRIiFAST32 "I32i"
+
+#define PRId64 "I64d"
+#define PRIi64 "I64i"
+#define PRIdLEAST64 "I64d"
+#define PRIiLEAST64 "I64i"
+#define PRIdFAST64 "I64d"
+#define PRIiFAST64 "I64i"
+
+#define PRIdMAX "I64d"
+#define PRIiMAX "I64i"
+
+#define PRIdPTR "Id"
+#define PRIiPTR "Ii"
+
+// The fprintf macros for unsigned integers are:
+#define PRIo8 "o"
+#define PRIu8 "u"
+#define PRIx8 "x"
+#define PRIX8 "X"
+#define PRIoLEAST8 "o"
+#define PRIuLEAST8 "u"
+#define PRIxLEAST8 "x"
+#define PRIXLEAST8 "X"
+#define PRIoFAST8 "o"
+#define PRIuFAST8 "u"
+#define PRIxFAST8 "x"
+#define PRIXFAST8 "X"
+
+#define PRIo16 "ho"
+#define PRIu16 "hu"
+#define PRIx16 "hx"
+#define PRIX16 "hX"
+#define PRIoLEAST16 "ho"
+#define PRIuLEAST16 "hu"
+#define PRIxLEAST16 "hx"
+#define PRIXLEAST16 "hX"
+#define PRIoFAST16 "ho"
+#define PRIuFAST16 "hu"
+#define PRIxFAST16 "hx"
+#define PRIXFAST16 "hX"
+
+#define PRIo32 "I32o"
+#define PRIu32 "I32u"
+#define PRIx32 "I32x"
+#define PRIX32 "I32X"
+#define PRIoLEAST32 "I32o"
+#define PRIuLEAST32 "I32u"
+#define PRIxLEAST32 "I32x"
+#define PRIXLEAST32 "I32X"
+#define PRIoFAST32 "I32o"
+#define PRIuFAST32 "I32u"
+#define PRIxFAST32 "I32x"
+#define PRIXFAST32 "I32X"
+
+#define PRIo64 "I64o"
+#define PRIu64 "I64u"
+#define PRIx64 "I64x"
+#define PRIX64 "I64X"
+#define PRIoLEAST64 "I64o"
+#define PRIuLEAST64 "I64u"
+#define PRIxLEAST64 "I64x"
+#define PRIXLEAST64 "I64X"
+#define PRIoFAST64 "I64o"
+#define PRIuFAST64 "I64u"
+#define PRIxFAST64 "I64x"
+#define PRIXFAST64 "I64X"
+
+#define PRIoMAX "I64o"
+#define PRIuMAX "I64u"
+#define PRIxMAX "I64x"
+#define PRIXMAX "I64X"
+
+#define PRIoPTR "Io"
+#define PRIuPTR "Iu"
+#define PRIxPTR "Ix"
+#define PRIXPTR "IX"
+
+// The fscanf macros for signed integers are:
+#define SCNd8 "d"
+#define SCNi8 "i"
+#define SCNdLEAST8 "d"
+#define SCNiLEAST8 "i"
+#define SCNdFAST8 "d"
+#define SCNiFAST8 "i"
+
+#define SCNd16 "hd"
+#define SCNi16 "hi"
+#define SCNdLEAST16 "hd"
+#define SCNiLEAST16 "hi"
+#define SCNdFAST16 "hd"
+#define SCNiFAST16 "hi"
+
+#define SCNd32 "ld"
+#define SCNi32 "li"
+#define SCNdLEAST32 "ld"
+#define SCNiLEAST32 "li"
+#define SCNdFAST32 "ld"
+#define SCNiFAST32 "li"
+
+#define SCNd64 "I64d"
+#define SCNi64 "I64i"
+#define SCNdLEAST64 "I64d"
+#define SCNiLEAST64 "I64i"
+#define SCNdFAST64 "I64d"
+#define SCNiFAST64 "I64i"
+
+#define SCNdMAX "I64d"
+#define SCNiMAX "I64i"
+
+#ifdef _WIN64 // [
+# define SCNdPTR "I64d"
+# define SCNiPTR "I64i"
+#else // _WIN64 ][
+# define SCNdPTR "ld"
+# define SCNiPTR "li"
+#endif // _WIN64 ]
+
+// The fscanf macros for unsigned integers are:
+#define SCNo8 "o"
+#define SCNu8 "u"
+#define SCNx8 "x"
+#define SCNX8 "X"
+#define SCNoLEAST8 "o"
+#define SCNuLEAST8 "u"
+#define SCNxLEAST8 "x"
+#define SCNXLEAST8 "X"
+#define SCNoFAST8 "o"
+#define SCNuFAST8 "u"
+#define SCNxFAST8 "x"
+#define SCNXFAST8 "X"
+
+#define SCNo16 "ho"
+#define SCNu16 "hu"
+#define SCNx16 "hx"
+#define SCNX16 "hX"
+#define SCNoLEAST16 "ho"
+#define SCNuLEAST16 "hu"
+#define SCNxLEAST16 "hx"
+#define SCNXLEAST16 "hX"
+#define SCNoFAST16 "ho"
+#define SCNuFAST16 "hu"
+#define SCNxFAST16 "hx"
+#define SCNXFAST16 "hX"
+
+#define SCNo32 "lo"
+#define SCNu32 "lu"
+#define SCNx32 "lx"
+#define SCNX32 "lX"
+#define SCNoLEAST32 "lo"
+#define SCNuLEAST32 "lu"
+#define SCNxLEAST32 "lx"
+#define SCNXLEAST32 "lX"
+#define SCNoFAST32 "lo"
+#define SCNuFAST32 "lu"
+#define SCNxFAST32 "lx"
+#define SCNXFAST32 "lX"
+
+#define SCNo64 "I64o"
+#define SCNu64 "I64u"
+#define SCNx64 "I64x"
+#define SCNX64 "I64X"
+#define SCNoLEAST64 "I64o"
+#define SCNuLEAST64 "I64u"
+#define SCNxLEAST64 "I64x"
+#define SCNXLEAST64 "I64X"
+#define SCNoFAST64 "I64o"
+#define SCNuFAST64 "I64u"
+#define SCNxFAST64 "I64x"
+#define SCNXFAST64 "I64X"
+
+#define SCNoMAX "I64o"
+#define SCNuMAX "I64u"
+#define SCNxMAX "I64x"
+#define SCNXMAX "I64X"
+
+#ifdef _WIN64 // [
+# define SCNoPTR "I64o"
+# define SCNuPTR "I64u"
+# define SCNxPTR "I64x"
+# define SCNXPTR "I64X"
+#else // _WIN64 ][
+# define SCNoPTR "lo"
+# define SCNuPTR "lu"
+# define SCNxPTR "lx"
+# define SCNXPTR "lX"
+#endif // _WIN64 ]
+
+// 7.8.2 Functions for greatest-width integer types
+
+// 7.8.2.1 The imaxabs function
+#define imaxabs _abs64
+
+// 7.8.2.2 The imaxdiv function
+
+// This is modified version of div() function from Microsoft's div.c found
+// in %MSVC.NET%\crt\src\div.c
+#ifdef STATIC_IMAXDIV // [
+static
+#else // STATIC_IMAXDIV ][
+_inline
+#endif // STATIC_IMAXDIV ]
+imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
+{
+ imaxdiv_t result;
+
+ result.quot = numer / denom;
+ result.rem = numer % denom;
+
+ if (numer < 0 && result.rem > 0) {
+ // did division wrong; must fix up
+ ++result.quot;
+ result.rem -= denom;
+ }
+
+ return result;
+}
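+
+// Illustrative note (added, not in the original msinttypes source): with C99
+// truncating division, imaxdiv(-7, 2) yields quot == -3 and rem == -1 directly;
+// the fix-up above only triggers on compilers whose '/' rounds toward negative
+// infinity (quot == -4, rem == 1) and restores the C99 result.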
+
+// 7.8.2.3 The strtoimax and strtoumax functions
+#define strtoimax _strtoi64
+#define strtoumax _strtoui64
+
+// 7.8.2.4 The wcstoimax and wcstoumax functions
+#define wcstoimax _wcstoi64
+#define wcstoumax _wcstoui64
+
+
+#endif // _MSC_INTTYPES_H_ ]
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdbool.h b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdbool.h
new file mode 100644
index 000000000..ca4581d37
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdbool.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2005, 2006 Apple Computer, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef STDBOOL_WIN32_H
+#define STDBOOL_WIN32_H
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef __cplusplus
+
+typedef unsigned char bool;
+
+#define true 1
+#define false 0
+
+#ifndef CASSERT
+#define CASSERT(exp, name) typedef int dummy##name [(exp) ? 1 : -1];
+#endif
+
+CASSERT(sizeof(bool) == 1, bool_is_one_byte)
+CASSERT(true, true_is_true)
+CASSERT(!false, false_is_false)
+
+#endif
+
+#endif
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdint.h b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdint.h
new file mode 100644
index 000000000..108bc8982
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdint.h
@@ -0,0 +1,251 @@
+// ISO C9x compliant stdint.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
+//
+// Copyright (c) 2006-2008 Alexander Chemeris
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. The name of the author may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
+// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
+// or compiler give many errors like this:
+// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#ifdef __cplusplus
+extern "C" {
+#endif
+# include <wchar.h>
+#ifdef __cplusplus
+}
+#endif
+
+// Define _W64 macros to mark types changing their size, like intptr_t.
+#ifndef _W64
+# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
+# define _W64 __w64
+# else
+# define _W64
+# endif
+#endif
+
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+
+// Visual Studio 6 and Embedded Visual C++ 4 doesn't
+// realize that, e.g. char has the same size as __int8
+// so we give up on __intX for them.
+#if (_MSC_VER < 1300)
+ typedef signed char int8_t;
+ typedef signed short int16_t;
+ typedef signed int int32_t;
+ typedef unsigned char uint8_t;
+ typedef unsigned short uint16_t;
+ typedef unsigned int uint32_t;
+#else
+ typedef signed __int8 int8_t;
+ typedef signed __int16 int16_t;
+ typedef signed __int32 int32_t;
+ typedef unsigned __int8 uint8_t;
+ typedef unsigned __int16 uint16_t;
+ typedef unsigned __int32 uint32_t;
+#endif
+typedef signed __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+
+
+// 7.18.1.2 Minimum-width integer types
+typedef int8_t int_least8_t;
+typedef int16_t int_least16_t;
+typedef int32_t int_least32_t;
+typedef int64_t int_least64_t;
+typedef uint8_t uint_least8_t;
+typedef uint16_t uint_least16_t;
+typedef uint32_t uint_least32_t;
+typedef uint64_t uint_least64_t;
+
+// 7.18.1.3 Fastest minimum-width integer types
+typedef int8_t int_fast8_t;
+typedef int16_t int_fast16_t;
+typedef int32_t int_fast32_t;
+typedef int64_t int_fast64_t;
+typedef uint8_t uint_fast8_t;
+typedef uint16_t uint_fast16_t;
+typedef uint32_t uint_fast32_t;
+typedef uint64_t uint_fast64_t;
+
+// 7.18.1.4 Integer types capable of holding object pointers
+#ifdef _WIN64 // [
+ typedef signed __int64 intptr_t;
+ typedef unsigned __int64 uintptr_t;
+#else // _WIN64 ][
+ typedef _W64 signed int intptr_t;
+ typedef _W64 unsigned int uintptr_t;
+#endif // _WIN64 ]
+
+// 7.18.1.5 Greatest-width integer types
+typedef int64_t intmax_t;
+typedef uint64_t uintmax_t;
+
+
+// 7.18.2 Limits of specified-width integer types
+
+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
+
+// 7.18.2.1 Limits of exact-width integer types
+#define INT8_MIN ((int8_t)_I8_MIN)
+#define INT8_MAX _I8_MAX
+#define INT16_MIN ((int16_t)_I16_MIN)
+#define INT16_MAX _I16_MAX
+#define INT32_MIN ((int32_t)_I32_MIN)
+#define INT32_MAX _I32_MAX
+#define INT64_MIN ((int64_t)_I64_MIN)
+#define INT64_MAX _I64_MAX
+#define UINT8_MAX _UI8_MAX
+#define UINT16_MAX _UI16_MAX
+#define UINT32_MAX _UI32_MAX
+#define UINT64_MAX _UI64_MAX
+
+// 7.18.2.2 Limits of minimum-width integer types
+#define INT_LEAST8_MIN INT8_MIN
+#define INT_LEAST8_MAX INT8_MAX
+#define INT_LEAST16_MIN INT16_MIN
+#define INT_LEAST16_MAX INT16_MAX
+#define INT_LEAST32_MIN INT32_MIN
+#define INT_LEAST32_MAX INT32_MAX
+#define INT_LEAST64_MIN INT64_MIN
+#define INT_LEAST64_MAX INT64_MAX
+#define UINT_LEAST8_MAX UINT8_MAX
+#define UINT_LEAST16_MAX UINT16_MAX
+#define UINT_LEAST32_MAX UINT32_MAX
+#define UINT_LEAST64_MAX UINT64_MAX
+
+// 7.18.2.3 Limits of fastest minimum-width integer types
+#define INT_FAST8_MIN INT8_MIN
+#define INT_FAST8_MAX INT8_MAX
+#define INT_FAST16_MIN INT16_MIN
+#define INT_FAST16_MAX INT16_MAX
+#define INT_FAST32_MIN INT32_MIN
+#define INT_FAST32_MAX INT32_MAX
+#define INT_FAST64_MIN INT64_MIN
+#define INT_FAST64_MAX INT64_MAX
+#define UINT_FAST8_MAX UINT8_MAX
+#define UINT_FAST16_MAX UINT16_MAX
+#define UINT_FAST32_MAX UINT32_MAX
+#define UINT_FAST64_MAX UINT64_MAX
+
+// 7.18.2.4 Limits of integer types capable of holding object pointers
+#ifdef _WIN64 // [
+# define INTPTR_MIN INT64_MIN
+# define INTPTR_MAX INT64_MAX
+# define UINTPTR_MAX UINT64_MAX
+#else // _WIN64 ][
+# define INTPTR_MIN INT32_MIN
+# define INTPTR_MAX INT32_MAX
+# define UINTPTR_MAX UINT32_MAX
+#endif // _WIN64 ]
+
+// 7.18.2.5 Limits of greatest-width integer types
+#define INTMAX_MIN INT64_MIN
+#define INTMAX_MAX INT64_MAX
+#define UINTMAX_MAX UINT64_MAX
+
+// 7.18.3 Limits of other integer types
+
+#ifdef _WIN64 // [
+# define PTRDIFF_MIN _I64_MIN
+# define PTRDIFF_MAX _I64_MAX
+#else // _WIN64 ][
+# define PTRDIFF_MIN _I32_MIN
+# define PTRDIFF_MAX _I32_MAX
+#endif // _WIN64 ]
+
+#define SIG_ATOMIC_MIN INT_MIN
+#define SIG_ATOMIC_MAX INT_MAX
+
+#ifndef SIZE_MAX // [
+# ifdef _WIN64 // [
+# define SIZE_MAX _UI64_MAX
+# else // _WIN64 ][
+# define SIZE_MAX _UI32_MAX
+# endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+# define WCHAR_MIN 0
+#endif // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+# define WCHAR_MAX _UI16_MAX
+#endif // WCHAR_MAX ]
+
+#define WINT_MIN 0
+#define WINT_MAX _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Limits of other integer types
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val) val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val) val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+#ifndef INTMAX_C
+#define INTMAX_C INT64_C
+#endif
+#ifndef UINTMAX_C
+#define UINTMAX_C UINT64_C
+#endif
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+
+#endif // _MSC_STDINT_H_ ]
diff --git a/src/algorithms/libs/volk_gnsssdr/gen/archs.xml b/src/algorithms/libs/volk_gnsssdr/gen/archs.xml
new file mode 100644
index 000000000..e570fe5d2
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/gen/archs.xml
@@ -0,0 +1,204 @@
+
+
+
+
+
+
+
+ -maltivec
+ 16
+
+
+
+
+ -mfloat-abi=softfp
+
+
+
+ -mfloat-abi=hard
+
+
+
+ -mfpu=neon
+ -funsafe-math-optimizations
+ 16
+
+
+
+
+ -m32
+
+
+
+
+ 0x80000001
+
+
+ 3
+ 0x80000001
+ 29
+
+ -m64
+ -m64
+
+
+
+
+ 3
+ 0x80000001
+ 31
+
+ -m3dnow
+ -m3dnow
+ 8
+
+
+
+
+ 3
+ 0x80000001
+ 5
+
+ -msse4.2
+ -msse4.2
+ 16
+
+
+
+
+ 2
+ 0x00000001
+ 23
+
+ -mpopcnt
+ -mpopcnt
+ /arch:AVX
+
+
+
+
+ 3
+ 0x00000001
+ 23
+
+ -mmmx
+ -mmmx
+ /arch:SSE
+ 8
+
+
+
+
+ 3
+ 0x00000001
+ 25
+
+ -msse
+ -msse
+ /arch:SSE
+ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+ xmmintrin.h
+ 16
+
+
+
+
+ 3
+ 0x00000001
+ 26
+
+ -msse2
+ -msse2
+ /arch:SSE2
+ 16
+
+
+
+
+
+
+
+
+
+
+
+ 2
+ 0x00000001
+ 0
+
+ -msse3
+ -msse3
+ /arch:AVX
+ _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+ pmmintrin.h
+ 16
+
+
+
+
+ 2
+ 0x00000001
+ 9
+
+ -mssse3
+ -mssse3
+ /arch:AVX
+ 16
+
+
+
+
+ 2
+ 0x80000001
+ 6
+
+ -msse4a
+ -msse4a
+ 16
+
+
+
+
+ 2
+ 0x00000001
+ 19
+
+ -msse4.1
+ -msse4.1
+ /arch:AVX
+ 16
+
+
+
+
+ 2
+ 0x00000001
+ 20
+
+ -msse4.2
+ -msse4.2
+ /arch:AVX
+ 16
+
+
+
+
+ 2
+ 0x00000001
+ 28
+
+
+
+ 2
+ 0x00000001
+ 27
+
+
+
+ -mavx
+ -mavx
+ /arch:AVX
+ 32
+
+
+
diff --git a/src/algorithms/libs/volk_gnsssdr/gen/machines.xml b/src/algorithms/libs/volk_gnsssdr/gen/machines.xml
new file mode 100644
index 000000000..357bf7519
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/gen/machines.xml
@@ -0,0 +1,55 @@
+
+
+
+generic orc|
+
+
+
+
+
+generic neon softfp|hardfp orc|
+
+
+
+
+generic 32|64| mmx| sse sse2 orc|
+
+
+
+generic 32|64 mmx sse sse2 sse3 orc|
+
+
+
+generic 32|64 mmx sse sse2 sse3 ssse3 orc|
+
+
+
+generic 32|64 mmx sse sse2 sse3 sse4_a popcount orc|
+
+
+
+generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1 orc|
+
+
+
+generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount orc|
+
+
+
+
+generic 32|64| mmx| sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount avx orc|
+
+
+
+generic altivec
+
+
+
diff --git a/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_arch_defs.py b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_arch_defs.py
new file mode 100644
index 000000000..3c75e1374
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_arch_defs.py
@@ -0,0 +1,85 @@
+#
+# Copyright 2012 Free Software Foundation, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+archs = list()
+arch_dict = dict()
+
+class arch_class:
+ def __init__(self, flags, checks, **kwargs):
+ for key, cast, failval in (
+ ('name', str, None),
+ ('environment', str, None),
+ ('include', str, None),
+ ('alignment', int, 1)
+ ):
+ try: setattr(self, key, cast(kwargs[key]))
+ except: setattr(self, key, failval)
+ self.checks = checks
+ assert(self.name)
+ self._flags = flags
+
+ def is_supported(self, compiler):
+ if not self._flags.keys(): return True
+ return compiler in self._flags.keys()
+
+ def get_flags(self, compiler):
+ try: return self._flags[compiler]
+ except KeyError: return list()
+
+ def __repr__(self): return self.name
+
+def register_arch(**kwargs):
+ arch = arch_class(**kwargs)
+ archs.append(arch)
+ arch_dict[arch.name] = arch
+
+########################################################################
+# register the arches
+########################################################################
+#TODO skip the XML and put it here
+from xml.dom import minidom
+import os
+gendir = os.path.dirname(__file__)
+archs_xml = minidom.parse(os.path.join(gendir, 'archs.xml')).getElementsByTagName('arch')
+for arch_xml in archs_xml:
+ kwargs = dict()
+ for attr in arch_xml.attributes.keys():
+ kwargs[attr] = arch_xml.attributes[attr].value
+ for node in arch_xml.childNodes:
+ try:
+ name = node.tagName
+ val = arch_xml.getElementsByTagName(name)[0].firstChild.data
+ kwargs[name] = val
+ except: pass
+ checks = list()
+ for check_xml in arch_xml.getElementsByTagName("check"):
+ name = check_xml.attributes["name"].value
+ params = list()
+ for param_xml in check_xml.getElementsByTagName("param"):
+ params.append(param_xml.firstChild.data)
+ checks.append([name, params])
+ flags = dict()
+ for flag_xml in arch_xml.getElementsByTagName("flag"):
+ name = flag_xml.attributes["compiler"].value
+ if not flags.has_key(name): flags[name] = list()
+ flags[name].append(flag_xml.firstChild.data)
+ #force kwargs keys to be of type str, not unicode for py25
+ kwargs = dict((str(k), v) for k, v in kwargs.iteritems())
+ register_arch(flags=flags, checks=checks, **kwargs)
+
+if __name__ == '__main__':
+ print archs
diff --git a/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_compile_utils.py b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_compile_utils.py
new file mode 100644
index 000000000..05de9a546
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_compile_utils.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+#
+# Copyright 2012 Free Software Foundation, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import optparse
+import volk_gnsssdr_arch_defs
+import volk_gnsssdr_machine_defs
+
+def do_arch_flags_list(compiler):
+ output = list()
+ for arch in volk_gnsssdr_arch_defs.archs:
+ if not arch.is_supported(compiler): continue
+ fields = [arch.name] + arch.get_flags(compiler)
+ output.append(','.join(fields))
+ print ';'.join(output)
+
+def do_machines_list(arch_names):
+ output = list()
+ for machine in volk_gnsssdr_machine_defs.machines:
+ machine_arch_set = set(machine.arch_names)
+ if set(arch_names).intersection(machine_arch_set) == machine_arch_set:
+ output.append(machine.name)
+ print ';'.join(output)
+
+def do_machine_flags_list(compiler, machine_name):
+ output = list()
+ machine = volk_gnsssdr_machine_defs.machine_dict[machine_name]
+ for arch in machine.archs:
+ output.extend(arch.get_flags(compiler))
+ print ' '.join(output)
+
+def main():
+ parser = optparse.OptionParser()
+ parser.add_option('--mode', type='string')
+ parser.add_option('--compiler', type='string')
+ parser.add_option('--archs', type='string')
+ parser.add_option('--machine', type='string')
+ (opts, args) = parser.parse_args()
+
+ if opts.mode == 'arch_flags': return do_arch_flags_list(opts.compiler.lower())
+ if opts.mode == 'machines': return do_machines_list(opts.archs.split(';'))
+ if opts.mode == 'machine_flags': return do_machine_flags_list(opts.compiler.lower(), opts.machine)
+
+if __name__ == '__main__': main()
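+
+# Example invocations (illustrative only, added for clarity; the --compiler value
+# must match a "compiler" attribute used in archs.xml, e.g. 'gnu' or 'msvc'):
+#   python volk_gnsssdr_compile_utils.py --mode arch_flags --compiler gnu
+#   python volk_gnsssdr_compile_utils.py --mode machines --archs "generic;sse;sse2"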
diff --git a/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_kernel_defs.py b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_kernel_defs.py
new file mode 100644
index 000000000..b3f03f627
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_kernel_defs.py
@@ -0,0 +1,209 @@
+#
+# Copyright 2011-2012 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+#
+
+import os
+import re
+import sys
+import glob
+
+########################################################################
+# Strip comments from a c/cpp file.
+# Input is code string, output is code string without comments.
+# http://stackoverflow.com/questions/241327/python-snippet-to-remove-c-and-c-comments
+########################################################################
+def comment_remover(text):
+ def replacer(match):
+ s = match.group(0)
+ if s.startswith('/'):
+ return ""
+ else:
+ return s
+ pattern = re.compile(
+ r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
+ re.DOTALL | re.MULTILINE
+ )
+ return re.sub(pattern, replacer, text)
+
+########################################################################
+# Split code into nested sections according to ifdef preprocessor macros
+########################################################################
+def split_into_nested_ifdef_sections(code):
+ sections = list()
+ section = ''
+ header = 'text'
+ in_section_depth = 0
+ for i, line in enumerate(code.splitlines()):
+ m = re.match('^(\s*)#(\s*)(\w+)(.*)$', line)
+ line_is = 'normal'
+ if m:
+ p0, p1, fcn, stuff = m.groups()
+ if fcn in ('if', 'ifndef', 'ifdef'): line_is = 'if'
+ if fcn in ('else', 'elif'): line_is = 'else'
+ if fcn in ('endif',): line_is = 'end'
+
+ if line_is == 'if': in_section_depth += 1
+ if line_is == 'end': in_section_depth -= 1
+
+ if in_section_depth == 1 and line_is == 'if':
+ sections.append((header, section))
+ section = ''
+ header = line
+ continue
+
+ if in_section_depth == 1 and line_is == 'else':
+ sections.append((header, section))
+ section = ''
+ header = line
+ continue
+
+ if in_section_depth == 0 and line_is == 'end':
+ sections.append((header, section))
+ section = ''
+ header = 'text'
+ continue
+
+ section += line + '\n'
+
+ sections.append((header, section)) #and pack remainder into sections
+ sections = [sec for sec in sections if sec[1].strip()] #filter empty sections
+
+ #recurse into non-text sections to fill subsections
+ for i, (header, section) in enumerate(sections):
+ if header == 'text': continue
+ sections[i] = (header, split_into_nested_ifdef_sections(section))
+
+ return sections
+
+########################################################################
+# Recursive print of sections to test code above
+########################################################################
+def print_sections(sections, indent = ' '):
+ for header, body in sections:
+ if header == 'text':
+ print indent, ('\n'+indent).join(body.splitlines())
+ continue
+ print indent.replace(' ', '-') + '>', header
+ print_sections(body, indent + ' ')
+
+########################################################################
+# Flatten a section to just body text
+########################################################################
+def flatten_section_text(sections):
+ output = ''
+ for hdr, bdy in sections:
+ if hdr != 'text': output += flatten_section_text(bdy)
+ else: output += bdy
+ return output
+
+########################################################################
+# Extract kernel info from section, represent as an implementation
+########################################################################
+class impl_class:
+ def __init__(self, kern_name, header, body):
+ #extract LV_HAVE_*
+ self.deps = set(map(str.lower, re.findall('LV_HAVE_(\w+)', header)))
+ #extract function suffix and args
+ body = flatten_section_text(body)
+ try:
+ fcn_matcher = re.compile('^.*(%s\\w*)\\s*\\((.*)$'%kern_name, re.DOTALL | re.MULTILINE)
+ body = body.split('{')[0].rsplit(')', 1)[0] #get the part before the open ){ bracket
+ m = fcn_matcher.match(body)
+ impl_name, the_rest = m.groups()
+ self.name = impl_name.replace(kern_name+'_', '')
+ self.args = list()
+ fcn_args = the_rest.split(',')
+ for fcn_arg in fcn_args:
+ arg_matcher = re.compile('^\s*(.*\\W)\s*(\w+)\s*$', re.DOTALL | re.MULTILINE)
+ m = arg_matcher.match(fcn_arg)
+ arg_type, arg_name = m.groups()
+ self.args.append((arg_type, arg_name))
+ except Exception as ex:
+ raise Exception, 'I cant parse the function prototype from: %s in %s\n%s'%(kern_name, body, ex)
+
+ assert self.name
+ self.is_aligned = self.name.startswith('a_')
+
+ def __repr__(self):
+ return self.name
+
+########################################################################
+# Get sets of LV_HAVE_* from the code
+########################################################################
+def extract_lv_haves(code):
+ haves = list()
+ for line in code.splitlines():
+ if not line.strip().startswith('#'): continue
+ have_set = set(map(str.lower, re.findall('LV_HAVE_(\w+)', line)))
+ if have_set: haves.append(have_set)
+ return haves
+
+########################################################################
+# Represent a processing kernel, parse from file
+########################################################################
+class kernel_class:
+ def __init__(self, kernel_file):
+ self.name = os.path.splitext(os.path.basename(kernel_file))[0]
+ self.pname = self.name.replace('volk_gnsssdr_', 'p_')
+ code = open(kernel_file, 'r').read()
+ code = comment_remover(code)
+ sections = split_into_nested_ifdef_sections(code)
+ self._impls = list()
+ for header, section in sections:
+ if 'ifndef' not in header.lower(): continue
+ for sub_hdr, body in section:
+ if 'if' not in sub_hdr.lower(): continue
+ if 'LV_HAVE_' not in sub_hdr: continue
+ self._impls.append(impl_class(
+ kern_name=self.name, header=sub_hdr, body=body,
+ ))
+ assert(self._impls)
+ self.has_dispatcher = False
+ for impl in self._impls:
+ if impl.name == 'dispatcher':
+ self._impls.remove(impl)
+ self.has_dispatcher = True
+ break
+ self.args = self._impls[0].args
+ self.arglist_types = ', '.join([a[0] for a in self.args])
+ self.arglist_full = ', '.join(['%s %s'%a for a in self.args])
+ self.arglist_names = ', '.join([a[1] for a in self.args])
+
+ def get_impls(self, archs):
+ archs = set(archs)
+ impls = list()
+ for impl in self._impls:
+ if impl.deps.intersection(archs) == impl.deps:
+ impls.append(impl)
+ return impls
+
+ def __repr__(self):
+ return self.name
+
+########################################################################
+# Extract information from the VOLK kernels
+########################################################################
+__file__ = os.path.abspath(__file__)
+srcdir = os.path.dirname(os.path.dirname(__file__))
+kernel_files = glob.glob(os.path.join(srcdir, "kernels", "volk_gnsssdr", "*.h"))
+kernels = map(kernel_class, kernel_files)
+
+if __name__ == '__main__':
+ print kernels
diff --git a/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_machine_defs.py b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_machine_defs.py
new file mode 100644
index 000000000..174106634
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_machine_defs.py
@@ -0,0 +1,74 @@
+#
+# Copyright 2012 Free Software Foundation, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+from volk_gnsssdr_arch_defs import arch_dict
+
+machines = list()
+machine_dict = dict()
+
+class machine_class:
+ def __init__(self, name, archs):
+ self.name = name
+ self.archs = list()
+ self.arch_names = list()
+ for arch_name in archs:
+ if not arch_name: continue
+ arch = arch_dict[arch_name]
+ self.archs.append(arch)
+ self.arch_names.append(arch_name)
+ self.alignment = max(map(lambda a: a.alignment, self.archs))
+
+ def __repr__(self): return self.name
+
+def register_machine(name, archs):
+ for i, arch_name in enumerate(archs):
+ if '|' in arch_name: #handle special arch names with the '|'
+ for arch_sub in arch_name.split('|'):
+ if arch_sub:
+ register_machine(name+'_'+arch_sub, archs[:i] + [arch_sub] + archs[i+1:])
+ else:
+ register_machine(name, archs[:i] + archs[i+1:])
+ return
+ machine = machine_class(name=name, archs=archs)
+ machines.append(machine)
+ machine_dict[machine.name] = machine
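+
+# Note added for clarity: an arch name containing '|' lists alternatives, and a
+# trailing '|' makes that arch optional. For example, register_machine('sse2',
+# 'generic 32|64| mmx| sse sse2 orc|'.split()) recursively expands into machines
+# such as sse2_32, sse2_64_mmx_orc and plain sse2 (one machine per combination
+# of the alternatives and optional parts).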
+
+########################################################################
+# register the machines
+########################################################################
+#TODO skip the XML and put it here
+from xml.dom import minidom
+import os
+gendir = os.path.dirname(__file__)
+machines_xml = minidom.parse(os.path.join(gendir, 'machines.xml')).getElementsByTagName('machine')
+for machine_xml in machines_xml:
+ kwargs = dict()
+ for attr in machine_xml.attributes.keys():
+ kwargs[attr] = machine_xml.attributes[attr].value
+ for node in machine_xml.childNodes:
+ try:
+ name = node.tagName
+ val = machine_xml.getElementsByTagName(name)[0].firstChild.data
+ kwargs[name] = val
+ except: pass
+ kwargs['archs'] = kwargs['archs'].split()
+ #force kwargs keys to be of type str, not unicode for py25
+ kwargs = dict((str(k), v) for k, v in kwargs.iteritems())
+ register_machine(**kwargs)
+
+if __name__ == '__main__':
+ print machines
diff --git a/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_tmpl_utils.py b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_tmpl_utils.py
new file mode 100644
index 000000000..c4577af62
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_tmpl_utils.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+#
+# Copyright 2012 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+#
+
+import os
+import re
+import sys
+import optparse
+import volk_gnsssdr_arch_defs
+import volk_gnsssdr_machine_defs
+import volk_gnsssdr_kernel_defs
+from Cheetah import Template
+
+def __escape_pre_processor(code):
+ out = list()
+ for line in code.splitlines():
+ m = re.match('^(\s*)#(\s*)(\w+)(.*)$', line)
+ if m:
+ p0, p1, fcn, stuff = m.groups()
+ conly = fcn in ('include', 'define', 'ifdef', 'ifndef', 'endif', 'elif', 'pragma')
+ both = fcn in ('if', 'else')
+ istmpl = '$' in stuff
+ if 'defined' in stuff: istmpl = False
+ if conly or (both and not istmpl):
+ line = '%s\\#%s%s%s'%(p0, p1, fcn, stuff)
+ out.append(line)
+ return '\n'.join(out)
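+
+# Clarifying example (added, not in the original): Cheetah uses '#' for its own
+# directives, so C pre-processor lines are escaped before templating, e.g.
+# '#include <foo.h>' becomes '\#include <foo.h>'. '#if'/'#else' lines are only
+# left unescaped when they contain '$' template placeholders (and no 'defined'),
+# so Cheetah can evaluate them.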
+
+def __parse_tmpl(_tmpl, **kwargs):
+ defs = {
+ 'archs': volk_gnsssdr_arch_defs.archs,
+ 'arch_dict': volk_gnsssdr_arch_defs.arch_dict,
+ 'machines': volk_gnsssdr_machine_defs.machines,
+ 'machine_dict': volk_gnsssdr_machine_defs.machine_dict,
+ 'kernels': volk_gnsssdr_kernel_defs.kernels,
+ }
+ defs.update(kwargs)
+ _tmpl = __escape_pre_processor(_tmpl)
+ _tmpl = """
+
+/* this file was generated by volk_gnsssdr template utils, do not edit! */
+
+""" + _tmpl
+ return str(Template.Template(_tmpl, defs))
+
+def main():
+ parser = optparse.OptionParser()
+ parser.add_option('--input', type='string')
+ parser.add_option('--output', type='string')
+ (opts, args) = parser.parse_args()
+
+ output = __parse_tmpl(open(opts.input).read(), args=args)
+ if opts.output: open(opts.output, 'w').write(output)
+ else: print output
+
+if __name__ == '__main__': main()
diff --git a/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/constants.h b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/constants.h
new file mode 100644
index 000000000..f08960557
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/constants.h
@@ -0,0 +1,39 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2006,2009,2013 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_VOLK_CONSTANTS_H
+#define INCLUDED_VOLK_CONSTANTS_H
+
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+
+__VOLK_DECL_BEGIN
+
+VOLK_API char* volk_gnsssdr_prefix();
+VOLK_API char* volk_gnsssdr_build_date();
+VOLK_API char* volk_gnsssdr_version();
+VOLK_API char* volk_gnsssdr_c_compiler();
+VOLK_API char* volk_gnsssdr_compiler_flags();
+VOLK_API char* volk_gnsssdr_available_machines();
+
+__VOLK_DECL_END
+
+#endif /* INCLUDED_VOLK_CONSTANTS_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_common.h b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_common.h
new file mode 100644
index 000000000..c48057cd9
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_common.h
@@ -0,0 +1,96 @@
+#ifndef INCLUDED_LIBVOLK_COMMON_H
+#define INCLUDED_LIBVOLK_COMMON_H
+
+////////////////////////////////////////////////////////////////////////
+// Cross-platform attribute macros
+////////////////////////////////////////////////////////////////////////
+#if defined __GNUC__
+# define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x)))
+# define __VOLK_ATTR_UNUSED __attribute__((unused))
+# define __VOLK_ATTR_INLINE __attribute__((always_inline))
+# define __VOLK_ATTR_DEPRECATED __attribute__((deprecated))
+# if __GNUC__ >= 4
+# define __VOLK_ATTR_EXPORT __attribute__((visibility("default")))
+# define __VOLK_ATTR_IMPORT __attribute__((visibility("default")))
+# else
+# define __VOLK_ATTR_EXPORT
+# define __VOLK_ATTR_IMPORT
+# endif
+#elif _MSC_VER
+# define __VOLK_ATTR_ALIGNED(x) __declspec(align(x))
+# define __VOLK_ATTR_UNUSED
+# define __VOLK_ATTR_INLINE __forceinline
+# define __VOLK_ATTR_DEPRECATED __declspec(deprecated)
+# define __VOLK_ATTR_EXPORT __declspec(dllexport)
+# define __VOLK_ATTR_IMPORT __declspec(dllimport)
+#else
+# define __VOLK_ATTR_ALIGNED(x)
+# define __VOLK_ATTR_UNUSED
+# define __VOLK_ATTR_INLINE
+# define __VOLK_ATTR_DEPRECATED
+# define __VOLK_ATTR_EXPORT
+# define __VOLK_ATTR_IMPORT
+#endif
+
+////////////////////////////////////////////////////////////////////////
+// Ignore annoying warnings in MSVC
+////////////////////////////////////////////////////////////////////////
+#if defined(_MSC_VER)
+# pragma warning(disable: 4244) //'conversion' conversion from 'type1' to 'type2', possible loss of data
+# pragma warning(disable: 4305) //'identifier' : truncation from 'type1' to 'type2'
+#endif
+
+////////////////////////////////////////////////////////////////////////
+// C-linkage declaration macros
+// FIXME: due to the usage of complex.h, require gcc for c-linkage
+////////////////////////////////////////////////////////////////////////
+#if defined(__cplusplus) && (__GNUC__)
+# define __VOLK_DECL_BEGIN extern "C" {
+# define __VOLK_DECL_END }
+#else
+# define __VOLK_DECL_BEGIN
+# define __VOLK_DECL_END
+#endif
+
+////////////////////////////////////////////////////////////////////////
+// Define VOLK_API for library symbols
+// http://gcc.gnu.org/wiki/Visibility
+////////////////////////////////////////////////////////////////////////
+#ifdef volk_gnsssdr_EXPORTS
+# define VOLK_API __VOLK_ATTR_EXPORT
+#else
+# define VOLK_API __VOLK_ATTR_IMPORT
+#endif
+
+////////////////////////////////////////////////////////////////////////
+// The bit128 union used by some
+////////////////////////////////////////////////////////////////////////
+#include <inttypes.h>
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+#endif
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+#endif
+
+union bit128{
+ uint16_t i16[8];
+ uint32_t i[4];
+ float f[4];
+ double d[2];
+
+ #ifdef LV_HAVE_SSE
+ __m128 float_vec;
+ #endif
+
+ #ifdef LV_HAVE_SSE2
+ __m128i int_vec;
+ __m128d double_vec;
+ #endif
+};
+
+#define bit128_p(x) ((union bit128 *)(x))
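+
+/* Usage sketch (illustrative, not part of the original header): the union lets
+ * a kernel reinterpret SIMD data, e.g.
+ *     union bit128 tmp;
+ *     tmp.float_vec = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);  // needs LV_HAVE_SSE
+ *     float second = tmp.f[1];                             // == 1.0f
+ * or, through the helper, bit128_p(ptr)->i[0] to view the first 32 bits at ptr.
+ */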
+
+#endif /*INCLUDED_LIBVOLK_COMMON_H*/
diff --git a/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_complex.h b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_complex.h
new file mode 100644
index 000000000..5bd925044
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_complex.h
@@ -0,0 +1,86 @@
+#ifndef INCLUDE_VOLK_COMPLEX_H
+#define INCLUDE_VOLK_COMPLEX_H
+
+/*!
+ * \brief Provide typedefs and operators for all complex types in C and C++.
+ *
+ * The typedefs encompass all signed integer and floating point types.
+ * Each operator function is intended to work across all data types.
+ * Under C++, these operators are defined as inline templates.
+ * Under C, these operators are defined as preprocessor macros.
+ * The use of macros makes the operators agnostic to the type.
+ *
+ * The following operator functions are defined:
+ * - lv_cmake - make a complex type from components
+ * - lv_creal - get the real part of the complex number
+ * - lv_cimag - get the imaginary part of the complex number
+ * - lv_conj - take the conjugate of the complex number
+ */
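+
+/* Usage sketch (illustrative only, based on the typedefs and operators below):
+ *     lv_32fc_t a = lv_cmake(1.0f, 2.0f);
+ *     lv_32fc_t b = lv_cmake(3.0f, -1.0f);
+ *     lv_32fc_t c = a * lv_conj(b);              // same code compiles as C or C++
+ *     float re = lv_creal(c), im = lv_cimag(c);
+ */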
+
+#ifdef __cplusplus
+
+#include <complex>
+#include <stdint.h>
+
+typedef std::complex<int8_t>  lv_8sc_t;
+typedef std::complex<int16_t> lv_16sc_t;
+typedef std::complex<int32_t> lv_32sc_t;
+typedef std::complex<int64_t> lv_64sc_t;
+typedef std::complex<float>   lv_32fc_t;
+typedef std::complex<double>  lv_64fc_t;
+
+template <typename T> inline std::complex<T> lv_cmake(const T &r, const T &i){
+    return std::complex<T>(r, i);
+}
+
+template <typename T> inline typename T::value_type lv_creal(const T &x){
+    return x.real();
+}
+
+template <typename T> inline typename T::value_type lv_cimag(const T &x){
+    return x.imag();
+}
+
+template <typename T> inline T lv_conj(const T &x){
+    return std::conj(x);
+}
+
+#else /* __cplusplus */
+
+#include <complex.h>
+
+typedef char complex lv_8sc_t;
+typedef short complex lv_16sc_t;
+typedef long complex lv_32sc_t;
+typedef long long complex lv_64sc_t;
+typedef float complex lv_32fc_t;
+typedef double complex lv_64fc_t;
+
+#define lv_cmake(r, i) ((r) + _Complex_I*(i))
+
+// When GNUC is available, use the complex extensions.
+// The extensions always return the correct value type.
+// http://gcc.gnu.org/onlinedocs/gcc/Complex.html
+#ifdef __GNUC__
+
+#define lv_creal(x) (__real__(x))
+
+#define lv_cimag(x) (__imag__(x))
+
+#define lv_conj(x) (~(x))
+
+// When not available, use the c99 complex function family,
+// which always returns double regardless of the input type.
+#else /* __GNUC__ */
+
+#define lv_creal(x) (creal(x))
+
+#define lv_cimag(x) (cimag(x))
+
+#define lv_conj(x) (conj(x))
+
+#endif /* __GNUC__ */
+
+#endif /* __cplusplus */
+
+#endif /* INCLUDE_VOLK_COMPLEX_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_malloc.h b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_malloc.h
new file mode 100644
index 000000000..7136bc135
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_malloc.h
@@ -0,0 +1,66 @@
+/* -*- c -*- */
+/*
+ * Copyright 2014 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef INCLUDED_VOLK_MALLOC_H
+#define INCLUDED_VOLK_MALLOC_H
+
+#include <stdlib.h>
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+
+__VOLK_DECL_BEGIN
+
+/*!
+ * \brief Allocate \p size bytes of data aligned to \p alignment.
+ *
+ * \details
+ * Because we don't have a standard method to allocate buffers in
+ * memory that are guaranteed to be on an alignment, VOLK handles this
+ * itself. The volk_gnsssdr_malloc function behaves like malloc in that it
+ * returns a pointer to the allocated memory. However, it also takes
+ * in an alignment specification, which is usually something like 16 or
+ * 32 to ensure that the aligned memory is located on a particular
+ * byte boundary for use with SIMD.
+ *
+ * Internally, the volk_gnsssdr_malloc first checks if the compiler is C11
+ * compliant and uses the new aligned_alloc method. If not, it checks
+ * if the system is POSIX compliant and uses posix_memalign. If that
+ * fails, volk_gnsssdr_malloc handles the memory allocation and alignment
+ * internally.
+ *
+ * Because of the ways in which volk_gnsssdr_malloc may allocate memory, it is
+ * important to always free volk_gnsssdr_malloc pointers using volk_gnsssdr_free.
+ *
+ * \param size The number of bytes to allocate.
+ * \param alignment The byte alignment of the allocated memory.
+ * \return pointer to aligned memory.
+ */
+VOLK_API void *volk_gnsssdr_malloc(size_t size, size_t alignment);
+
+/*!
+ * \brief Frees memory allocated by volk_gnsssdr_malloc.
+ * \param aptr The aligned pointer allocated by volk_gnsssdr_malloc.
+ */
+VOLK_API void volk_gnsssdr_free(void *aptr);
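+
+/* Usage sketch (illustrative only): allocate a SIMD-aligned buffer and release
+ * it with the matching free, e.g.
+ *     float* buf = (float*)volk_gnsssdr_malloc(1024 * sizeof(float), 32);
+ *     if (buf) {
+ *         // ... use buf ...
+ *         volk_gnsssdr_free(buf);
+ *     }
+ * where 32 is the byte alignment (typically 16 or 32, as noted above).
+ */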
+
+__VOLK_DECL_END
+
+#endif /* INCLUDED_VOLK_MALLOC_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_prefs.h b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_prefs.h
new file mode 100644
index 000000000..6e13fc07a
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_prefs.h
@@ -0,0 +1,28 @@
+#ifndef INCLUDED_VOLK_PREFS_H
+#define INCLUDED_VOLK_PREFS_H
+
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+#include <stdlib.h>
+
+__VOLK_DECL_BEGIN
+
+typedef struct volk_gnsssdr_arch_pref
+{
+ char name[128]; //name of the kernel
+ char impl_a[128]; //best aligned impl
+ char impl_u[128]; //best unaligned impl
+} volk_gnsssdr_arch_pref_t;
+
+////////////////////////////////////////////////////////////////////////
+// get path to volk_gnsssdr_config profiling info
+////////////////////////////////////////////////////////////////////////
+VOLK_API void volk_gnsssdr_get_config_path(char *);
+
+////////////////////////////////////////////////////////////////////////
+// load prefs into global prefs struct
+////////////////////////////////////////////////////////////////////////
+VOLK_API size_t volk_gnsssdr_load_preferences(volk_gnsssdr_arch_pref_t **);
+
+__VOLK_DECL_END
+
+#endif //INCLUDED_VOLK_PREFS_H
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/CommonMacros.h b/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/CommonMacros.h
new file mode 100644
index 000000000..ec9937ef5
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/CommonMacros.h
@@ -0,0 +1,174 @@
+/*!
+ * \file CommonMacros.h
+ * \brief Common macros used inside the volk protokernels.
+ * \authors
+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
+ *
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+#ifndef INCLUDED_gnsssdr_CommonMacros_u_H
+#define INCLUDED_gnsssdr_CommonMacros_u_H
+
+ #ifdef LV_HAVE_SSE4_1
+ /*!
+ \brief Macros for U_SSE4_1
+ */
+
+ #ifndef CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1
+ #define CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(input1, input2, real, imag)\
+ imag = _mm_srli_si128 (input1, 2);\
+ imag = _mm_blend_epi16 (input2, imag, 85);\
+ real = _mm_slli_si128 (input2, 2);\
+ real = _mm_blend_epi16 (real, input1, 85);
+ #endif /* CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1 */
+
+ #ifndef CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1
+ #define CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(input, input_i_1, input_i_2, output_i32, output_ps)\
+ input_i_1 = _mm_cvtepi16_epi32(input);\
+ input = _mm_srli_si128 (input, 8);\
+ input_i_2 = _mm_cvtepi16_epi32(input);\
+ output_i32 = _mm_add_epi32 (input_i_1, input_i_2);\
+ output_ps = _mm_cvtepi32_ps(output_i32);
+ #endif /* CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1 */
+
+ #ifndef CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1
+ #define CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1(input, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)\
+ input_i_1 = _mm_cvtepi8_epi32(input);\
+ input = _mm_srli_si128 (input, 4);\
+ input_i_2 = _mm_cvtepi8_epi32(input);\
+ input = _mm_srli_si128 (input, 4);\
+ output_i32_1 = _mm_add_epi32 (input_i_1, input_i_2);\
+ input_i_1 = _mm_cvtepi8_epi32(input);\
+ input = _mm_srli_si128 (input, 4);\
+ input_i_2 = _mm_cvtepi8_epi32(input);\
+ input = _mm_srli_si128 (input, 4);\
+ output_i32_2 = _mm_add_epi32 (input_i_1, input_i_2);\
+ output_i32 = _mm_add_epi32 (output_i32_1, output_i32_2);\
+ output_ps = _mm_cvtepi32_ps(output_i32);
+ #endif /* CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1 */
+
+ #endif /* LV_HAVE_SSE4_1 */
+
+ #ifdef LV_HAVE_SSE2
+ /*!
+ \brief Macros for U_SSE2
+ */
+
+ #ifdef LV_HAVE_SSSE3
+ /*!
+ \brief Macros for U_SSSE3
+ */
+
+ #ifndef CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3
+ #define CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, x, check_sign_sequence, rearrange_sequence, y_aux, x_abs, real_output, imag_output)\
+ y_aux = _mm_sign_epi8 (y, x);\
+ y_aux = _mm_sign_epi8 (y_aux, check_sign_sequence);\
+ real_output = _mm_maddubs_epi16 (x_abs, y_aux);\
+ \
+ y_aux = _mm_shuffle_epi8 (y, rearrange_sequence);\
+ y_aux = _mm_sign_epi8 (y_aux, x);\
+ imag_output = _mm_maddubs_epi16 (x_abs, y_aux);
+ #endif /* CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3 */
+
+ #endif /* LV_HAVE_SSSE3 */
+
+ #ifndef CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2
+ #define CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)\
+ realx_mult_realy = _mm_mullo_epi16 (realx, realy);\
+ imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);\
+ realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);\
+ imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);\
+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);\
+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+ #endif /* CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2 */
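+
+    /* Call-site sketch (added for illustration, not part of the original file):
+       the caller must declare every variable the macro touches beforehand, e.g.
+           __m128i realx, imagx, realy, imagy;
+           __m128i realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy;
+           __m128i real_output, imag_output;
+           CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy,
+               realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy,
+               real_output, imag_output)
+    */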
+
+ #ifndef CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2
+ #define CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(input, mult1, real, imag)\
+ imag = _mm_srli_si128 (input, 1);\
+ imag = _mm_and_si128 (imag, mult1);\
+ real = _mm_and_si128 (input, mult1);
+ #endif /* CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2 */
+
+ #ifndef CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2
+ #define CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2(input, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)\
+ input_i_1 = _mm_unpacklo_epi8(_mm_setzero_si128(), input);\
+ input_i_2 = _mm_unpacklo_epi16(_mm_setzero_si128(), input_i_1);\
+ input_i_1 = _mm_unpackhi_epi16(_mm_setzero_si128(), input_i_1);\
+ input_i_1 = _mm_srai_epi32(input_i_1, 24);\
+ input_i_2 = _mm_srai_epi32(input_i_2, 24);\
+ output_i32 = _mm_add_epi32(input_i_1, input_i_2);\
+ output_ps_1 = _mm_cvtepi32_ps(output_i32);\
+ \
+ input_i_1 = _mm_unpackhi_epi8(_mm_setzero_si128(), input);\
+ input_i_2 = _mm_unpacklo_epi16(_mm_setzero_si128(), input_i_1);\
+ input_i_1 = _mm_unpackhi_epi16(_mm_setzero_si128(), input_i_1);\
+ input_i_1 = _mm_srai_epi32(input_i_1, 24);\
+ input_i_2 = _mm_srai_epi32(input_i_2, 24);\
+ output_i32 = _mm_add_epi32(input_i_1, input_i_2);\
+ output_ps_2 = _mm_cvtepi32_ps(output_i32);
+ #endif /* CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2 */
+
+ #ifndef CM_8IC_CONTROLMINUS128_8IC_U_SSE2
+ #define CM_8IC_CONTROLMINUS128_8IC_U_SSE2(y, minus128, minus128control)\
+ minus128control = _mm_cmpeq_epi8 (y, minus128);\
+ y = _mm_sub_epi8 (y, minus128control);
+ #endif /* CM_8IC_CONTROLMINUS128_8IC_U_SSE2 */
+
+ #endif /* LV_HAVE_SSE2 */
+
+ #ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Macros for U_GENERIC
+ */
+
+ #endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_gnsssdr_CommonMacros_u_H */
+
+
+#ifndef INCLUDED_gnsssdr_CommonMacros_a_H
+#define INCLUDED_gnsssdr_CommonMacros_a_H
+
+ #ifdef LV_HAVE_SSE4_1
+ /*!
+ \brief Macros for A_SSE4_1
+ */
+
+ #endif /* LV_HAVE_SSE4_1 */
+
+ #ifdef LV_HAVE_SSE2
+ /*!
+ \brief Macros for A_SSE2
+ */
+
+ #endif /* LV_HAVE_SSE2 */
+
+ #ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Macros for A_GENERIC
+ */
+
+ #endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_gnsssdr_CommonMacros_a_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h b/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h
new file mode 100644
index 000000000..4fa054480
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h
@@ -0,0 +1,76 @@
+/*!
+ * \file CommonMacros_16ic_cw_corr_32fc.h
+ * \brief Common macros used inside the 16ic_cw_corr_32fc volk protokernels.
+ * \authors
+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
+ *
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+#ifndef INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_u_H
+#define INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_u_H
+#include "CommonMacros/CommonMacros.h"
+
+ #ifdef LV_HAVE_SSE4_1
+ /*!
+ \brief Macros for U_SSE4_1
+ */
+
+ #ifndef CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1
+ #define CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)\
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)\
+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)\
+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)\
+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
+ #endif /* CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1 */
+
+ #endif /* LV_HAVE_SSE4_1 */
+
+ #ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Macros for U_GENERIC
+ */
+
+ #endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_u_H */
+
+
+#ifndef INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_a_H
+#define INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_a_H
+
+ #ifdef LV_HAVE_SSE4_1
+ /*!
+ \brief Macros for A_SSE4_1
+ */
+
+ #endif /* LV_HAVE_SSE4_1 */
+
+ #ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Macros for A_GENERIC
+ */
+
+ #endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_a_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h b/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h
new file mode 100644
index 000000000..a8a778057
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h
@@ -0,0 +1,114 @@
+/*!
+ * \file CommonMacros_8ic_cw_corr_32fc.h
+ * \brief Common macros used inside the 8ic_cw_corr_32fc volk protokernels.
+ * \authors
+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
+ *
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+#ifndef INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_u_H
+#define INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_u_H
+#include "CommonMacros/CommonMacros.h"
+
+ #ifdef LV_HAVE_SSE4_1
+ /*!
+ \brief Macros for U_SSE4_1
+ */
+
+ #ifndef CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1
+ #define CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)\
+ CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)\
+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)\
+ \
+ imag_output = _mm_slli_si128 (imag_output, 1);\
+ output = _mm_blendv_epi8 (imag_output, real_output, mult1);\
+ \
+ CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1(output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)
+ #endif /* CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1 */
+
+ #ifndef CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1
+ #define CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, minus128, minus128control, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)\
+ CM_8IC_CONTROLMINUS128_8IC_U_SSE2(y, minus128, minus128control)\
+ CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output)\
+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)\
+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
+ #endif /* CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1 */
+
+ #ifndef CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1
+ #define CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)\
+ CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output)\
+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)\
+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
+ #endif /* CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1 */
+
+ #endif /* LV_HAVE_SSE4_1 */
+
+ #ifdef LV_HAVE_SSE2
+ /*!
+ \brief Macros for U_SSE2
+ */
+
+ #ifndef CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2
+ #define CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)\
+ CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)\
+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)\
+ \
+ real_output = _mm_and_si128 (real_output, mult1);\
+ imag_output = _mm_and_si128 (imag_output, mult1);\
+ imag_output = _mm_slli_si128 (imag_output, 1);\
+ output = _mm_or_si128 (real_output, imag_output);\
+ \
+ CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2(output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
+ #endif /* CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2 */
+
+ #endif /* LV_HAVE_SSE2 */
+
+ #ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Macros for U_GENERIC
+ */
+
+ #endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_u_H */
+
+
+#ifndef INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_a_H
+#define INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_a_H
+
+ #ifdef LV_HAVE_SSE4_1
+ /*!
+ \brief Macros for A_SSE4_1
+ */
+
+ #endif /* LV_HAVE_SSE4_1 */
+
+ #ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Macros for A_GENERIC
+ */
+
+ #endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_a_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/README.txt b/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/README.txt
new file mode 100644
index 000000000..3d610256a
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/CommonMacros/README.txt
@@ -0,0 +1,34 @@
+####################################################################
+Common Macros inside volk_gnsssdr module
+####################################################################
+
+First of all, sorry for making you read this: macros are evil, they cannot be debugged, you do not know where the errors come from, and their syntax is annoying. BUT they are the only way I found to share one piece of code between several proto-kernels without a performance penalty.
+Inline functions have been tested as an alternative: each call introduces only a very small time penalty, but inside long loops, with thousands of samples, that penalty becomes huge.
+
+####################################################################
+Syntax
+####################################################################
+
+To make the code easier to understand, I created the macros with a specific naming syntax.
+
+1) Inside CommonMacros.h you will find macros for common operations. I will explain the syntax with an example:
+
+example: CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)
+
+First of all, you find the characters “CM”, which stand for CommonMacros. After that come the type and the number of inputs: “_16IC_X4” (16-bit complex integers, four inputs). The type syntax is the same as the one used by the volk protokernels; refer to the GNU Radio documentation for more help. Then comes the name of the macro (“_SCALAR_PRODUCT”), followed by the type and the number of outputs (“_16IC_X2”). Finally, the minimum SSE version needed to run it is indicated (“_U_SSE2”). In the arguments you will find, from left to right, the inputs (four inputs: realx, imagx, realy, imagy), some working variables that the macro needs (realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy) and finally the outputs (two outputs: real_output, imag_output).
+The working variables that the macro needs are passed explicitly when calling it in order to avoid surprises after compiling: if you want to use a macro you must declare all the variables it needs beforehand, otherwise the code will not compile (see the sketch after this list).
+
+2) Inside all the other headers, CommonMacros_XXXXXX.h, you will find macros for a specific group of proto-kernels. The syntax is the same as in CommonMacros.h.
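+
+For example, before calling the scalar product macro shown above you must declare every variable it touches (a minimal sketch; the variable names are only illustrative and SSE2 support is assumed):
+
+    __m128i realx, imagx, realy, imagy;
+    __m128i realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy;
+    __m128i real_output, imag_output;
+    /* realx, imagx, realy and imagy are loaded and deinterleaved beforehand */
+    CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)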
+
+####################################################################
+Workflow
+####################################################################
+
+In order to use the macros comfortably, I usually write the code without macros first, inside a testing proto-kernel, where it can be run, debugged and inspected with breakpoints.
+Once it works, I move the code into a macro and test it again, as sketched below.
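+
+A minimal sketch of that last step (the macro name and body are only illustrative, not part of the module):
+
+    #ifndef CM_EXAMPLE_OPERATION_U_SSE2
+    #define CM_EXAMPLE_OPERATION_U_SSE2(input, output)\
+        output = _mm_add_epi16 (input, input);
+    #endif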
+
+####################################################################
+Why macros
+####################################################################
+1) They are the only way I could find to share code between proto-kernels without a performance penalty.
+2) It is true that they are really difficult to debug, but if you work with them responsibly it is not so hard. Volk_gnsssdr checks the results of every SSE proto-kernel implementation against the results of the generic implementation, so if your macro is not working you will notice it when profiling.
\ No newline at end of file
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/README.txt b/src/algorithms/libs/volk_gnsssdr/kernels/README.txt
new file mode 100644
index 000000000..69ee93d06
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/README.txt
@@ -0,0 +1,67 @@
+########################################################################
+# How to create custom kernel dispatchers
+########################################################################
+A kernel dispatcher is a kernel implementation that calls other kernel implementations.
+By default, a dispatcher is generated by the build system for every kernel such that:
+ * the best aligned implementation is called when all pointer arguments are aligned,
+ * and otherwise the best unaligned implementation is called (a call sketch follows below).
+
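+In user code a kernel is invoked through its public name, and the dispatcher (default or custom) selects which implementation runs. A minimal call sketch, where the kernel name is taken from the examples below and the buffers are only illustrative:
+
+    float a[1024], b[1024], c[1024];
+    /* ... fill a and b ... */
+    volk_gnsssdr_32f_x2_add_32f(c, a, b, 1024);
+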
+The author of a VOLK kernel may create a custom dispatcher,
+to be called in place of the automatically generated one.
+A custom dispatcher may be useful to handle head and tail cases,
+or to implement different alignment and bounds checking logic.
+
+########################################################################
+# Code for an example dispatcher w/ tail case
+########################################################################
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+
+#ifdef LV_HAVE_DISPATCHER
+
+static inline void volk_gnsssdr_32f_x2_add_32f_dispatcher(float* cVector, const float* aVector, const float* bVector, unsigned int num_points)
+{
+ const unsigned int num_points_r = num_points%4;
+ const unsigned int num_points_x = num_points - num_points_r;
+
+ if (volk_gnsssdr_is_aligned(VOLK_OR_PTR(cVector, VOLK_OR_PTR(aVector, bVector))))
+ {
+ volk_gnsssdr_32f_x2_add_32f_a(cVector, aVector, bVector, num_points_x);
+ }
+ else
+ {
+ volk_gnsssdr_32f_x2_add_32f_u(cVector, aVector, bVector, num_points_x);
+ }
+
+ volk_gnsssdr_32f_x2_add_32f_g(cVector+num_points_x, aVector+num_points_x, bVector+num_points_x, num_points_r);
+}
+
+#endif //LV_HAVE_DISPATCHER
+
+########################################################################
+# Code for an example dispatcher w/ tail case and accumulator
+########################################################################
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+
+#ifdef LV_HAVE_DISPATCHER
+
+static inline void volk_gnsssdr_32f_x2_dot_prod_32f_dispatcher(float * result, const float * input, const float * taps, unsigned int num_points)
+{
+ const unsigned int num_points_r = num_points%16;
+ const unsigned int num_points_x = num_points - num_points_r;
+
+ if (volk_gnsssdr_is_aligned(VOLK_OR_PTR(input, taps)))
+ {
+ volk_gnsssdr_32f_x2_dot_prod_32f_a(result, input, taps, num_points_x);
+ }
+ else
+ {
+ volk_gnsssdr_32f_x2_dot_prod_32f_u(result, input, taps, num_points_x);
+ }
+
+ float result_tail = 0;
+ volk_gnsssdr_32f_x2_dot_prod_32f_g(&result_tail, input+num_points_x, taps+num_points_x, num_points_r);
+
+ *result += result_tail;
+}
+
+#endif //LV_HAVE_DISPATCHER
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16i_s32f_convert_32f.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16i_s32f_convert_32f.h
new file mode 100644
index 000000000..ccb13171c
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16i_s32f_convert_32f.h
@@ -0,0 +1,241 @@
+#ifndef INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H
+#define INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_SSE4_1
+#include <smmintrin.h>
+
+ /*!
+ \brief Converts the input 16 bit integer data into floating point data, and divides each floating point output data point by the scalar value
+ \param inputVector The 16 bit input data buffer
+ \param outputVector The floating point output data buffer
+ \param scalar The value divided against each point in the output buffer
+ \param num_points The number of data values to be converted
+ \note Output buffer does NOT need to be properly aligned
+ */
+static inline void volk_gnsssdr_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int eighthPoints = num_points / 8;
+
+ float* outputVectorPtr = outputVector;
+ __m128 invScalar = _mm_set_ps1(1.0/scalar);
+ int16_t* inputPtr = (int16_t*)inputVector;
+ __m128i inputVal;
+ __m128i inputVal2;
+ __m128 ret;
+
+ for(;number < eighthPoints; number++){
+
+ // Load the 8 values
+ inputVal = _mm_loadu_si128((__m128i*)inputPtr);
+
+ // Shift the input data to the right by 64 bits ( 8 bytes )
+ inputVal2 = _mm_srli_si128(inputVal, 8);
+
+ // Convert the lower 4 values into 32 bit words
+ inputVal = _mm_cvtepi16_epi32(inputVal);
+ inputVal2 = _mm_cvtepi16_epi32(inputVal2);
+
+ ret = _mm_cvtepi32_ps(inputVal);
+ ret = _mm_mul_ps(ret, invScalar);
+ _mm_storeu_ps(outputVectorPtr, ret);
+ outputVectorPtr += 4;
+
+ ret = _mm_cvtepi32_ps(inputVal2);
+ ret = _mm_mul_ps(ret, invScalar);
+ _mm_storeu_ps(outputVectorPtr, ret);
+
+ outputVectorPtr += 4;
+
+ inputPtr += 8;
+ }
+
+ number = eighthPoints * 8;
+ for(; number < num_points; number++){
+ outputVector[number] =((float)(inputVector[number])) / scalar;
+ }
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+
+ /*!
+ \brief Converts the input 16 bit integer data into floating point data, and divides each floating point output data point by the scalar value
+ \param inputVector The 16 bit input data buffer
+ \param outputVector The floating point output data buffer
+ \param scalar The value divided against each point in the output buffer
+ \param num_points The number of data values to be converted
+ \note Output buffer does NOT need to be properly aligned
+ */
+static inline void volk_gnsssdr_16i_s32f_convert_32f_u_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ float* outputVectorPtr = outputVector;
+ __m128 invScalar = _mm_set_ps1(1.0/scalar);
+ int16_t* inputPtr = (int16_t*)inputVector;
+ __m128 ret;
+
+ for(;number < quarterPoints; number++){
+ ret = _mm_set_ps((float)(inputPtr[3]), (float)(inputPtr[2]), (float)(inputPtr[1]), (float)(inputPtr[0]));
+
+ ret = _mm_mul_ps(ret, invScalar);
+ _mm_storeu_ps(outputVectorPtr, ret);
+
+ inputPtr += 4;
+ outputVectorPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ outputVector[number] = (float)(inputVector[number]) / scalar;
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Converts the input 16 bit integer data into floating point data, and divides each floating point output data point by the scalar value
+ \param inputVector The 16 bit input data buffer
+ \param outputVector The floating point output data buffer
+ \param scalar The value divided against each point in the output buffer
+ \param num_points The number of data values to be converted
+ \note Output buffer does NOT need to be properly aligned
+ */
+static inline void volk_gnsssdr_16i_s32f_convert_32f_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+ float* outputVectorPtr = outputVector;
+ const int16_t* inputVectorPtr = inputVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar;
+ }
+}
+#endif /* LV_HAVE_GENERIC */
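+
+/* Usage sketch (illustrative only; the buffer size and the scale factor are
+ * assumptions, not part of this kernel's API):
+ *
+ *     int16_t in[1024];    // 16-bit integer samples
+ *     float out[1024];
+ *     // out[i] = ((float)in[i]) / 32768.0 for every i
+ *     volk_gnsssdr_16i_s32f_convert_32f_generic(out, in, 32768.0, 1024);
+ */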
+
+
+
+
+#endif /* INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H */
+#ifndef INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H
+#define INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_SSE4_1
+#include <smmintrin.h>
+
+ /*!
+ \brief Converts the input 16 bit integer data into floating point data, and divides each floating point output data point by the scalar value
+ \param inputVector The 16 bit input data buffer
+ \param outputVector The floating point output data buffer
+ \param scalar The value divided against each point in the output buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_16i_s32f_convert_32f_a_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int eighthPoints = num_points / 8;
+
+ float* outputVectorPtr = outputVector;
+ __m128 invScalar = _mm_set_ps1(1.0/scalar);
+ int16_t* inputPtr = (int16_t*)inputVector;
+ __m128i inputVal;
+ __m128i inputVal2;
+ __m128 ret;
+
+ for(;number < eighthPoints; number++){
+
+ // Load the 8 values
+ inputVal = _mm_loadu_si128((__m128i*)inputPtr);
+
+ // Shift the input data to the right by 64 bits ( 8 bytes )
+ inputVal2 = _mm_srli_si128(inputVal, 8);
+
+ // Convert the lower 4 values into 32 bit words
+ inputVal = _mm_cvtepi16_epi32(inputVal);
+ inputVal2 = _mm_cvtepi16_epi32(inputVal2);
+
+ ret = _mm_cvtepi32_ps(inputVal);
+ ret = _mm_mul_ps(ret, invScalar);
+ _mm_storeu_ps(outputVectorPtr, ret);
+ outputVectorPtr += 4;
+
+ ret = _mm_cvtepi32_ps(inputVal2);
+ ret = _mm_mul_ps(ret, invScalar);
+ _mm_storeu_ps(outputVectorPtr, ret);
+
+ outputVectorPtr += 4;
+
+ inputPtr += 8;
+ }
+
+ number = eighthPoints * 8;
+ for(; number < num_points; number++){
+ outputVector[number] =((float)(inputVector[number])) / scalar;
+ }
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+
+ /*!
+ \brief Converts the input 16 bit integer data into floating point data, and divides each floating point output data point by the scalar value
+ \param inputVector The 16 bit input data buffer
+ \param outputVector The floating point output data buffer
+ \param scalar The value divided against each point in the output buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_16i_s32f_convert_32f_a_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ float* outputVectorPtr = outputVector;
+ __m128 invScalar = _mm_set_ps1(1.0/scalar);
+ int16_t* inputPtr = (int16_t*)inputVector;
+ __m128 ret;
+
+ for(;number < quarterPoints; number++){
+ ret = _mm_set_ps((float)(inputPtr[3]), (float)(inputPtr[2]), (float)(inputPtr[1]), (float)(inputPtr[0]));
+
+ ret = _mm_mul_ps(ret, invScalar);
+ _mm_storeu_ps(outputVectorPtr, ret);
+
+ inputPtr += 4;
+ outputVectorPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ outputVector[number] = (float)(inputVector[number]) / scalar;
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Converts the input 16 bit integer data into floating point data, and divides each floating point output data point by the scalar value
+ \param inputVector The 16 bit input data buffer
+ \param outputVector The floating point output data buffer
+ \param scalar The value divided against each point in the output buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_16i_s32f_convert_32f_a_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
+ float* outputVectorPtr = outputVector;
+ const int16_t* inputVectorPtr = inputVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar;
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+
+
+#endif /* INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3.h
new file mode 100644
index 000000000..95b0b093d
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3.h
@@ -0,0 +1,461 @@
+/*!
+ * \file volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3.h
+ * \brief Volk protokernel: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 32 bits vectors
+ * \authors
+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
+ *
+ *
+ * Volk protokernel that performs the carrier wipe-off mixing and the
+ * Early, Prompt, and Late correlation with 32-bit vectors (16 bits for the
+ * real part and 16 bits for the imaginary part):
+ * - The carrier wipe-off is done by multiplying the input signal by the
+ * carrier (multiplication of 32-bit vectors). It returns the input
+ * signal in baseband (BB).
+ * - Early values are calculated by multiplying the input signal in BB by the
+ * early code (multiplication of 32-bit vectors), accumulating the results.
+ * - Prompt values are calculated by multiplying the input signal in BB by the
+ * prompt code (multiplication of 32-bit vectors), accumulating the results.
+ * - Late values are calculated by multiplying the input signal in BB by the
+ * late code (multiplication of 32-bit vectors), accumulating the results.
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_u_H
+#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <float.h>
+#include <string.h>
+
+#ifdef LV_HAVE_SSE4_1
+#include "smmintrin.h"
+#include "CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h"
+#include "CommonMacros/CommonMacros.h"
+ /*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_u_sse4_1(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
+{
+ const unsigned int sse_iters = num_points / 8;
+
+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
+ __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
+
+ __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
+ __m128i input_i_1, input_i_2, output_i32;
+ __m128 real_output_ps, imag_output_ps;
+
+ float E_out_real = 0;
+ float E_out_imag = 0;
+ float P_out_real = 0;
+ float P_out_imag = 0;
+ float L_out_real = 0;
+ float L_out_imag = 0;
+
+ const lv_16sc_t* input_ptr = input;
+ const lv_16sc_t* carrier_ptr = carrier;
+
+ const lv_16sc_t* E_code_ptr = E_code;
+ lv_32fc_t* E_out_ptr = E_out;
+ const lv_16sc_t* L_code_ptr = L_code;
+ lv_32fc_t* L_out_ptr = L_out;
+ const lv_16sc_t* P_code_ptr = P_code;
+ lv_32fc_t* P_out_ptr = P_out;
+
+ *E_out_ptr = 0;
+ *P_out_ptr = 0;
+ *L_out_ptr = 0;
+
+ mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+
+ real_E_code_acc = _mm_setzero_ps();
+ imag_E_code_acc = _mm_setzero_ps();
+ real_P_code_acc = _mm_setzero_ps();
+ imag_P_code_acc = _mm_setzero_ps();
+ real_L_code_acc = _mm_setzero_ps();
+ imag_L_code_acc = _mm_setzero_ps();
+
+ if (sse_iters>0)
+ {
+ for(int number = 0;number < sse_iters; number++){
+
+ //Perform the carrier wipe-off
+ x1 = _mm_lddqu_si128((__m128i*)input_ptr);
+ input_ptr += 4;
+ x2 = _mm_lddqu_si128((__m128i*)input_ptr);
+
+ y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
+ carrier_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)carrier_ptr);
+
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
+
+ //Get early values
+ y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
+ E_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)E_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ //Adds the float 32 results
+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
+
+ //Get prompt values
+ y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
+ P_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)P_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
+
+ //Get late values
+ y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
+ L_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)L_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
+
+ input_ptr += 4;
+ carrier_ptr += 4;
+ E_code_ptr += 4;
+ P_code_ptr += 4;
+ L_code_ptr += 4;
+ }
+
+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
+
+ _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
+
+ for (int i = 0; i<4; ++i)
+ {
+ E_out_real += real_E_dotProductVector[i];
+ E_out_imag += imag_E_dotProductVector[i];
+ P_out_real += real_P_dotProductVector[i];
+ P_out_imag += imag_P_dotProductVector[i];
+ L_out_real += real_L_dotProductVector[i];
+ L_out_imag += imag_L_dotProductVector[i];
+ }
+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
+ }
+
+ lv_16sc_t bb_signal_sample;
+ for(int i=0; i < num_points%8; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
+ // Now get early, late, and prompt values for each
+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
+ }
+
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
+{
+ lv_16sc_t bb_signal_sample;
+ lv_16sc_t tmp1;
+ lv_16sc_t tmp2;
+ lv_16sc_t tmp3;
+
+ bb_signal_sample = lv_cmake(0, 0);
+
+ *E_out = 0;
+ *P_out = 0;
+ *L_out = 0;
+ // perform Early, Prompt and Late correlation
+
+ for(int i=0; i < num_points; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = input[i] * carrier[i];
+
+ tmp1 = bb_signal_sample * E_code[i];
+ tmp2 = bb_signal_sample * P_code[i];
+ tmp3 = bb_signal_sample * L_code[i];
+
+ // Now get early, late, and prompt values for each
+ *E_out += (lv_32fc_t)tmp1;
+ *P_out += (lv_32fc_t)tmp2;
+ *L_out += (lv_32fc_t)tmp3;
+ }
+}
+#endif /* LV_HAVE_GENERIC */
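+
+/* Usage sketch (illustrative only; the buffer names and the length are assumptions):
+ *
+ *     lv_32fc_t E, P, L;
+ *     // input, carrier, e_code, p_code and l_code are lv_16sc_t arrays of n complex samples
+ *     volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_generic(&E, &P, &L, input, carrier, e_code, p_code, l_code, n);
+ *     // E, P and L now hold the accumulated Early, Prompt and Late correlator outputs
+ */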
+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_u_H */
+
+
+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_H
+#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <float.h>
+#include <string.h>
+
+#ifdef LV_HAVE_SSE4_1
+#include "smmintrin.h"
+#include "CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h"
+#include "CommonMacros/CommonMacros.h"
+/*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_sse4_1(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
+{
+ const unsigned int sse_iters = num_points / 8;
+
+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
+ __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
+
+ __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
+ __m128i input_i_1, input_i_2, output_i32;
+ __m128 real_output_ps, imag_output_ps;
+
+ float E_out_real = 0;
+ float E_out_imag = 0;
+ float P_out_real = 0;
+ float P_out_imag = 0;
+ float L_out_real = 0;
+ float L_out_imag = 0;
+
+ const lv_16sc_t* input_ptr = input;
+ const lv_16sc_t* carrier_ptr = carrier;
+
+ const lv_16sc_t* E_code_ptr = E_code;
+ lv_32fc_t* E_out_ptr = E_out;
+ const lv_16sc_t* L_code_ptr = L_code;
+ lv_32fc_t* L_out_ptr = L_out;
+ const lv_16sc_t* P_code_ptr = P_code;
+ lv_32fc_t* P_out_ptr = P_out;
+
+ *E_out_ptr = 0;
+ *P_out_ptr = 0;
+ *L_out_ptr = 0;
+
+ mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+
+ real_E_code_acc = _mm_setzero_ps();
+ imag_E_code_acc = _mm_setzero_ps();
+ real_P_code_acc = _mm_setzero_ps();
+ imag_P_code_acc = _mm_setzero_ps();
+ real_L_code_acc = _mm_setzero_ps();
+ imag_L_code_acc = _mm_setzero_ps();
+
+ if (sse_iters>0)
+ {
+ for(int number = 0;number < sse_iters; number++){
+
+ //Perform the carrier wipe-off
+ x1 = _mm_load_si128((__m128i*)input_ptr);
+ input_ptr += 4;
+ x2 = _mm_load_si128((__m128i*)input_ptr);
+
+ y1 = _mm_load_si128((__m128i*)carrier_ptr);
+ carrier_ptr += 4;
+ y2 = _mm_load_si128((__m128i*)carrier_ptr);
+
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
+
+ //Get early values
+ y1 = _mm_load_si128((__m128i*)E_code_ptr);
+ E_code_ptr += 4;
+ y2 = _mm_load_si128((__m128i*)E_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ //Adds the float 32 results
+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
+
+ //Get prompt values
+ y1 = _mm_load_si128((__m128i*)P_code_ptr);
+ P_code_ptr += 4;
+ y2 = _mm_load_si128((__m128i*)P_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
+
+ //Get late values
+ y1 = _mm_load_si128((__m128i*)L_code_ptr);
+ L_code_ptr += 4;
+ y2 = _mm_load_si128((__m128i*)L_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
+
+ input_ptr += 4;
+ carrier_ptr += 4;
+ E_code_ptr += 4;
+ P_code_ptr += 4;
+ L_code_ptr += 4;
+ }
+
+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
+
+ _mm_store_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
+ _mm_store_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
+ _mm_store_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
+ _mm_store_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
+ _mm_store_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
+ _mm_store_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
+
+ for (int i = 0; i<4; ++i)
+ {
+ E_out_real += real_E_dotProductVector[i];
+ E_out_imag += imag_E_dotProductVector[i];
+ P_out_real += real_P_dotProductVector[i];
+ P_out_imag += imag_P_dotProductVector[i];
+ L_out_real += real_L_dotProductVector[i];
+ L_out_imag += imag_L_dotProductVector[i];
+ }
+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
+ }
+
+ lv_16sc_t bb_signal_sample;
+ for(int i=0; i < num_points%8; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
+ // Now get early, late, and prompt values for each
+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
+ }
+
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
+{
+ lv_16sc_t bb_signal_sample;
+ lv_16sc_t tmp1;
+ lv_16sc_t tmp2;
+ lv_16sc_t tmp3;
+
+ bb_signal_sample = lv_cmake(0, 0);
+
+ *E_out = 0;
+ *P_out = 0;
+ *L_out = 0;
+ // perform Early, Prompt and Late correlation
+
+ for(int i=0; i < num_points; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = input[i] * carrier[i];
+
+ tmp1 = bb_signal_sample * E_code[i];
+ tmp2 = bb_signal_sample * P_code[i];
+ tmp3 = bb_signal_sample * L_code[i];
+
+ // Now get early, late, and prompt values for each
+ *E_out += (lv_32fc_t)tmp1;
+ *P_out += (lv_32fc_t)tmp2;
+ *L_out += (lv_32fc_t)tmp3;
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3.h
new file mode 100644
index 000000000..34d1fd715
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3.h
@@ -0,0 +1,1568 @@
+/*!
+ * \file volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3.h
+ * \brief Volk protokernel: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 32-bit vectors using different methods: u_sse4_1_first implements one method, u_sse4_1_second implements another, and so on. This protokernel has been created to test the performance of the different methods.
+ * \authors
+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
+ *
+ *
+ * Volk protokernel that performs the carrier wipe-off mixing and the
+ * Early, Prompt, and Late correlation with 32-bit vectors (16 bits for the
+ * real part and 16 bits for the imaginary part):
+ * - The carrier wipe-off is done by multiplying the input signal by the
+ * carrier (multiplication of 32-bit vectors). It returns the input
+ * signal in baseband (BB).
+ * - Early values are calculated by multiplying the input signal in BB by the
+ * early code (multiplication of 32-bit vectors), accumulating the results.
+ * - Prompt values are calculated by multiplying the input signal in BB by the
+ * prompt code (multiplication of 32-bit vectors), accumulating the results.
+ * - Late values are calculated by multiplying the input signal in BB by the
+ * late code (multiplication of 32-bit vectors), accumulating the results.
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_H
+#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <float.h>
+#include <string.h>
+
+#ifdef LV_HAVE_SSE4_1
+#include "smmintrin.h"
+ /*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_sse4_1_first(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
+{
+ const unsigned int sse_iters = num_points / 4;
+
+ __m128i x, y, yaux, yl, yh, tmp1, tmp2, z, bb_signal_sample, bb_signal_sample_suffled;
+
+ __m128 z_ps_1, z_ps_2, z_E, z_P, z_L;
+ __m128i z_i_1, z_i_2;
+
+ lv_32fc_t dotProduct_E = lv_cmake(0, 0); //initialized to zero so the tail loop below can accumulate even if sse_iters is 0
+ lv_32fc_t dotProduct_P = lv_cmake(0, 0);
+ lv_32fc_t dotProduct_L = lv_cmake(0, 0);
+
+ z_E = _mm_setzero_ps();
+ z_P = _mm_setzero_ps();
+ z_L = _mm_setzero_ps();
+
+ const lv_16sc_t* _input = input;
+ const lv_16sc_t* _carrier = carrier;
+ const lv_16sc_t* _E_code = E_code;
+ const lv_16sc_t* _P_code = P_code;
+ const lv_16sc_t* _L_code = L_code;
+
+ if (sse_iters>0)
+ {
+ for(int number = 0;number < sse_iters; number++)
+ {
+ //Perform the carrier wipe-off
+ x = _mm_lddqu_si128((__m128i*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_lddqu_si128((__m128i*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ // Load yl with cr,cr,dr,dr
+ // Load yh with ci,ci,di,di
+ yaux = _mm_shuffle_epi8 (y, _mm_set_epi8 (15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0));
+ yl = _mm_unpacklo_epi16(yaux, yaux);
+ yh = _mm_unpackhi_epi16(yaux, yaux);
+
+ tmp1 = _mm_mullo_epi16(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_epi8 (x, _mm_set_epi8 (13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mullo_epi16(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ tmp2 = _mm_mullo_epi16(tmp2,_mm_set_epi16 (1, -1, 1, -1, 1, -1, 1, -1));
+ bb_signal_sample = _mm_add_epi16(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ bb_signal_sample_suffled = _mm_shuffle_epi8 (bb_signal_sample, _mm_set_epi8 (13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)); // Re-arrange bb_signal_sample to be ai,ar,bi,br
+
+ // correlation E,P,L (3x vector scalar product)
+ // Early
+ y = _mm_lddqu_si128((__m128i*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yaux = _mm_shuffle_epi8 (y, _mm_set_epi8 (15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0));
+ yl = _mm_unpacklo_epi16(yaux, yaux);
+ yh = _mm_unpackhi_epi16(yaux, yaux);
+
+ tmp1 = _mm_mullo_epi16(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ tmp2 = _mm_mullo_epi16(bb_signal_sample_suffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ tmp2 = _mm_mullo_epi16(tmp2,_mm_set_epi16 (1, -1, 1, -1, 1, -1, 1, -1));
+ z = _mm_add_epi16(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ z_i_1 = _mm_cvtepi16_epi32(z);
+ z_ps_1 = _mm_cvtepi32_ps(z_i_1);
+ z = _mm_srli_si128 (z, 8);
+ z_i_2 = _mm_cvtepi16_epi32(z);
+ z_ps_2 = _mm_cvtepi32_ps(z_i_2);
+
+ z_E = _mm_add_ps(z_E, z_ps_1); // Add the complex multiplication results together
+ z_E = _mm_add_ps(z_E, z_ps_2); // Add the complex multiplication results together
+
+ // Prompt
+ y = _mm_lddqu_si128((__m128i*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yaux = _mm_shuffle_epi8 (y, _mm_set_epi8 (15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0));
+ yl = _mm_unpacklo_epi16(yaux, yaux);
+ yh = _mm_unpackhi_epi16(yaux, yaux);
+
+ tmp1 = _mm_mullo_epi16(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ tmp2 = _mm_mullo_epi16(bb_signal_sample_suffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ tmp2 = _mm_mullo_epi16(tmp2,_mm_set_epi16 (1, -1, 1, -1, 1, -1, 1, -1));
+ z = _mm_add_epi16(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ z_i_1 = _mm_cvtepi16_epi32(z);
+ z_ps_1 = _mm_cvtepi32_ps(z_i_1);
+ z = _mm_srli_si128 (z, 8);
+ z_i_2 = _mm_cvtepi16_epi32(z);
+ z_ps_2 = _mm_cvtepi32_ps(z_i_2);
+
+ z_P = _mm_add_ps(z_P, z_ps_1); // Add the complex multiplication results together
+ z_P = _mm_add_ps(z_P, z_ps_2); // Add the complex multiplication results together
+
+ // Late
+ y = _mm_lddqu_si128((__m128i*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yaux = _mm_shuffle_epi8 (y, _mm_set_epi8 (15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0));
+ yl = _mm_unpacklo_epi16(yaux, yaux);
+ yh = _mm_unpackhi_epi16(yaux, yaux);
+
+ tmp1 = _mm_mullo_epi16(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ tmp2 = _mm_mullo_epi16(bb_signal_sample_suffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ tmp2 = _mm_mullo_epi16(tmp2,_mm_set_epi16 (1, -1, 1, -1, 1, -1, 1, -1));
+ z = _mm_add_epi16(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ z_i_1 = _mm_cvtepi16_epi32(z);
+ z_ps_1 = _mm_cvtepi32_ps(z_i_1);
+ z = _mm_srli_si128 (z, 8);
+ z_i_2 = _mm_cvtepi16_epi32(z);
+ z_ps_2 = _mm_cvtepi32_ps(z_i_2);
+
+ z_L = _mm_add_ps(z_L, z_ps_1); // Add the complex multiplication results together
+ z_L = _mm_add_ps(z_L, z_ps_2); // Add the complex multiplication results together
+
+ _input += 4;
+ _carrier += 4;
+ _E_code += 4;
+ _L_code += 4;
+ _P_code += 4;
+ }
+
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
+
+ _mm_storeu_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
+
+ dotProduct_E = ( dotProductVector_E[0] + dotProductVector_E[1] );
+ dotProduct_P = ( dotProductVector_P[0] + dotProductVector_P[1] );
+ dotProduct_L = ( dotProductVector_L[0] + dotProductVector_L[1] );
+ }
+
+ for(int i=0; i < num_points%4; ++i)
+ {
+ dotProduct_E += (lv_32fc_t)((*_input) * (*_E_code++)*(*_carrier));
+ dotProduct_P += (lv_32fc_t)((*_input) * (*_P_code++)*(*_carrier));
+ dotProduct_L += (lv_32fc_t)((*_input++) * (*_L_code++)*(*_carrier++));
+ }
+
+ *E_out = dotProduct_E;
+ *P_out = dotProduct_P;
+ *L_out = dotProduct_L;
+
+
+
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_SSE4_1
+#include "smmintrin.h"
+/*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_sse4_1_second(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
+{
+ const unsigned int sse_iters = num_points / 8;
+
+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
+ __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
+
+ __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
+ __m128i real_output_i_1, real_output_i_2, imag_output_i_1, imag_output_i_2;
+ __m128 real_output_ps_1, real_output_ps_2, imag_output_ps_1, imag_output_ps_2;
+
+ float E_out_real = 0;
+ float E_out_imag = 0;
+ float P_out_real = 0;
+ float P_out_imag = 0;
+ float L_out_real = 0;
+ float L_out_imag = 0;
+
+ const lv_16sc_t* input_ptr = input;
+ const lv_16sc_t* carrier_ptr = carrier;
+
+ const lv_16sc_t* E_code_ptr = E_code;
+ lv_32fc_t* E_out_ptr = E_out;
+ const lv_16sc_t* L_code_ptr = L_code;
+ lv_32fc_t* L_out_ptr = L_out;
+ const lv_16sc_t* P_code_ptr = P_code;
+ lv_32fc_t* P_out_ptr = P_out;
+
+ *E_out_ptr = 0;
+ *P_out_ptr = 0;
+ *L_out_ptr = 0;
+
+ mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+
+ real_E_code_acc = _mm_setzero_ps();
+ imag_E_code_acc = _mm_setzero_ps();
+ real_P_code_acc = _mm_setzero_ps();
+ imag_P_code_acc = _mm_setzero_ps();
+ real_L_code_acc = _mm_setzero_ps();
+ imag_L_code_acc = _mm_setzero_ps();
+
+ if (sse_iters>0)
+ {
+ for(int number = 0;number < sse_iters; number++){
+
+ //Perform the carrier wipe-off
+ x1 = _mm_lddqu_si128((__m128i*)input_ptr);
+ input_ptr += 4;
+ x2 = _mm_lddqu_si128((__m128i*)input_ptr);
+
+ y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
+ carrier_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)carrier_ptr);
+
+ imagx = _mm_srli_si128 (x1, 2);
+ imagx = _mm_blend_epi16 (x2, imagx, 85);
+ realx = _mm_slli_si128 (x2, 2);
+ realx = _mm_blend_epi16 (realx, x1, 85);
+
+ imagy = _mm_srli_si128 (y1, 2);
+ imagy = _mm_blend_epi16 (y2, imagy, 85);
+ realy = _mm_slli_si128 (y2, 2);
+ realy = _mm_blend_epi16 (realy, y1, 85);
+
+ realx_mult_realy = _mm_mullo_epi16 (realx, realy);
+ imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
+ realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
+ imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
+
+ real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+ imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+
+ //Get early values
+ y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
+ E_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)E_code_ptr);
+
+ imagy = _mm_srli_si128 (y1, 2);
+ imagy = _mm_blend_epi16 (y2, imagy, 85);
+ realy = _mm_slli_si128 (y2, 2);
+ realy = _mm_blend_epi16 (realy, y1, 85);
+
+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
+
+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+
+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
+ real_output_ps_1 = _mm_cvtepi32_ps(real_output_i_1);
+ real_output = _mm_srli_si128 (real_output, 8);
+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
+ real_output_ps_2 = _mm_cvtepi32_ps(real_output_i_2);
+
+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
+ imag_output_ps_1 = _mm_cvtepi32_ps(imag_output_i_1);
+ imag_output = _mm_srli_si128 (imag_output, 8);
+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
+ imag_output_ps_2 = _mm_cvtepi32_ps(imag_output_i_2);
+
+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps_1);
+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps_2);
+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps_1);
+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps_2);
+
+ //Get prompt values
+ y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
+ P_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)P_code_ptr);
+
+ imagy = _mm_srli_si128 (y1, 2);
+ imagy = _mm_blend_epi16 (y2, imagy, 85);
+ realy = _mm_slli_si128 (y2, 2);
+ realy = _mm_blend_epi16 (realy, y1, 85);
+
+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
+
+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+
+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
+ real_output_ps_1 = _mm_cvtepi32_ps(real_output_i_1);
+ real_output = _mm_srli_si128 (real_output, 8);
+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
+ real_output_ps_2 = _mm_cvtepi32_ps(real_output_i_2);
+
+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
+ imag_output_ps_1 = _mm_cvtepi32_ps(imag_output_i_1);
+ imag_output = _mm_srli_si128 (imag_output, 8);
+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
+ imag_output_ps_2 = _mm_cvtepi32_ps(imag_output_i_2);
+
+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps_1);
+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps_2);
+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps_1);
+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps_2);
+
+ //Get late values
+ y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
+ L_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)L_code_ptr);
+
+ imagy = _mm_srli_si128 (y1, 2);
+ imagy = _mm_blend_epi16 (y2, imagy, 85);
+ realy = _mm_slli_si128 (y2, 2);
+ realy = _mm_blend_epi16 (realy, y1, 85);
+
+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
+
+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+
+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
+ real_output_ps_1 = _mm_cvtepi32_ps(real_output_i_1);
+ real_output = _mm_srli_si128 (real_output, 8);
+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
+ real_output_ps_2 = _mm_cvtepi32_ps(real_output_i_2);
+
+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
+ imag_output_ps_1 = _mm_cvtepi32_ps(imag_output_i_1);
+ imag_output = _mm_srli_si128 (imag_output, 8);
+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
+ imag_output_ps_2 = _mm_cvtepi32_ps(imag_output_i_2);
+
+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps_1);
+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps_2);
+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps_1);
+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps_2);
+
+ input_ptr += 4;
+ carrier_ptr += 4;
+ E_code_ptr += 4;
+ L_code_ptr += 4;
+ P_code_ptr += 4;
+ }
+
+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
+
+ _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
+
+ for (int i = 0; i<4; ++i)
+ {
+ E_out_real += real_E_dotProductVector[i];
+ E_out_imag += imag_E_dotProductVector[i];
+ P_out_real += real_P_dotProductVector[i];
+ P_out_imag += imag_P_dotProductVector[i];
+ L_out_real += real_L_dotProductVector[i];
+ L_out_imag += imag_L_dotProductVector[i];
+ }
+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
+ }
+
+ lv_16sc_t bb_signal_sample;
+ for(int i=0; i < num_points%8; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
+ // Now get early, late, and prompt values for each
+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
+ }
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_SSE4_1
+#include "smmintrin.h"
+/*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_sse4_1_third(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
+{
+ const unsigned int sse_iters = num_points / 8;
+ unsigned int index = 0;
+ unsigned int indexPlus4 = 0;
+
+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
+ __m128i realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, real_output_i32, imag_output_i32;
+
+ __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
+ __m128i real_output_i_1, real_output_i_2, imag_output_i_1, imag_output_i_2;
+ __m128 real_output_ps, imag_output_ps;
+
+ float E_out_real = 0;
+ float E_out_imag = 0;
+ float P_out_real = 0;
+ float P_out_imag = 0;
+ float L_out_real = 0;
+ float L_out_imag = 0;
+
+ const lv_16sc_t* input_ptr = input;
+ const lv_16sc_t* carrier_ptr = carrier;
+
+ const lv_16sc_t* E_code_ptr = E_code;
+ lv_32fc_t* E_out_ptr = E_out;
+ const lv_16sc_t* L_code_ptr = L_code;
+ lv_32fc_t* L_out_ptr = L_out;
+ const lv_16sc_t* P_code_ptr = P_code;
+ lv_32fc_t* P_out_ptr = P_out;
+
+ *E_out_ptr = 0;
+ *P_out_ptr = 0;
+ *L_out_ptr = 0;
+
+ real_E_code_acc = _mm_setzero_ps();
+ imag_E_code_acc = _mm_setzero_ps();
+ real_P_code_acc = _mm_setzero_ps();
+ imag_P_code_acc = _mm_setzero_ps();
+ real_L_code_acc = _mm_setzero_ps();
+ imag_L_code_acc = _mm_setzero_ps();
+
+ if (sse_iters>0)
+ {
+ for(index = 0;index < 8*sse_iters; index+=8){
+ indexPlus4 = index + 4;
+ //Perform the carrier wipe-off
+ x1 = _mm_lddqu_si128((__m128i*)&input_ptr[index]);
+ x2 = _mm_lddqu_si128((__m128i*)&input_ptr[indexPlus4]);
+
+ y1 = _mm_lddqu_si128((__m128i*)&carrier_ptr[index]);
+ y2 = _mm_lddqu_si128((__m128i*)&carrier_ptr[indexPlus4]);
+
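+ // x1/x2 hold eight interleaved complex samples ([re im re im ...]); the byte shifts combined with
+ // _mm_blend_epi16 (mask 85 = 0b01010101 selects the even 16-bit lanes from its second operand)
+ // de-interleave them into one vector with the eight real parts and one with the eight imaginary
+ // parts, in a permuted but consistent order, so the element-wise products below are unaffected.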
+ imagx = _mm_srli_si128 (x1, 2);
+ imagx = _mm_blend_epi16 (x2, imagx, 85);
+ realx = _mm_slli_si128 (x2, 2);
+ realx = _mm_blend_epi16 (realx, x1, 85);
+
+ imagy = _mm_srli_si128 (y1, 2);
+ imagy = _mm_blend_epi16 (y2, imagy, 85);
+ realy = _mm_slli_si128 (y2, 2);
+ realy = _mm_blend_epi16 (realy, y1, 85);
+
+ realx_mult_realy = _mm_mullo_epi16 (realx, realy);
+ imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
+ realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
+ imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
+
+ real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+ imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+
+ //Get early values
+ y1 = _mm_lddqu_si128((__m128i*)&E_code_ptr[index]);
+ y2 = _mm_lddqu_si128((__m128i*)&E_code_ptr[indexPlus4]);
+
+ imagy = _mm_srli_si128 (y1, 2);
+ imagy = _mm_blend_epi16 (y2, imagy, 85);
+ realy = _mm_slli_si128 (y2, 2);
+ realy = _mm_blend_epi16 (realy, y1, 85);
+
+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
+
+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+
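+ // Widen the eight 16-bit correlator products to 32-bit integers in two halves, add the halves,
+ // and convert to single precision so the running accumulation is done in float instead of int16.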
+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
+ real_output = _mm_srli_si128 (real_output, 8);
+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
+ real_output_i32 = _mm_add_epi32 (real_output_i_1, real_output_i_2);
+ real_output_ps = _mm_cvtepi32_ps(real_output_i32);
+
+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
+ imag_output = _mm_srli_si128 (imag_output, 8);
+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
+ imag_output_i32 = _mm_add_epi32 (imag_output_i_1, imag_output_i_2);
+ imag_output_ps = _mm_cvtepi32_ps(imag_output_i32);
+
+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
+
+ //Get prompt values
+ y1 = _mm_lddqu_si128((__m128i*)&P_code_ptr[index]);
+ y2 = _mm_lddqu_si128((__m128i*)&P_code_ptr[indexPlus4]);
+
+ imagy = _mm_srli_si128 (y1, 2);
+ imagy = _mm_blend_epi16 (y2, imagy, 85);
+ realy = _mm_slli_si128 (y2, 2);
+ realy = _mm_blend_epi16 (realy, y1, 85);
+
+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
+
+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+
+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
+ real_output = _mm_srli_si128 (real_output, 8);
+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
+ real_output_i32 = _mm_add_epi32 (real_output_i_1, real_output_i_2);
+ real_output_ps = _mm_cvtepi32_ps(real_output_i32);
+
+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
+ imag_output = _mm_srli_si128 (imag_output, 8);
+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
+ imag_output_i32 = _mm_add_epi32 (imag_output_i_1, imag_output_i_2);
+ imag_output_ps = _mm_cvtepi32_ps(imag_output_i32);
+
+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
+
+ //Get late values
+ y1 = _mm_lddqu_si128((__m128i*)&L_code_ptr[index]);
+ y2 = _mm_lddqu_si128((__m128i*)&L_code_ptr[indexPlus4]);
+
+ imagy = _mm_srli_si128 (y1, 2);
+ imagy = _mm_blend_epi16 (y2, imagy, 85);
+ realy = _mm_slli_si128 (y2, 2);
+ realy = _mm_blend_epi16 (realy, y1, 85);
+
+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
+
+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+
+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
+ real_output = _mm_srli_si128 (real_output, 8);
+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
+ real_output_i32 = _mm_add_epi32 (real_output_i_1, real_output_i_2);
+ real_output_ps = _mm_cvtepi32_ps(real_output_i32);
+
+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
+ imag_output = _mm_srli_si128 (imag_output, 8);
+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
+ imag_output_i32 = _mm_add_epi32 (imag_output_i_1, imag_output_i_2);
+ imag_output_ps = _mm_cvtepi32_ps(imag_output_i32);
+
+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
+ }
+
+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
+
+ _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
+
+ for (int i = 0; i<4; ++i)
+ {
+ E_out_real += real_E_dotProductVector[i];
+ E_out_imag += imag_E_dotProductVector[i];
+ P_out_real += real_P_dotProductVector[i];
+ P_out_imag += imag_P_dotProductVector[i];
+ L_out_real += real_L_dotProductVector[i];
+ L_out_imag += imag_L_dotProductVector[i];
+ }
+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
+ }
+
+ lv_16sc_t bb_signal_sample;
+ for(; index < num_points; index++)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = input_ptr[index] * carrier_ptr[index];
+ // Now get early, late, and prompt values for each
+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * E_code_ptr[index]);
+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * P_code_ptr[index]);
+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * L_code_ptr[index]);
+ }
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_SSE4_1
+#include "smmintrin.h"
+/*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal input
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_sse4_1_fourth(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
+{
+ const unsigned int sse_iters = num_points / 8;
+
+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
+ __m128i realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, real_output_i32, imag_output_i32;
+
+ __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
+ __m128i real_output_i_1, real_output_i_2, imag_output_i_1, imag_output_i_2;
+ __m128 real_output_ps, imag_output_ps;
+
+ float E_out_real = 0;
+ float E_out_imag = 0;
+ float P_out_real = 0;
+ float P_out_imag = 0;
+ float L_out_real = 0;
+ float L_out_imag = 0;
+
+ const lv_16sc_t* input_ptr = input;
+ const lv_16sc_t* carrier_ptr = carrier;
+
+ const lv_16sc_t* E_code_ptr = E_code;
+ lv_32fc_t* E_out_ptr = E_out;
+ const lv_16sc_t* L_code_ptr = L_code;
+ lv_32fc_t* L_out_ptr = L_out;
+ const lv_16sc_t* P_code_ptr = P_code;
+ lv_32fc_t* P_out_ptr = P_out;
+
+ *E_out_ptr = 0;
+ *P_out_ptr = 0;
+ *L_out_ptr = 0;
+
+ real_E_code_acc = _mm_setzero_ps();
+ imag_E_code_acc = _mm_setzero_ps();
+ real_P_code_acc = _mm_setzero_ps();
+ imag_P_code_acc = _mm_setzero_ps();
+ real_L_code_acc = _mm_setzero_ps();
+ imag_L_code_acc = _mm_setzero_ps();
+
+ if (sse_iters>0)
+ {
+ for(int number = 0;number < sse_iters; number++){
+
+ //Perform the carrier wipe-off
+ x1 = _mm_lddqu_si128((__m128i*)input_ptr);
+ input_ptr += 4;
+ x2 = _mm_lddqu_si128((__m128i*)input_ptr);
+
+ y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
+ carrier_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)carrier_ptr);
+
+ imagx = _mm_srli_si128 (x1, 2);
+ imagx = _mm_blend_epi16 (x2, imagx, 85);
+ realx = _mm_slli_si128 (x2, 2);
+ realx = _mm_blend_epi16 (realx, x1, 85);
+
+ imagy = _mm_srli_si128 (y1, 2);
+ imagy = _mm_blend_epi16 (y2, imagy, 85);
+ realy = _mm_slli_si128 (y2, 2);
+ realy = _mm_blend_epi16 (realy, y1, 85);
+
+ realx_mult_realy = _mm_mullo_epi16 (realx, realy);
+ imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
+ realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
+ imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
+
+ real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+ imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+
+ //Get early values
+ y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
+ E_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)E_code_ptr);
+
+ imagy = _mm_srli_si128 (y1, 2);
+ imagy = _mm_blend_epi16 (y2, imagy, 85);
+ realy = _mm_slli_si128 (y2, 2);
+ realy = _mm_blend_epi16 (realy, y1, 85);
+
+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
+
+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+
+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
+ real_output = _mm_srli_si128 (real_output, 8);
+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
+ real_output_i32 = _mm_add_epi32 (real_output_i_1, real_output_i_2);
+ real_output_ps = _mm_cvtepi32_ps(real_output_i32);
+
+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
+ imag_output = _mm_srli_si128 (imag_output, 8);
+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
+ imag_output_i32 = _mm_add_epi32 (imag_output_i_1, imag_output_i_2);
+ imag_output_ps = _mm_cvtepi32_ps(imag_output_i32);
+
+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
+
+ //Get prompt values
+ y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
+ P_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)P_code_ptr);
+
+ imagy = _mm_srli_si128 (y1, 2);
+ imagy = _mm_blend_epi16 (y2, imagy, 85);
+ realy = _mm_slli_si128 (y2, 2);
+ realy = _mm_blend_epi16 (realy, y1, 85);
+
+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
+
+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+
+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
+ real_output = _mm_srli_si128 (real_output, 8);
+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
+ real_output_i32 = _mm_add_epi32 (real_output_i_1, real_output_i_2);
+ real_output_ps = _mm_cvtepi32_ps(real_output_i32);
+
+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
+ imag_output = _mm_srli_si128 (imag_output, 8);
+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
+ imag_output_i32 = _mm_add_epi32 (imag_output_i_1, imag_output_i_2);
+ imag_output_ps = _mm_cvtepi32_ps(imag_output_i32);
+
+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
+
+ //Get late values
+ y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
+ L_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)L_code_ptr);
+
+ imagy = _mm_srli_si128 (y1, 2);
+ imagy = _mm_blend_epi16 (y2, imagy, 85);
+ realy = _mm_slli_si128 (y2, 2);
+ realy = _mm_blend_epi16 (realy, y1, 85);
+
+ realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
+ imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
+ realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
+ imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
+
+ real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+ imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+
+ real_output_i_1 = _mm_cvtepi16_epi32(real_output);
+ real_output = _mm_srli_si128 (real_output, 8);
+ real_output_i_2 = _mm_cvtepi16_epi32(real_output);
+ real_output_i32 = _mm_add_epi32 (real_output_i_1, real_output_i_2);
+ real_output_ps = _mm_cvtepi32_ps(real_output_i32);
+
+ imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
+ imag_output = _mm_srli_si128 (imag_output, 8);
+ imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
+ imag_output_i32 = _mm_add_epi32 (imag_output_i_1, imag_output_i_2);
+ imag_output_ps = _mm_cvtepi32_ps(imag_output_i32);
+
+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
+
+ input_ptr += 4;
+ carrier_ptr += 4;
+ E_code_ptr += 4;
+ L_code_ptr += 4;
+ P_code_ptr += 4;
+ }
+
+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
+
+ _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
+
+ for (int i = 0; i<4; ++i)
+ {
+ E_out_real += real_E_dotProductVector[i];
+ E_out_imag += imag_E_dotProductVector[i];
+ P_out_real += real_P_dotProductVector[i];
+ P_out_imag += imag_P_dotProductVector[i];
+ L_out_real += real_L_dotProductVector[i];
+ L_out_imag += imag_L_dotProductVector[i];
+ }
+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
+ }
+
+ lv_16sc_t bb_signal_sample;
+ for(int i=0; i < num_points%8; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
+ // Now get early, late, and prompt values for each
+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
+ }
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_SSE4_1
+#include "smmintrin.h"
+#include "CommonMacros/CommonMacros.h"
+/*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal input
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+
+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_sse4_1_fifth(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
+{
+ const unsigned int sse_iters = num_points / 8;
+
+ __m128i realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy;
+ __m128i input_i_1, input_i_2, output_i32;
+
+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
+ __m128i realx, imagx, realy, imagy, real_output, imag_output;
+
+ __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
+ __m128 real_output_ps, imag_output_ps;
+
+ float E_out_real = 0;
+ float E_out_imag = 0;
+ float P_out_real = 0;
+ float P_out_imag = 0;
+ float L_out_real = 0;
+ float L_out_imag = 0;
+
+ const lv_16sc_t* input_ptr = input;
+ const lv_16sc_t* carrier_ptr = carrier;
+
+ const lv_16sc_t* E_code_ptr = E_code;
+ lv_32fc_t* E_out_ptr = E_out;
+ const lv_16sc_t* L_code_ptr = L_code;
+ lv_32fc_t* L_out_ptr = L_out;
+ const lv_16sc_t* P_code_ptr = P_code;
+ lv_32fc_t* P_out_ptr = P_out;
+
+ *E_out_ptr = 0;
+ *P_out_ptr = 0;
+ *L_out_ptr = 0;
+
+ real_E_code_acc = _mm_setzero_ps();
+ imag_E_code_acc = _mm_setzero_ps();
+ real_P_code_acc = _mm_setzero_ps();
+ imag_P_code_acc = _mm_setzero_ps();
+ real_L_code_acc = _mm_setzero_ps();
+ imag_L_code_acc = _mm_setzero_ps();
+
+ if (sse_iters>0)
+ {
+ for(int number = 0;number < sse_iters; number++){
+
+ //Perform the carrier wipe-off
+ x1 = _mm_lddqu_si128((__m128i*)input_ptr);
+ input_ptr += 4;
+ x2 = _mm_lddqu_si128((__m128i*)input_ptr);
+
+ y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
+ carrier_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)carrier_ptr);
+
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
+
+ //Get early values
+ y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
+ E_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)E_code_ptr);
+
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)
+
+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)
+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
+
+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
+
+ //Get prompt values
+ y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
+ P_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)P_code_ptr);
+
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)
+
+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)
+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
+
+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
+
+ //Get late values
+ y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
+ L_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)L_code_ptr);
+
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)
+
+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)
+ CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
+
+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
+
+ input_ptr += 4;
+ carrier_ptr += 4;
+ E_code_ptr += 4;
+ L_code_ptr += 4;
+ P_code_ptr += 4;
+ }
+
+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
+
+ _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
+
+ for (int i = 0; i<4; ++i)
+ {
+ E_out_real += real_E_dotProductVector[i];
+ E_out_imag += imag_E_dotProductVector[i];
+ P_out_real += real_P_dotProductVector[i];
+ P_out_imag += imag_P_dotProductVector[i];
+ L_out_real += real_L_dotProductVector[i];
+ L_out_imag += imag_L_dotProductVector[i];
+ }
+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
+ }
+
+ lv_16sc_t bb_signal_sample;
+ for(int i=0; i < num_points%8; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
+ // Now get early, late, and prompt values for each
+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
+ }
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_SSE4_1
+#include "smmintrin.h"
+#include "CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h"
+#include "CommonMacros/CommonMacros.h"
+/*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal input
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+
+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_sse4_1_sixth(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
+{
+ const unsigned int sse_iters = num_points / 8;
+
+ __m128i realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy;
+ __m128i input_i_1, input_i_2, output_i32;
+
+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
+ __m128i realx, imagx, realy, imagy, real_output, imag_output;
+
+ __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
+ __m128 real_output_ps, imag_output_ps;
+
+ float E_out_real = 0;
+ float E_out_imag = 0;
+ float P_out_real = 0;
+ float P_out_imag = 0;
+ float L_out_real = 0;
+ float L_out_imag = 0;
+
+ const lv_16sc_t* input_ptr = input;
+ const lv_16sc_t* carrier_ptr = carrier;
+
+ const lv_16sc_t* E_code_ptr = E_code;
+ lv_32fc_t* E_out_ptr = E_out;
+ const lv_16sc_t* L_code_ptr = L_code;
+ lv_32fc_t* L_out_ptr = L_out;
+ const lv_16sc_t* P_code_ptr = P_code;
+ lv_32fc_t* P_out_ptr = P_out;
+
+ *E_out_ptr = 0;
+ *P_out_ptr = 0;
+ *L_out_ptr = 0;
+
+ real_E_code_acc = _mm_setzero_ps();
+ imag_E_code_acc = _mm_setzero_ps();
+ real_P_code_acc = _mm_setzero_ps();
+ imag_P_code_acc = _mm_setzero_ps();
+ real_L_code_acc = _mm_setzero_ps();
+ imag_L_code_acc = _mm_setzero_ps();
+
+ if (sse_iters>0)
+ {
+ for(int number = 0;number < sse_iters; number++){
+
+ //Perform the carrier wipe-off
+ x1 = _mm_lddqu_si128((__m128i*)input_ptr);
+ input_ptr += 4;
+ x2 = _mm_lddqu_si128((__m128i*)input_ptr);
+
+ y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
+ carrier_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)carrier_ptr);
+
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
+
+ //Get early values
+ y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
+ E_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)E_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
+
+ //Get prompt values
+ y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
+ P_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)P_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
+
+ //Get late values
+ y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
+ L_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)L_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
+
+ input_ptr += 4;
+ carrier_ptr += 4;
+ E_code_ptr += 4;
+ L_code_ptr += 4;
+ P_code_ptr += 4;
+ }
+
+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
+
+ _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
+
+ for (int i = 0; i<4; ++i)
+ {
+ E_out_real += real_E_dotProductVector[i];
+ E_out_imag += imag_E_dotProductVector[i];
+ P_out_real += real_P_dotProductVector[i];
+ P_out_imag += imag_P_dotProductVector[i];
+ L_out_real += real_L_dotProductVector[i];
+ L_out_imag += imag_L_dotProductVector[i];
+ }
+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
+ }
+
+ lv_16sc_t bb_signal_sample;
+ for(int i=0; i < num_points%8; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
+ // Now get early, late, and prompt values for each
+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
+ }
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal input
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
+{
+ lv_16sc_t bb_signal_sample;
+ lv_16sc_t tmp1;
+ lv_16sc_t tmp2;
+ lv_16sc_t tmp3;
+
+ bb_signal_sample = lv_cmake(0, 0);
+
+ *E_out = 0;
+ *P_out = 0;
+ *L_out = 0;
+ // perform Early, Prompt and Late correlation
+
+ for(int i=0; i < num_points; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = input[i] * carrier[i];
+
+ tmp1 = bb_signal_sample * E_code[i];
+ tmp2 = bb_signal_sample * P_code[i];
+ tmp3 = bb_signal_sample * L_code[i];
+
+ // Now get early, late, and prompt values for each
+ *E_out += (lv_32fc_t)tmp1;
+ *P_out += (lv_32fc_t)tmp2;
+ *L_out += (lv_32fc_t)tmp3;
+ }
+}
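+
+/* Illustrative usage sketch, not one of the protokernels: it only shows how the generic
+ * Early/Prompt/Late TEST kernel above would typically be driven. The buffer length (16)
+ * and the all-ones replicas are arbitrary placeholders. */
+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_usage_sketch(void)
+{
+ lv_16sc_t input[16], carrier[16], E_code[16], P_code[16], L_code[16];
+ lv_32fc_t E_out, P_out, L_out;
+ for (int i = 0; i < 16; ++i)
+ {
+ input[i] = lv_cmake(1, 0); // dummy baseband samples
+ carrier[i] = lv_cmake(1, 0); // dummy local carrier replica
+ E_code[i] = lv_cmake(1, 0); // dummy Early code replica
+ P_code[i] = lv_cmake(1, 0); // dummy Prompt code replica
+ L_code[i] = lv_cmake(1, 0); // dummy Late code replica
+ }
+ // A single call performs the carrier wipe-off and accumulates the three correlator outputs.
+ volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_generic(&E_out, &P_out, &L_out, input, carrier, E_code, P_code, L_code, 16);
+}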
+#endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_u_H */
+
+
+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_a_H
+#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <float.h>
+#include <string.h>
+//
+//#ifdef LV_HAVE_SSE4_1
+//#include "smmintrin.h"
+///*!
+// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+// \param input The input signal input
+// \param carrier The carrier signal input
+// \param E_code Early PRN code replica input
+// \param P_code Prompt PRN code replica input
+// \param L_code Late PRN code replica input
+// \param E_out Early correlation output
+// \param P_out Prompt correlation output
+// \param L_out Late correlation output
+// \param num_points The number of complex values in vectors
+// */
+//static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_a_sse4_1(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
+//{
+// const unsigned int sse_iters = num_points / 8;
+//
+// __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
+// __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
+//
+// __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
+// __m128i real_output_i_1, real_output_i_2, imag_output_i_1, imag_output_i_2;
+// __m128 real_output_ps_1, real_output_ps_2, imag_output_ps_1, imag_output_ps_2;
+//
+// float E_out_real = 0;
+// float E_out_imag = 0;
+// float P_out_real = 0;
+// float P_out_imag = 0;
+// float L_out_real = 0;
+// float L_out_imag = 0;
+//
+// const lv_16sc_t* input_ptr = input;
+// const lv_16sc_t* carrier_ptr = carrier;
+//
+// const lv_16sc_t* E_code_ptr = E_code;
+// lv_32fc_t* E_out_ptr = E_out;
+// const lv_16sc_t* L_code_ptr = L_code;
+// lv_32fc_t* L_out_ptr = L_out;
+// const lv_16sc_t* P_code_ptr = P_code;
+// lv_32fc_t* P_out_ptr = P_out;
+//
+// *E_out_ptr = 0;
+// *P_out_ptr = 0;
+// *L_out_ptr = 0;
+//
+// mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+//
+// real_E_code_acc = _mm_setzero_ps();
+// imag_E_code_acc = _mm_setzero_ps();
+// real_P_code_acc = _mm_setzero_ps();
+// imag_P_code_acc = _mm_setzero_ps();
+// real_L_code_acc = _mm_setzero_ps();
+// imag_L_code_acc = _mm_setzero_ps();
+//
+// if (sse_iters>0)
+// {
+// for(int number = 0;number < sse_iters; number++){
+//
+// //Perform the carrier wipe-off
+// x1 = _mm_lddqu_si128((__m128i*)input_ptr);
+// input_ptr += 4;
+// x2 = _mm_lddqu_si128((__m128i*)input_ptr);
+//
+// y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
+// carrier_ptr += 4;
+// y2 = _mm_lddqu_si128((__m128i*)carrier_ptr);
+//
+// imagx = _mm_srli_si128 (x1, 2);
+// imagx = _mm_blend_epi16 (x2, imagx, 85);
+// realx = _mm_slli_si128 (x2, 2);
+// realx = _mm_blend_epi16 (realx, x1, 85);
+//
+// imagy = _mm_srli_si128 (y1, 2);
+// imagy = _mm_blend_epi16 (y2, imagy, 85);
+// realy = _mm_slli_si128 (y2, 2);
+// realy = _mm_blend_epi16 (realy, y1, 85);
+//
+// realx_mult_realy = _mm_mullo_epi16 (realx, realy);
+// imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
+// realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
+// imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
+//
+// real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+// imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+//
+// //Get early values
+// y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
+// E_code_ptr += 4;
+// y2 = _mm_lddqu_si128((__m128i*)E_code_ptr);
+//
+// imagy = _mm_srli_si128 (y1, 2);
+// imagy = _mm_blend_epi16 (y2, imagy, 85);
+// realy = _mm_slli_si128 (y2, 2);
+// realy = _mm_blend_epi16 (realy, y1, 85);
+//
+// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
+// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
+// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
+// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
+//
+// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+//
+// real_output_i_1 = _mm_cvtepi16_epi32(real_output);
+// real_output_ps_1 = _mm_cvtepi32_ps(real_output_i_1);
+// real_output = _mm_srli_si128 (real_output, 8);
+// real_output_i_2 = _mm_cvtepi16_epi32(real_output);
+// real_output_ps_2 = _mm_cvtepi32_ps(real_output_i_2);
+//
+// imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
+// imag_output_ps_1 = _mm_cvtepi32_ps(imag_output_i_1);
+// imag_output = _mm_srli_si128 (imag_output, 8);
+// imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
+// imag_output_ps_2 = _mm_cvtepi32_ps(imag_output_i_2);
+//
+// real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps_1);
+// real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps_2);
+// imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps_1);
+// imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps_2);
+//
+// //Get prompt values
+// y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
+// P_code_ptr += 4;
+// y2 = _mm_lddqu_si128((__m128i*)P_code_ptr);
+//
+// imagy = _mm_srli_si128 (y1, 2);
+// imagy = _mm_blend_epi16 (y2, imagy, 85);
+// realy = _mm_slli_si128 (y2, 2);
+// realy = _mm_blend_epi16 (realy, y1, 85);
+//
+// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
+// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
+// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
+// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
+//
+// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+//
+// real_output_i_1 = _mm_cvtepi16_epi32(real_output);
+// real_output_ps_1 = _mm_cvtepi32_ps(real_output_i_1);
+// real_output = _mm_srli_si128 (real_output, 8);
+// real_output_i_2 = _mm_cvtepi16_epi32(real_output);
+// real_output_ps_2 = _mm_cvtepi32_ps(real_output_i_2);
+//
+// imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
+// imag_output_ps_1 = _mm_cvtepi32_ps(imag_output_i_1);
+// imag_output = _mm_srli_si128 (imag_output, 8);
+// imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
+// imag_output_ps_2 = _mm_cvtepi32_ps(imag_output_i_2);
+//
+// real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps_1);
+// real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps_2);
+// imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps_1);
+// imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps_2);
+//
+// //Get late values
+// y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
+// L_code_ptr += 4;
+// y2 = _mm_lddqu_si128((__m128i*)L_code_ptr);
+//
+// imagy = _mm_srli_si128 (y1, 2);
+// imagy = _mm_blend_epi16 (y2, imagy, 85);
+// realy = _mm_slli_si128 (y2, 2);
+// realy = _mm_blend_epi16 (realy, y1, 85);
+//
+// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
+// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
+// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
+// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
+//
+// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+//
+// real_output_i_1 = _mm_cvtepi16_epi32(real_output);
+// real_output_ps_1 = _mm_cvtepi32_ps(real_output_i_1);
+// real_output = _mm_srli_si128 (real_output, 8);
+// real_output_i_2 = _mm_cvtepi16_epi32(real_output);
+// real_output_ps_2 = _mm_cvtepi32_ps(real_output_i_2);
+//
+// imag_output_i_1 = _mm_cvtepi16_epi32(imag_output);
+// imag_output_ps_1 = _mm_cvtepi32_ps(imag_output_i_1);
+// imag_output = _mm_srli_si128 (imag_output, 8);
+// imag_output_i_2 = _mm_cvtepi16_epi32(imag_output);
+// imag_output_ps_2 = _mm_cvtepi32_ps(imag_output_i_2);
+//
+// real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps_1);
+// real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps_2);
+// imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps_1);
+// imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps_2);
+//
+// input_ptr += 4;
+// carrier_ptr += 4;
+// E_code_ptr += 4;
+// L_code_ptr += 4;
+// P_code_ptr += 4;
+// }
+//
+// __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
+// __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
+// __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
+// __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
+// __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
+// __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
+//
+// _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
+// _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
+// _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
+// _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
+// _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
+// _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
+//
+// for (int i = 0; i<4; ++i)
+// {
+// E_out_real += real_E_dotProductVector[i];
+// E_out_imag += imag_E_dotProductVector[i];
+// P_out_real += real_P_dotProductVector[i];
+// P_out_imag += imag_P_dotProductVector[i];
+// L_out_real += real_L_dotProductVector[i];
+// L_out_imag += imag_L_dotProductVector[i];
+// }
+// *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
+// *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
+// *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
+// }
+//
+// lv_16sc_t bb_signal_sample;
+// for(int i=0; i < num_points%8; ++i)
+// {
+// //Perform the carrier wipe-off
+// bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
+// // Now get early, late, and prompt values for each
+// *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
+// *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
+// *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
+// }
+//}
+//#endif /* LV_HAVE_SSE4_1 */
+//
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal input
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_a_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
+{
+ lv_16sc_t bb_signal_sample;
+ lv_16sc_t tmp1;
+ lv_16sc_t tmp2;
+ lv_16sc_t tmp3;
+
+ bb_signal_sample = lv_cmake(0, 0);
+
+ *E_out = 0;
+ *P_out = 0;
+ *L_out = 0;
+ // perform Early, Prompt and Late correlation
+
+ for(int i=0; i < num_points; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = input[i] * carrier[i];
+
+ tmp1 = bb_signal_sample * E_code[i];
+ tmp2 = bb_signal_sample * P_code[i];
+ tmp3 = bb_signal_sample * L_code[i];
+
+ // Now get early, late, and prompt values for each
+ *E_out += (lv_32fc_t)tmp1;
+ *P_out += (lv_32fc_t)tmp2;
+ *L_out += (lv_32fc_t)tmp3;
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3_a_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5.h
new file mode 100644
index 000000000..af207b92f
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5.h
@@ -0,0 +1,595 @@
+/*!
+ * \file volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5.h
+ * \brief Volk protokernel: performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Late correlation with 32-bit vectors and returns float32 values.
+ * \authors
+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
+ *
+ *
+ * Volk protokernel that performs the carrier wipe-off mixing and the
+ * Very Early, Early, Prompt, Late and Very Late correlation with 32-bit complex vectors
+ * (16-bit real part and 16-bit imaginary part), accumulating into float32 values and returning them:
+ * - The carrier wipe-off is done by multiplying the input signal by the
+ * carrier (multiplication of 32-bit vectors). It returns the input
+ * signal in base band (BB).
+ * - Very Early values are calculated by multiplying the input signal in BB by the
+ * very early code (multiplication of 32-bit vectors), converting the result to float32 and accumulating it
+ * - Early values are calculated by multiplying the input signal in BB by the
+ * early code (multiplication of 32-bit vectors), converting the result to float32 and accumulating it
+ * - Prompt values are calculated by multiplying the input signal in BB by the
+ * prompt code (multiplication of 32-bit vectors), converting the result to float32 and accumulating it
+ * - Late values are calculated by multiplying the input signal in BB by the
+ * late code (multiplication of 32-bit vectors), converting the result to float32 and accumulating it
+ * - Very Late values are calculated by multiplying the input signal in BB by the
+ * very late code (multiplication of 32-bit vectors), converting the result to float32 and accumulating it
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
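+
+/* Per complex sample n, the generic implementation below effectively computes (illustrative notation):
+ *     bb       = input[n] * carrier[n];        // carrier wipe-off, done in 16-bit complex arithmetic
+ *     *VE_out += (lv_32fc_t)(bb * VE_code[n]); // and likewise for E_out, P_out, L_out and VL_out
+ * The SSE4.1 protokernels perform the same operations on eight samples per iteration,
+ * accumulating in single precision. */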
+
+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_u_H
+#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <float.h>
+#include <string.h>
+
+#ifdef LV_HAVE_SSE4_1
+#include "smmintrin.h"
+#include "CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h"
+#include "CommonMacros/CommonMacros.h"
+ /*!
+ \brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Late correlation
+ \param input The input signal input
+ \param carrier The carrier signal input
+ \param VE_code Very Early PRN code replica input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param VL_code Very Late PRN code replica input
+ \param VE_out Very Early correlation output
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param VL_out Very Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_u_sse4_1(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* VE_code, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, const lv_16sc_t* VL_code, unsigned int num_points)
+{
+ const unsigned int sse_iters = num_points / 8;
+
+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
+ __m128i realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
+
+ __m128 real_VE_code_acc, imag_VE_code_acc, real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc, real_VL_code_acc, imag_VL_code_acc;
+ __m128i input_i_1, input_i_2, output_i32;
+ __m128 real_output_ps, imag_output_ps;
+
+ float VE_out_real = 0;
+ float VE_out_imag = 0;
+ float E_out_real = 0;
+ float E_out_imag = 0;
+ float P_out_real = 0;
+ float P_out_imag = 0;
+ float L_out_real = 0;
+ float L_out_imag = 0;
+ float VL_out_real = 0;
+ float VL_out_imag = 0;
+
+ const lv_16sc_t* input_ptr = input;
+ const lv_16sc_t* carrier_ptr = carrier;
+
+ const lv_16sc_t* VE_code_ptr = VE_code;
+ lv_32fc_t* VE_out_ptr = VE_out;
+ const lv_16sc_t* E_code_ptr = E_code;
+ lv_32fc_t* E_out_ptr = E_out;
+ const lv_16sc_t* L_code_ptr = L_code;
+ lv_32fc_t* L_out_ptr = L_out;
+ const lv_16sc_t* P_code_ptr = P_code;
+ lv_32fc_t* P_out_ptr = P_out;
+ const lv_16sc_t* VL_code_ptr = VL_code;
+ lv_32fc_t* VL_out_ptr = VL_out;
+
+ *VE_out_ptr = 0;
+ *E_out_ptr = 0;
+ *P_out_ptr = 0;
+ *L_out_ptr = 0;
+ *VL_out_ptr = 0;
+
+ real_VE_code_acc = _mm_setzero_ps();
+ imag_VE_code_acc = _mm_setzero_ps();
+ real_E_code_acc = _mm_setzero_ps();
+ imag_E_code_acc = _mm_setzero_ps();
+ real_P_code_acc = _mm_setzero_ps();
+ imag_P_code_acc = _mm_setzero_ps();
+ real_L_code_acc = _mm_setzero_ps();
+ imag_L_code_acc = _mm_setzero_ps();
+ real_VL_code_acc = _mm_setzero_ps();
+ imag_VL_code_acc = _mm_setzero_ps();
+
+ if (sse_iters>0)
+ {
+ for(int number = 0;number < sse_iters; number++){
+
+ //Perform the carrier wipe-off
+ x1 = _mm_lddqu_si128((__m128i*)input_ptr);
+ input_ptr += 4;
+ x2 = _mm_lddqu_si128((__m128i*)input_ptr);
+
+ y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
+ carrier_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)carrier_ptr);
+
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
+
+ //Get very early values
+ y1 = _mm_lddqu_si128((__m128i*)VE_code_ptr);
+ VE_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)VE_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_VE_code_acc = _mm_add_ps (real_VE_code_acc, real_output_ps);
+ imag_VE_code_acc = _mm_add_ps (imag_VE_code_acc, imag_output_ps);
+
+ //Get early values
+ y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
+ E_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)E_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
+
+ //Get prompt values
+ y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
+ P_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)P_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
+
+ //Get late values
+ y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
+ L_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)L_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
+
+ //Get very late values
+ y1 = _mm_lddqu_si128((__m128i*)VL_code_ptr);
+ VL_code_ptr += 4;
+ y2 = _mm_lddqu_si128((__m128i*)VL_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_VL_code_acc = _mm_add_ps (real_VL_code_acc, real_output_ps);
+ imag_VL_code_acc = _mm_add_ps (imag_VL_code_acc, imag_output_ps);
+
+ input_ptr += 4;
+ carrier_ptr += 4;
+ VE_code_ptr += 4;
+ E_code_ptr += 4;
+ P_code_ptr += 4;
+ L_code_ptr += 4;
+ VL_code_ptr += 4;
+ }
+
+ __VOLK_ATTR_ALIGNED(16) float real_VE_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_VE_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_VL_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_VL_dotProductVector[4];
+
+ _mm_storeu_ps((float*)real_VE_dotProductVector,real_VE_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_VE_dotProductVector,imag_VE_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)real_VL_dotProductVector,real_VL_code_acc); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)imag_VL_dotProductVector,imag_VL_code_acc); // Store the results back into the dot product vector
+
+ for (int i = 0; i<4; ++i)
+ {
+ VE_out_real += real_VE_dotProductVector[i];
+ VE_out_imag += imag_VE_dotProductVector[i];
+ E_out_real += real_E_dotProductVector[i];
+ E_out_imag += imag_E_dotProductVector[i];
+ P_out_real += real_P_dotProductVector[i];
+ P_out_imag += imag_P_dotProductVector[i];
+ L_out_real += real_L_dotProductVector[i];
+ L_out_imag += imag_L_dotProductVector[i];
+ VL_out_real += real_VL_dotProductVector[i];
+ VL_out_imag += imag_VL_dotProductVector[i];
+ }
+ *VE_out_ptr = lv_cmake(VE_out_real, VE_out_imag);
+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
+ *VL_out_ptr = lv_cmake(VL_out_real, VL_out_imag);
+ }
+
+ lv_16sc_t bb_signal_sample;
+ for(int i=0; i < num_points%8; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
+ // Now get very early, early, prompt, late and very late values for each
+ *VE_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VE_code_ptr++));
+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
+ *VL_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VL_code_ptr++));
+ }
+
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Late correlation
+ \param input The input signal
+ \param carrier The carrier signal input
+ \param VE_code Very Early PRN code replica input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param VL_code Very Late PRN code replica input
+ \param VE_out Very Early correlation output
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param VL_out Very Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* VE_code, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, const lv_16sc_t* VL_code, unsigned int num_points)
+{
+ lv_16sc_t bb_signal_sample;
+ lv_16sc_t tmp1;
+ lv_16sc_t tmp2;
+ lv_16sc_t tmp3;
+ lv_16sc_t tmp4;
+ lv_16sc_t tmp5;
+
+ bb_signal_sample = lv_cmake(0, 0);
+
+ *VE_out = 0;
+ *E_out = 0;
+ *P_out = 0;
+ *L_out = 0;
+ *VL_out = 0;
+ // perform the Very Early, Early, Prompt, Late and Very Late correlation
+
+ for(int i=0; i < num_points; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = input[i] * carrier[i];
+
+ tmp1 = bb_signal_sample * VE_code[i];
+ tmp2 = bb_signal_sample * E_code[i];
+ tmp3 = bb_signal_sample * P_code[i];
+ tmp4 = bb_signal_sample * L_code[i];
+ tmp5 = bb_signal_sample * VL_code[i];
+
+ // Now get very early, early, prompt, late and very late values for each
+ *VE_out += (lv_32fc_t)tmp1;
+ *E_out += (lv_32fc_t)tmp2;
+ *P_out += (lv_32fc_t)tmp3;
+ *L_out += (lv_32fc_t)tmp4;
+ *VL_out += (lv_32fc_t)tmp5;
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_u_H */
+
+
+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_H
+#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <float.h>
+#include <string.h>
+
+#ifdef LV_HAVE_SSE4_1
+#include "smmintrin.h"
+#include "CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h"
+#include "CommonMacros/CommonMacros.h"
+/*!
+ \brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Late correlation
+ \param input The input signal
+ \param carrier The carrier signal input
+ \param VE_code Very Early PRN code replica input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param VL_code Very Late PRN code replica input
+ \param VE_out Very Early correlation output
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param VL_out Very Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_sse4_1(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* VE_code, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, const lv_16sc_t* VL_code, unsigned int num_points)
+{
+ const unsigned int sse_iters = num_points / 8;
+
+ __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
+ __m128i realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
+
+ __m128 real_VE_code_acc, imag_VE_code_acc, real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc, real_VL_code_acc, imag_VL_code_acc;
+ __m128i input_i_1, input_i_2, output_i32;
+ __m128 real_output_ps, imag_output_ps;
+
+ float VE_out_real = 0;
+ float VE_out_imag = 0;
+ float E_out_real = 0;
+ float E_out_imag = 0;
+ float P_out_real = 0;
+ float P_out_imag = 0;
+ float L_out_real = 0;
+ float L_out_imag = 0;
+ float VL_out_real = 0;
+ float VL_out_imag = 0;
+
+ const lv_16sc_t* input_ptr = input;
+ const lv_16sc_t* carrier_ptr = carrier;
+
+ const lv_16sc_t* VE_code_ptr = VE_code;
+ lv_32fc_t* VE_out_ptr = VE_out;
+ const lv_16sc_t* E_code_ptr = E_code;
+ lv_32fc_t* E_out_ptr = E_out;
+ const lv_16sc_t* L_code_ptr = L_code;
+ lv_32fc_t* L_out_ptr = L_out;
+ const lv_16sc_t* P_code_ptr = P_code;
+ lv_32fc_t* P_out_ptr = P_out;
+ const lv_16sc_t* VL_code_ptr = VL_code;
+ lv_32fc_t* VL_out_ptr = VL_out;
+
+ *VE_out_ptr = 0;
+ *E_out_ptr = 0;
+ *P_out_ptr = 0;
+ *L_out_ptr = 0;
+ *VL_out_ptr = 0;
+
+ real_VE_code_acc = _mm_setzero_ps();
+ imag_VE_code_acc = _mm_setzero_ps();
+ real_E_code_acc = _mm_setzero_ps();
+ imag_E_code_acc = _mm_setzero_ps();
+ real_P_code_acc = _mm_setzero_ps();
+ imag_P_code_acc = _mm_setzero_ps();
+ real_L_code_acc = _mm_setzero_ps();
+ imag_L_code_acc = _mm_setzero_ps();
+ real_VL_code_acc = _mm_setzero_ps();
+ imag_VL_code_acc = _mm_setzero_ps();
+
+ if (sse_iters>0)
+ {
+ for(int number = 0;number < sse_iters; number++){
+
+ //Perform the carrier wipe-off
+ x1 = _mm_load_si128((__m128i*)input_ptr);
+ input_ptr += 4;
+ x2 = _mm_load_si128((__m128i*)input_ptr);
+
+ y1 = _mm_load_si128((__m128i*)carrier_ptr);
+ carrier_ptr += 4;
+ y2 = _mm_load_si128((__m128i*)carrier_ptr);
+
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
+ CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
+ CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
+
+ //Get very early values
+ y1 = _mm_load_si128((__m128i*)VE_code_ptr);
+ VE_code_ptr += 4;
+ y2 = _mm_load_si128((__m128i*)VE_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_VE_code_acc = _mm_add_ps (real_VE_code_acc, real_output_ps);
+ imag_VE_code_acc = _mm_add_ps (imag_VE_code_acc, imag_output_ps);
+
+ //Get early values
+ y1 = _mm_load_si128((__m128i*)E_code_ptr);
+ E_code_ptr += 4;
+ y2 = _mm_load_si128((__m128i*)E_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
+ imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
+
+ //Get prompt values
+ y1 = _mm_load_si128((__m128i*)P_code_ptr);
+ P_code_ptr += 4;
+ y2 = _mm_load_si128((__m128i*)P_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
+ imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
+
+ //Get late values
+ y1 = _mm_load_si128((__m128i*)L_code_ptr);
+ L_code_ptr += 4;
+ y2 = _mm_load_si128((__m128i*)L_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
+ imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
+
+ //Get very late values
+ y1 = _mm_load_si128((__m128i*)VL_code_ptr);
+ VL_code_ptr += 4;
+ y2 = _mm_load_si128((__m128i*)VL_code_ptr);
+
+ CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
+
+ real_VL_code_acc = _mm_add_ps (real_VL_code_acc, real_output_ps);
+ imag_VL_code_acc = _mm_add_ps (imag_VL_code_acc, imag_output_ps);
+
+ input_ptr += 4;
+ carrier_ptr += 4;
+ VE_code_ptr += 4;
+ E_code_ptr += 4;
+ P_code_ptr += 4;
+ L_code_ptr += 4;
+ VL_code_ptr += 4;
+ }
+
+ __VOLK_ATTR_ALIGNED(16) float real_VE_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_VE_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float real_VL_dotProductVector[4];
+ __VOLK_ATTR_ALIGNED(16) float imag_VL_dotProductVector[4];
+
+ _mm_store_ps((float*)real_VE_dotProductVector,real_VE_code_acc); // Store the results back into the dot product vector
+ _mm_store_ps((float*)imag_VE_dotProductVector,imag_VE_code_acc); // Store the results back into the dot product vector
+ _mm_store_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
+ _mm_store_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
+ _mm_store_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
+ _mm_store_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
+ _mm_store_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
+ _mm_store_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
+ _mm_store_ps((float*)real_VL_dotProductVector,real_VL_code_acc); // Store the results back into the dot product vector
+ _mm_store_ps((float*)imag_VL_dotProductVector,imag_VL_code_acc); // Store the results back into the dot product vector
+
+ for (int i = 0; i<4; ++i)
+ {
+ VE_out_real += real_VE_dotProductVector[i];
+ VE_out_imag += imag_VE_dotProductVector[i];
+ E_out_real += real_E_dotProductVector[i];
+ E_out_imag += imag_E_dotProductVector[i];
+ P_out_real += real_P_dotProductVector[i];
+ P_out_imag += imag_P_dotProductVector[i];
+ L_out_real += real_L_dotProductVector[i];
+ L_out_imag += imag_L_dotProductVector[i];
+ VL_out_real += real_VL_dotProductVector[i];
+ VL_out_imag += imag_VL_dotProductVector[i];
+ }
+ *VE_out_ptr = lv_cmake(VE_out_real, VE_out_imag);
+ *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
+ *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
+ *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
+ *VL_out_ptr = lv_cmake(VL_out_real, VL_out_imag);
+ }
+
+ lv_16sc_t bb_signal_sample;
+ for(int i=0; i < num_points%8; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
+ // Now get very early, early, prompt, late and very late values for each
+ *VE_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VE_code_ptr++));
+ *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
+ *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
+ *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
+ *VL_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VL_code_ptr++));
+ }
+
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Late correlation
+ \param input The input signal
+ \param carrier The carrier signal input
+ \param VE_code Very Early PRN code replica input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param VL_code Very Late PRN code replica input
+ \param VE_out Very Early correlation output
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param VL_out Very Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* VE_code, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, const lv_16sc_t* VL_code, unsigned int num_points)
+{
+ lv_16sc_t bb_signal_sample;
+ lv_16sc_t tmp1;
+ lv_16sc_t tmp2;
+ lv_16sc_t tmp3;
+ lv_16sc_t tmp4;
+ lv_16sc_t tmp5;
+
+ bb_signal_sample = lv_cmake(0, 0);
+
+ *VE_out = 0;
+ *E_out = 0;
+ *P_out = 0;
+ *L_out = 0;
+ *VL_out = 0;
+ // perform the Very Early, Early, Prompt, Late and Very Late correlation
+
+ for(int i=0; i < num_points; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = input[i] * carrier[i];
+
+ tmp1 = bb_signal_sample * VE_code[i];
+ tmp2 = bb_signal_sample * E_code[i];
+ tmp3 = bb_signal_sample * P_code[i];
+ tmp4 = bb_signal_sample * L_code[i];
+ tmp5 = bb_signal_sample * VL_code[i];
+
+ // Now get very early, early, prompt, late and very late values for each
+ *VE_out += (lv_32fc_t)tmp1;
+ *E_out += (lv_32fc_t)tmp2;
+ *P_out += (lv_32fc_t)tmp3;
+ *L_out += (lv_32fc_t)tmp4;
+ *VL_out += (lv_32fc_t)tmp5;
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_H */
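Review note: the five-correlator kernels are easiest to sanity-check through the generic implementation. The sketch below is hypothetical and not part of the patch; it assumes the header is volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5.h and that lv_16sc_t, lv_32fc_t and lv_cmake() come from volk_gnsssdr/volk_gnsssdr_complex.h, as the kernels themselves already assume.

```c
#include "volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5.h"

int main(void)
{
    enum { N = 16 };
    lv_16sc_t in[N], carrier[N], ve[N], e[N], p[N], l[N], vl[N];
    lv_32fc_t VE, E, P, L, VL;

    for (unsigned int i = 0; i < N; ++i)
        {
            in[i]      = lv_cmake(1, 0);   /* unit baseband samples */
            carrier[i] = lv_cmake(1, 0);   /* no carrier rotation   */
            ve[i] = e[i] = p[i] = l[i] = vl[i] = lv_cmake(1, 0);   /* all-ones code replicas */
        }

    /* The outputs are reset inside the kernel, so no initialization is needed. */
    volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_generic(&VE, &E, &P, &L, &VL,
            in, carrier, ve, e, p, l, vl, N);

    /* With these inputs every correlator accumulates to N + 0j. */
    return 0;
}
```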
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_accumulator_s32f.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_accumulator_s32f.h
new file mode 100644
index 000000000..82f1b3efd
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_accumulator_s32f.h
@@ -0,0 +1,68 @@
+#ifndef INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H
+#define INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+ \brief Accumulates the values in the input buffer
+ \param result The accumulated result
+ \param inputBuffer The buffer of data to be accumulated
+ \param num_points The number of values in inputBuffer to be accumulated
+*/
+static inline void volk_gnsssdr_32f_accumulator_s32f_a_sse(float* result, const float* inputBuffer, unsigned int num_points){
+ float returnValue = 0;
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* aPtr = inputBuffer;
+ __VOLK_ATTR_ALIGNED(16) float tempBuffer[4];
+
+ __m128 accumulator = _mm_setzero_ps();
+ __m128 aVal = _mm_setzero_ps();
+
+ for(;number < quarterPoints; number++){
+ aVal = _mm_load_ps(aPtr);
+ accumulator = _mm_add_ps(accumulator, aVal);
+ aPtr += 4;
+ }
+ _mm_store_ps(tempBuffer,accumulator); // Store the results back into the C container
+ returnValue = tempBuffer[0];
+ returnValue += tempBuffer[1];
+ returnValue += tempBuffer[2];
+ returnValue += tempBuffer[3];
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ returnValue += (*aPtr++);
+ }
+ *result = returnValue;
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Accumulates the values in the input buffer
+ \param result The accumulated result
+ \param inputBuffer The buffer of data to be accumulated
+ \param num_points The number of values in inputBuffer to be accumulated
+*/
+static inline void volk_gnsssdr_32f_accumulator_s32f_generic(float* result, const float* inputBuffer, unsigned int num_points){
+ const float* aPtr = inputBuffer;
+ unsigned int number = 0;
+ float returnValue = 0;
+
+ for(;number < num_points; number++){
+ returnValue += (*aPtr++);
+ }
+ *result = returnValue;
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+
+
+#endif /* INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H */
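Review note: a minimal, hypothetical caller for the generic accumulator above (not part of the patch; it assumes the header is installed as volk_gnsssdr_32f_accumulator_s32f.h on the include path):

```c
#include <stdio.h>
#include "volk_gnsssdr_32f_accumulator_s32f.h"

int main(void)
{
    float buffer[10];
    float result = 0.0f;

    for (unsigned int i = 0; i < 10; ++i)
        buffer[i] = (float)(i + 1);   /* 1, 2, ..., 10 */

    volk_gnsssdr_32f_accumulator_s32f_generic(&result, buffer, 10);
    printf("sum = %f\n", result);     /* expected: 55.000000 */
    return 0;
}
```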
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_index_max_16u.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_index_max_16u.h
new file mode 100644
index 000000000..c815609b2
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_index_max_16u.h
@@ -0,0 +1,149 @@
+#ifndef INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H
+#define INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <float.h>
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+
+#ifdef LV_HAVE_SSE4_1
+#include <smmintrin.h>
+
+static inline void volk_gnsssdr_32f_index_max_16u_a_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) {
+ if(num_points > 0){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ float* inputPtr = (float*)src0;
+
+ __m128 indexIncrementValues = _mm_set1_ps(4);
+ __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
+
+ float max = src0[0];
+ float index = 0;
+ __m128 maxValues = _mm_set1_ps(max);
+ __m128 maxValuesIndex = _mm_setzero_ps();
+ __m128 compareResults;
+ __m128 currentValues;
+
+ __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
+ __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
+
+ for(;number < quarterPoints; number++){
+
+ currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
+ currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
+
+ compareResults = _mm_cmpgt_ps(maxValues, currentValues);
+
+ maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
+ maxValues = _mm_blendv_ps(currentValues, maxValues, compareResults);
+ }
+
+ // Reduce the four SIMD lanes to find the overall maximum and its index
+ _mm_store_ps(maxValuesBuffer, maxValues);
+ _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
+
+ for(number = 0; number < 4; number++){
+ if(maxValuesBuffer[number] > max){
+ index = maxIndexesBuffer[number];
+ max = maxValuesBuffer[number];
+ }
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ if(src0[number] > max){
+ index = number;
+ max = src0[number];
+ }
+ }
+ target[0] = (unsigned int)index;
+ }
+}
+
+#endif /*LV_HAVE_SSE4_1*/
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+
+static inline void volk_gnsssdr_32f_index_max_16u_a_sse(unsigned int* target, const float* src0, unsigned int num_points) {
+ if(num_points > 0){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ float* inputPtr = (float*)src0;
+
+ __m128 indexIncrementValues = _mm_set1_ps(4);
+ __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
+
+ float max = src0[0];
+ float index = 0;
+ __m128 maxValues = _mm_set1_ps(max);
+ __m128 maxValuesIndex = _mm_setzero_ps();
+ __m128 compareResults;
+ __m128 currentValues;
+
+ __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
+ __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
+
+ for(;number < quarterPoints; number++){
+
+ currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
+ currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
+
+ compareResults = _mm_cmpgt_ps(maxValues, currentValues);
+
+ maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex) , _mm_andnot_ps(compareResults, currentIndexes));
+
+ maxValues = _mm_or_ps(_mm_and_ps(compareResults, maxValues) , _mm_andnot_ps(compareResults, currentValues));
+ }
+
+ // Reduce the four SIMD lanes to find the overall maximum and its index
+ _mm_store_ps(maxValuesBuffer, maxValues);
+ _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
+
+ for(number = 0; number < 4; number++){
+ if(maxValuesBuffer[number] > max){
+ index = maxIndexesBuffer[number];
+ max = maxValuesBuffer[number];
+ }
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ if(src0[number] > max){
+ index = number;
+ max = src0[number];
+ }
+ }
+ target[0] = (unsigned int)index;
+ }
+}
+
+#endif /*LV_HAVE_SSE*/
+
+#ifdef LV_HAVE_GENERIC
+static inline void volk_gnsssdr_32f_index_max_16u_generic(unsigned int* target, const float* src0, unsigned int num_points) {
+ if(num_points > 0){
+ float max = src0[0];
+ unsigned int index = 0;
+
+ unsigned int i = 1;
+
+ for(; i < num_points; ++i) {
+
+ if(src0[i] > max){
+ index = i;
+ max = src0[i];
+ }
+
+ }
+ target[0] = index;
+ }
+}
+
+#endif /*LV_HAVE_GENERIC*/
+
+
+#endif /*INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H*/
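Review note: the index_max kernels return the position of the largest element, typically used to pick a correlation peak. A hypothetical caller for the generic variant (not part of the patch):

```c
#include <stdio.h>
#include "volk_gnsssdr_32f_index_max_16u.h"

int main(void)
{
    float mag[8] = { 0.1f, 0.7f, 0.3f, 2.5f, 1.0f, 0.0f, 2.4f, 0.2f };
    unsigned int peak = 0;

    volk_gnsssdr_32f_index_max_16u_generic(&peak, mag, 8);
    printf("peak at index %u\n", peak);   /* expected: 3 */
    return 0;
}
```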
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_s32f_convert_16i.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_s32f_convert_16i.h
new file mode 100644
index 000000000..cd83cef9d
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_s32f_convert_16i.h
@@ -0,0 +1,302 @@
+#ifndef INCLUDED_volk_gnsssdr_32f_s32f_convert_16i_u_H
+#define INCLUDED_volk_gnsssdr_32f_s32f_convert_16i_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+ /*!
+ \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
+ \param inputVector The floating point input data buffer
+ \param outputVector The 16 bit output data buffer
+ \param scalar The value multiplied against each point in the input buffer
+ \param num_points The number of data values to be converted
+ \note Input buffer does NOT need to be properly aligned
+ */
+static inline void volk_gnsssdr_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+ unsigned int number = 0;
+
+ const unsigned int eighthPoints = num_points / 8;
+
+ const float* inputVectorPtr = (const float*)inputVector;
+ int16_t* outputVectorPtr = outputVector;
+
+ float min_val = -32768;
+ float max_val = 32767;
+ float r;
+
+ __m128 vScalar = _mm_set_ps1(scalar);
+ __m128 inputVal1, inputVal2;
+ __m128i intInputVal1, intInputVal2;
+ __m128 ret1, ret2;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
+
+ for(;number < eighthPoints; number++){
+ inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
+ inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
+
+ // Scale and clip
+ ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+ ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(ret1);
+ intInputVal2 = _mm_cvtps_epi32(ret2);
+
+ intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
+
+ _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
+ outputVectorPtr += 8;
+ }
+
+ number = eighthPoints * 8;
+ for(; number < num_points; number++){
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int16_t)rintf(r);
+ }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+ /*!
+ \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
+ \param inputVector The floating point input data buffer
+ \param outputVector The 16 bit output data buffer
+ \param scalar The value multiplied against each point in the input buffer
+ \param num_points The number of data values to be converted
+ \note Input buffer does NOT need to be properly aligned
+ */
+static inline void volk_gnsssdr_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+ unsigned int number = 0;
+
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* inputVectorPtr = (const float*)inputVector;
+ int16_t* outputVectorPtr = outputVector;
+
+ float min_val = -32768;
+ float max_val = 32767;
+ float r;
+
+ __m128 vScalar = _mm_set_ps1(scalar);
+ __m128 ret;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
+
+ __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
+
+ for(;number < quarterPoints; number++){
+ ret = _mm_loadu_ps(inputVectorPtr);
+ inputVectorPtr += 4;
+
+ // Scale and clip
+ ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
+
+ _mm_store_ps(outputFloatBuffer, ret);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int16_t)rintf(r);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
+ \param inputVector The floating point input data buffer
+ \param outputVector The 16 bit output data buffer
+ \param scalar The value multiplied against each point in the input buffer
+ \param num_points The number of data values to be converted
+ \note Input buffer does NOT need to be properly aligned
+ */
+static inline void volk_gnsssdr_32f_s32f_convert_16i_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+ int16_t* outputVectorPtr = outputVector;
+ const float* inputVectorPtr = inputVector;
+ unsigned int number = 0;
+ float min_val = -32768;
+ float max_val = 32767;
+ float r;
+
+ for(number = 0; number < num_points; number++){
+ r = *inputVectorPtr++ * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ *outputVectorPtr++ = (int16_t)rintf(r);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+
+
+#endif /* INCLUDED_volk_gnsssdr_32f_s32f_convert_16i_u_H */
+#ifndef INCLUDED_volk_gnsssdr_32f_s32f_convert_16i_a_H
+#define INCLUDED_volk_gnsssdr_32f_s32f_convert_16i_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+ /*!
+ \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
+ \param inputVector The floating point input data buffer
+ \param outputVector The 16 bit output data buffer
+ \param scalar The value multiplied against each point in the input buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+ unsigned int number = 0;
+
+ const unsigned int eighthPoints = num_points / 8;
+
+ const float* inputVectorPtr = (const float*)inputVector;
+ int16_t* outputVectorPtr = outputVector;
+
+ float min_val = -32768;
+ float max_val = 32767;
+ float r;
+
+ __m128 vScalar = _mm_set_ps1(scalar);
+ __m128 inputVal1, inputVal2;
+ __m128i intInputVal1, intInputVal2;
+ __m128 ret1, ret2;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
+
+ for(;number < eighthPoints; number++){
+ inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
+ inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
+
+ // Scale and clip
+ ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
+ ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(ret1);
+ intInputVal2 = _mm_cvtps_epi32(ret2);
+
+ intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
+
+ _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
+ outputVectorPtr += 8;
+ }
+
+ number = eighthPoints * 8;
+ for(; number < num_points; number++){
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int16_t)rintf(r);
+ }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+ /*!
+ \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
+ \param inputVector The floating point input data buffer
+ \param outputVector The 16 bit output data buffer
+ \param scalar The value multiplied against each point in the input buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+ unsigned int number = 0;
+
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* inputVectorPtr = (const float*)inputVector;
+ int16_t* outputVectorPtr = outputVector;
+
+ float min_val = -32768;
+ float max_val = 32767;
+ float r;
+
+ __m128 vScalar = _mm_set_ps1(scalar);
+ __m128 ret;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
+
+ __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
+
+ for(;number < quarterPoints; number++){
+ ret = _mm_load_ps(inputVectorPtr);
+ inputVectorPtr += 4;
+
+ // Scale and clip
+ ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
+
+ _mm_store_ps(outputFloatBuffer, ret);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
+ *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ r = inputVector[number] * scalar;
+ if(r > max_val)
+ r = max_val;
+ else if(r < min_val)
+ r = min_val;
+ outputVector[number] = (int16_t)rintf(r);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value
+ \param inputVector The floating point input data buffer
+ \param outputVector The 16 bit output data buffer
+ \param scalar The value multiplied against each point in the input buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_32f_s32f_convert_16i_a_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
+ int16_t* outputVectorPtr = outputVector;
+ const float* inputVectorPtr = inputVector;
+ unsigned int number = 0;
+ float min_val = -32768;
+ float max_val = 32767;
+ float r;
+
+ for(number = 0; number < num_points; number++){
+ r = *inputVectorPtr++ * scalar;
+ if(r < min_val)
+ r = min_val;
+ else if(r > max_val)
+ r = max_val;
+ *outputVectorPtr++ = (int16_t)rintf(r);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+
+
+#endif /* INCLUDED_volk_gnsssdr_32f_s32f_convert_16i_a_H */
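Review note: a hypothetical caller for the generic scale-and-convert kernel above (not part of the patch; it assumes the header is volk_gnsssdr_32f_s32f_convert_16i.h). The scalar is chosen so that one input exceeds the int16_t range and exercises the clipping path:

```c
#include <stdio.h>
#include <inttypes.h>
#include "volk_gnsssdr_32f_s32f_convert_16i.h"

int main(void)
{
    float in[4] = { 0.5f, -0.25f, 2.5f, -2.0f };
    int16_t out[4];

    /* 2.5 * 16384 = 40960 saturates to 32767; -2.0 * 16384 lands exactly on -32768 */
    volk_gnsssdr_32f_s32f_convert_16i_generic(out, in, 16384.0f, 4);

    for (int i = 0; i < 4; ++i)
        printf("%d\n", out[i]);   /* 8192, -4096, 32767, -32768 */
    return 0;
}
```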
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_x2_add_32f.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_x2_add_32f.h
new file mode 100644
index 000000000..ee647b2d7
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_x2_add_32f.h
@@ -0,0 +1,147 @@
+#ifndef INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H
+#define INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+ \brief Adds the two input vectors and store their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be added
+ \param bVector One of the vectors to be added
+ \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+*/
+static inline void volk_gnsssdr_32f_x2_add_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+ const float* bPtr= bVector;
+
+ __m128 aVal, bVal, cVal;
+ for(;number < quarterPoints; number++){
+
+ aVal = _mm_loadu_ps(aPtr);
+ bVal = _mm_loadu_ps(bPtr);
+
+ cVal = _mm_add_ps(aVal, bVal);
+
+ _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
+
+ aPtr += 4;
+ bPtr += 4;
+ cPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ *cPtr++ = (*aPtr++) + (*bPtr++);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Adds the two input vectors and store their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be added
+ \param bVector One of the vectors to be added
+ \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+*/
+static inline void volk_gnsssdr_32f_x2_add_32f_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+ const float* bPtr= bVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = (*aPtr++) + (*bPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H */
+#ifndef INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H
+#define INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+ \brief Adds the two input vectors and store their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be added
+ \param bVector One of the vectors to be added
+ \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+*/
+static inline void volk_gnsssdr_32f_x2_add_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+ const float* bPtr= bVector;
+
+ __m128 aVal, bVal, cVal;
+ for(;number < quarterPoints; number++){
+
+ aVal = _mm_load_ps(aPtr);
+ bVal = _mm_load_ps(bPtr);
+
+ cVal = _mm_add_ps(aVal, bVal);
+
+ _mm_store_ps(cPtr,cVal); // Store the results back into the C container
+
+ aPtr += 4;
+ bPtr += 4;
+ cPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(;number < num_points; number++){
+ *cPtr++ = (*aPtr++) + (*bPtr++);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Adds the two input vectors and store their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be added
+ \param bVector One of the vectors to be added
+ \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+*/
+static inline void volk_gnsssdr_32f_x2_add_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ float* cPtr = cVector;
+ const float* aPtr = aVector;
+ const float* bPtr= bVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = (*aPtr++) + (*bPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_ORC
+/*!
+ \brief Adds the two input vectors and store their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be added
+ \param bVector One of the vectors to be added
+ \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+*/
+extern void volk_gnsssdr_32f_x2_add_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
+static inline void volk_gnsssdr_32f_x2_add_32f_u_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
+ volk_gnsssdr_32f_x2_add_32f_a_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
+
+#endif /* INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H */
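Review note: a hypothetical caller for the generic vector-add kernel above (not part of the patch):

```c
#include <stdio.h>
#include "volk_gnsssdr_32f_x2_add_32f.h"

int main(void)
{
    float a[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
    float b[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
    float c[4];

    volk_gnsssdr_32f_x2_add_32f_generic(c, a, b, 4);

    for (int i = 0; i < 4; ++i)
        printf("%f\n", c[i]);   /* 1.5, 2.5, 3.5, 4.5 */
    return 0;
}
```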
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_conjugate_32fc.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_conjugate_32fc.h
new file mode 100644
index 000000000..a3b8848aa
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_conjugate_32fc.h
@@ -0,0 +1,127 @@
+#ifndef INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H
+#define INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Takes the conjugate of a complex vector.
+ \param cVector The vector where the results will be stored
+ \param aVector Vector to be conjugated
+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+ */
+static inline void volk_gnsssdr_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ __m128 x;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+
+ __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
+
+ x = _mm_xor_ps(x, conjugator); // conjugate register
+
+ _mm_storeu_ps((float*)c,x); // Store the results back into the C container
+
+ a += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = lv_conj(*a);
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Takes the conjugate of a complex vector.
+ \param cVector The vector where the results will be stored
+ \param aVector Vector to be conjugated
+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+ */
+static inline void volk_gnsssdr_32fc_conjugate_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = lv_conj(*aPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H */
+#ifndef INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H
+#define INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Takes the conjugate of a complex vector.
+ \param cVector The vector where the results will be stored
+ \param aVector Vector to be conjugated
+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+ */
+static inline void volk_gnsssdr_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ __m128 x;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+
+ __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
+
+ x = _mm_xor_ps(x, conjugator); // conjugate register
+
+ _mm_store_ps((float*)c,x); // Store the results back into the C container
+
+ a += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = lv_conj(*a);
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Takes the conjugate of a complex vector.
+ \param cVector The vector where the results will be stored
+ \param aVector Vector to be conjugated
+ \param num_points The number of complex values in aVector to be conjugated and stored into cVector
+ */
+static inline void volk_gnsssdr_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = lv_conj(*aPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H */
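Review note: a hypothetical caller for the generic conjugate kernel above (not part of the patch; lv_cmake(), lv_creal() and lv_cimag() are assumed to come from volk_gnsssdr/volk_gnsssdr_complex.h):

```c
#include <stdio.h>
#include "volk_gnsssdr_32fc_conjugate_32fc.h"

int main(void)
{
    lv_32fc_t in[3], out[3];
    in[0] = lv_cmake(1.0f,  2.0f);
    in[1] = lv_cmake(0.0f, -1.0f);
    in[2] = lv_cmake(3.0f,  0.5f);

    volk_gnsssdr_32fc_conjugate_32fc_generic(out, in, 3);

    for (int i = 0; i < 3; ++i)
        printf("%f %+fi\n", lv_creal(out[i]), lv_cimag(out[i]));   /* imaginary parts flip sign */
    return 0;
}
```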
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h
new file mode 100644
index 000000000..ade8b6763
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h
@@ -0,0 +1,295 @@
+/*!
+ * \file volk_gnsssdr_32fc_convert_16ic.h
+ * \brief Volk protokernel: converts float32 complex values to 16-bit integer complex values, taking care of overflow
+ * \authors
+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
+ *
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H
+#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H
+
+#include <inttypes.h>
+#include <math.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+ \brief Converts a vector of complex floats (32 bits each part) into a vector of complex 16-bit integers (16 bits each part), saturating on overflow
+ \param inputVector The floating point input data buffer
+ \param outputVector The 16 bit output data buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
+ const unsigned int sse_iters = num_points/4;
+
+ float* inputVectorPtr = (float*)inputVector;
+ int16_t* outputVectorPtr = (int16_t*)outputVector;
+
+ float min_val = -32768;
+ float max_val = 32767;
+
+ __m128 inputVal1, inputVal2;
+ __m128i intInputVal1, intInputVal2;
+ __m128 ret1, ret2;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
+
+ for(unsigned int i = 0;i < sse_iters; i++){
+ inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+
+ // Clip
+ ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
+ ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(ret1);
+ intInputVal2 = _mm_cvtps_epi32(ret2);
+
+ intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
+
+ _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
+ outputVectorPtr += 8;
+ }
+
+ for(unsigned int i = 0; i < (num_points%4)*2; i++){
+ if(inputVectorPtr[i] > max_val)
+ inputVectorPtr[i] = max_val;
+ else if(inputVectorPtr[i] < min_val)
+ inputVectorPtr[i] = min_val;
+ outputVectorPtr[i] = (int16_t)rintf(inputVectorPtr[i]);
+ }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+ \brief Converts a vector of complex floats (32 bits each part) into a vector of complex 16-bit integers (16 bits each part), saturating on overflow
+ \param inputVector The floating point input data buffer
+ \param outputVector The 16 bit output data buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
+ const unsigned int sse_iters = num_points/4;
+
+ float* inputVectorPtr = (float*)inputVector;
+ int16_t* outputVectorPtr = (int16_t*)outputVector;
+
+ float min_val = -32768;
+ float max_val = 32767;
+
+ __m128 inputVal1, inputVal2;
+ __m128i intInputVal1, intInputVal2;
+ __m128 ret1, ret2;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
+
+ for(unsigned int i = 0;i < sse_iters; i++){
+ inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+
+ // Clip
+ ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
+ ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(ret1);
+ intInputVal2 = _mm_cvtps_epi32(ret2);
+
+ intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
+
+ _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
+ outputVectorPtr += 8;
+ }
+
+ for(unsigned int i = 0; i < (num_points%4)*2; i++){
+ if(inputVectorPtr[i] > max_val)
+ inputVectorPtr[i] = max_val;
+ else if(inputVectorPtr[i] < min_val)
+ inputVectorPtr[i] = min_val;
+ outputVectorPtr[i] = (int16_t)rintf(inputVectorPtr[i]);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Converts a vector of complex floats (32 bits each part) into a vector of complex 16-bit integers (16 bits each part), saturating on overflow
+ \param inputVector The floating point input data buffer
+ \param outputVector The 16 bit output data buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
+ float* inputVectorPtr = (float*)inputVector;
+ int16_t* outputVectorPtr = (int16_t*)outputVector;
+ float min_val = -32768;
+ float max_val = 32767;
+
+ for(unsigned int i = 0; i < num_points*2; i++){
+ if(inputVectorPtr[i] > max_val)
+ inputVectorPtr[i] = max_val;
+ else if(inputVectorPtr[i] < min_val)
+ inputVectorPtr[i] = min_val;
+ outputVectorPtr[i] = (int16_t)rintf(inputVectorPtr[i]);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H */
+
+
+#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H
+#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+ \brief Converts a vector of complex floats (32 bits each part) into a vector of complex 16-bit integers (16 bits each part), saturating on overflow
+ \param inputVector The floating point input data buffer
+ \param outputVector The 16 bit output data buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
+ const unsigned int sse_iters = num_points/4;
+
+ float* inputVectorPtr = (float*)inputVector;
+ int16_t* outputVectorPtr = (int16_t*)outputVector;
+
+ float min_val = -32768;
+ float max_val = 32767;
+
+ __m128 inputVal1, inputVal2;
+ __m128i intInputVal1, intInputVal2;
+ __m128 ret1, ret2;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
+
+ for(unsigned int i = 0;i < sse_iters; i++){
+ inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+
+ // Clip
+ ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
+ ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(ret1);
+ intInputVal2 = _mm_cvtps_epi32(ret2);
+
+ intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
+
+ _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
+ outputVectorPtr += 8;
+ }
+
+ for(unsigned int i = 0; i < (num_points%4)*2; i++){
+ if(inputVectorPtr[i] > max_val)
+ inputVectorPtr[i] = max_val;
+ else if(inputVectorPtr[i] < min_val)
+ inputVectorPtr[i] = min_val;
+ outputVectorPtr[i] = (int16_t)rintf(inputVectorPtr[i]);
+ }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+/*!
+ \brief Converts a vector of complex floats (32 bits each part) into a vector of complex 16-bit integers (16 bits each part), saturating on overflow
+ \param inputVector The floating point input data buffer
+ \param outputVector The 16 bit output data buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
+ const unsigned int sse_iters = num_points/4;
+
+ float* inputVectorPtr = (float*)inputVector;
+ int16_t* outputVectorPtr = (int16_t*)outputVector;
+
+ float min_val = -32768;
+ float max_val = 32767;
+
+ __m128 inputVal1, inputVal2;
+ __m128i intInputVal1, intInputVal2;
+ __m128 ret1, ret2;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
+
+ for(unsigned int i = 0;i < sse_iters; i++){
+ inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+
+ // Clip
+ ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
+ ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(ret1);
+ intInputVal2 = _mm_cvtps_epi32(ret2);
+
+ intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
+
+ _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
+ outputVectorPtr += 8;
+ }
+
+ for(unsigned int i = 0; i < (num_points%4)*2; i++){
+ if(inputVectorPtr[i] > max_val)
+ inputVectorPtr[i] = max_val;
+ else if(inputVectorPtr[i] < min_val)
+ inputVectorPtr[i] = min_val;
+ outputVectorPtr[i] = (int16_t)rintf(inputVectorPtr[i]);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Converts a vector of complex floats (32 bits each part) into a vector of complex 16-bit integers (16 bits each part), saturating on overflow
+ \param inputVector The floating point input data buffer
+ \param outputVector The 16 bit output data buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_convert_16ic_a_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
+ float* inputVectorPtr = (float*)inputVector;
+ int16_t* outputVectorPtr = (int16_t*)outputVector;
+ float min_val = -32768;
+ float max_val = 32767;
+
+ for(unsigned int i = 0; i < num_points*2; i++){
+ if(inputVectorPtr[i] > max_val)
+ inputVectorPtr[i] = max_val;
+ else if(inputVectorPtr[i] < min_val)
+ inputVectorPtr[i] = min_val;
+ outputVectorPtr[i] = (int16_t)rintf(inputVectorPtr[i]);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H */
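Review note: a hypothetical caller for the generic 32fc-to-16ic conversion above (not part of the patch). One detail worth flagging: every variant clips the input buffer in place through a cast of the const pointer, so callers must pass a writable vector. The sketch assumes lv_cmake() comes from volk_gnsssdr/volk_gnsssdr_complex.h.

```c
#include "volk_gnsssdr_32fc_convert_16ic.h"

int main(void)
{
    lv_32fc_t in[2];   /* must be writable: the kernel clips it in place */
    lv_16sc_t out[2];

    in[0] = lv_cmake(1000.5f, -40000.0f);   /* imaginary part exceeds the int16_t range */
    in[1] = lv_cmake(-0.4f, 0.6f);

    volk_gnsssdr_32fc_convert_16ic_generic(out, in, 2);

    /* Under the default rounding mode: out[0] == (1000, -32768), out[1] == (0, 1). */
    return 0;
}
```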
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h
new file mode 100755
index 000000000..5a97b4827
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h
@@ -0,0 +1,213 @@
+/*!
+ * \file volk_gnsssdr_32fc_convert_8ic.h
+ * \brief Volk protokernel: converts float32 complex values to 8-bit integer complex values, taking care of overflow
+ * \authors
+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
+ *
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H
+#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H
+
+#include <inttypes.h>
+#include <math.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+ \brief Converts a vector of complex floats (32 bits each part) into a vector of complex 8-bit integers (8 bits each part), saturating on overflow
+ \param inputVector The floating point input data buffer
+ \param outputVector The 8 bit output data buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
+ const unsigned int sse_iters = num_points/8;
+
+ float* inputVectorPtr = (float*)inputVector;
+ int8_t* outputVectorPtr = (int8_t*)outputVector;
+
+ float min_val = -128;
+ float max_val = 127;
+
+ __m128 inputVal1, inputVal2, inputVal3, inputVal4;
+ __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
+ __m128i int8InputVal;
+ __m128 ret1, ret2, ret3, ret4;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
+
+ for(unsigned int i = 0;i < sse_iters; i++){
+ inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal3 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal4 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+
+ // Clip
+ ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
+ ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
+ ret3 = _mm_max_ps(_mm_min_ps(inputVal3, vmax_val), vmin_val);
+ ret4 = _mm_max_ps(_mm_min_ps(inputVal4, vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(ret1);
+ intInputVal2 = _mm_cvtps_epi32(ret2);
+ intInputVal3 = _mm_cvtps_epi32(ret3);
+ intInputVal4 = _mm_cvtps_epi32(ret4);
+
+ intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
+ intInputVal2 = _mm_packs_epi32(intInputVal3, intInputVal4);
+ int8InputVal = _mm_packs_epi16(intInputVal1, intInputVal2);
+
+ _mm_storeu_si128((__m128i*)outputVectorPtr, int8InputVal);
+ outputVectorPtr += 16;
+ }
+
+ for(unsigned int i = 0; i < (num_points%8)*2; i++){
+ if(inputVectorPtr[i] > max_val)
+ inputVectorPtr[i] = max_val;
+ else if(inputVectorPtr[i] < min_val)
+ inputVectorPtr[i] = min_val;
+ outputVectorPtr[i] = (int8_t)rintf(inputVectorPtr[i]);
+ }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Converts a vector of complex floats (32 bits each part) into a vector of complex 8-bit integers (8 bits each part), saturating on overflow
+ \param inputVector The floating point input data buffer
+ \param outputVector The 8 bit output data buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
+ float* inputVectorPtr = (float*)inputVector;
+ int8_t* outputVectorPtr = (int8_t*)outputVector;
+ float min_val = -128;
+ float max_val = 127;
+
+ for(unsigned int i = 0; i < num_points*2; i++){
+ if(inputVectorPtr[i] > max_val)
+ inputVectorPtr[i] = max_val;
+ else if(inputVectorPtr[i] < min_val)
+ inputVectorPtr[i] = min_val;
+ outputVectorPtr[i] = (int8_t)rintf(inputVectorPtr[i]);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H */
+
+
+#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H
+#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+ \brief Converts a vector of complex floats (32 bits each part) into a vector of complex 8-bit integers (8 bits each part), saturating on overflow
+ \param inputVector The floating point input data buffer
+ \param outputVector The 8 bit output data buffer
+ \param num_points The number of data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
+ const unsigned int sse_iters = num_points/8;
+
+ float* inputVectorPtr = (float*)inputVector;
+ int8_t* outputVectorPtr = (int8_t*)outputVector;
+
+ float min_val = -128;
+ float max_val = 127;
+
+ __m128 inputVal1, inputVal2, inputVal3, inputVal4;
+ __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
+ __m128i int8InputVal;
+ __m128 ret1, ret2, ret3, ret4;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
+
+ for(unsigned int i = 0;i < sse_iters; i++){
+ inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal3 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal4 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+
+ // Clip
+ ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
+ ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
+ ret3 = _mm_max_ps(_mm_min_ps(inputVal3, vmax_val), vmin_val);
+ ret4 = _mm_max_ps(_mm_min_ps(inputVal4, vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(ret1);
+ intInputVal2 = _mm_cvtps_epi32(ret2);
+ intInputVal3 = _mm_cvtps_epi32(ret3);
+ intInputVal4 = _mm_cvtps_epi32(ret4);
+
+ intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
+ intInputVal2 = _mm_packs_epi32(intInputVal3, intInputVal4);
+ int8InputVal = _mm_packs_epi16(intInputVal1, intInputVal2);
+
+ _mm_store_si128((__m128i*)outputVectorPtr, int8InputVal);
+ outputVectorPtr += 16;
+ }
+
+    for(unsigned int i = 0; i < (num_points % 8) * 2; i++){  // remaining I/Q components (8 complex samples consumed per SSE iteration)
+ if(inputVectorPtr[i] > max_val)
+ inputVectorPtr[i] = max_val;
+ else if(inputVectorPtr[i] < min_val)
+ inputVectorPtr[i] = min_val;
+ outputVectorPtr[i] = (int8_t)rintf(inputVectorPtr[i]);
+ }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Converts a complex float vector (32 bits per component) into a complex 8-bit integer vector, saturating out-of-range values
+ \param inputVector The floating point input data buffer
+ \param outputVector The 8 bit output data buffer
+ \param num_points The number of complex data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_convert_8ic_a_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points){
+ float* inputVectorPtr = (float*)inputVector;
+ int8_t* outputVectorPtr = (int8_t*)outputVector;
+ float min_val = -128;
+ float max_val = 127;
+
+ for(unsigned int i = 0; i < num_points*2; i++){
+ if(inputVectorPtr[i] > max_val)
+ inputVectorPtr[i] = max_val;
+ else if(inputVectorPtr[i] < min_val)
+ inputVectorPtr[i] = min_val;
+ outputVectorPtr[i] = (int8_t)rintf(inputVectorPtr[i]);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H */
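Reviewer note (illustration only, not part of the patch): a minimal standalone sketch of what the generic 32fc -> 8ic protokernel above computes, assuming lv_32fc_t maps to C99 float complex and lv_8sc_t to two int8_t components, as defined in volk_gnsssdr_complex.h. The helper name convert_32fc_to_8ic is invented for this example.

#include <complex.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the generic protokernel: clip each I/Q component to [-128, 127],
   then round to the nearest 8-bit integer. */
static void convert_32fc_to_8ic(int8_t* out, const float complex* in, unsigned int num_points)
{
    const float* fin = (const float*)in;       /* view input as interleaved I/Q floats */
    for (unsigned int i = 0; i < num_points * 2; i++) {
        float v = fin[i];
        if (v > 127.0f) v = 127.0f;            /* saturate instead of wrapping */
        else if (v < -128.0f) v = -128.0f;
        out[i] = (int8_t)rintf(v);
    }
}

int main(void)
{
    float complex in[3] = { 1.4f + 200.0f * I, -300.5f - 2.6f * I, 126.7f + 0.0f * I };
    int8_t out[6];
    convert_32fc_to_8ic(out, in, 3);
    for (int i = 0; i < 6; i++) printf("%d ", out[i]);  /* prints: 1 127 -128 -3 127 0 */
    printf("\n");
    return 0;
}

Saturating before rintf() is the point of the kernel: a plain (int8_t) cast would wrap out-of-range samples instead of clipping them.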
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_magnitude_squared_32f.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_magnitude_squared_32f.h
new file mode 100644
index 000000000..ce28f866e
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_magnitude_squared_32f.h
@@ -0,0 +1,228 @@
+#ifndef INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_u_H
+#define INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+ */
+static inline void volk_gnsssdr_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+
+ __m128 cplxValue1, cplxValue2, result;
+ for(;number < quarterPoints; number++){
+ cplxValue1 = _mm_loadu_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue2 = _mm_loadu_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
+ cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
+
+ result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
+
+ _mm_storeu_ps(magnitudeVectorPtr, result);
+ magnitudeVectorPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ float val1Real = *complexVectorPtr++;
+ float val1Imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+ /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+ */
+static inline void volk_gnsssdr_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+
+ __m128 cplxValue1, cplxValue2, iValue, qValue, result;
+ for(;number < quarterPoints; number++){
+ cplxValue1 = _mm_loadu_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue2 = _mm_loadu_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ // Arrange in i1i2i3i4 format
+ iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
+ // Arrange in q1q2q3q4 format
+ qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
+
+ iValue = _mm_mul_ps(iValue, iValue); // Square the I values
+ qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
+
+ result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
+
+ _mm_storeu_ps(magnitudeVectorPtr, result);
+ magnitudeVectorPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ float val1Real = *complexVectorPtr++;
+ float val1Imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+ */
+static inline void volk_gnsssdr_32fc_magnitude_squared_32f_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+ unsigned int number = 0;
+ for(number = 0; number < num_points; number++){
+ const float real = *complexVectorPtr++;
+ const float imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = (real*real) + (imag*imag);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_u_H */
+#ifndef INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_a_H
+#define INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+ */
+static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+
+ __m128 cplxValue1, cplxValue2, result;
+ for(;number < quarterPoints; number++){
+ cplxValue1 = _mm_load_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue2 = _mm_load_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
+ cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
+
+ result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
+
+ _mm_store_ps(magnitudeVectorPtr, result);
+ magnitudeVectorPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ float val1Real = *complexVectorPtr++;
+ float val1Imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_SSE
+#include <xmmintrin.h>
+ /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+ */
+static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+
+ __m128 cplxValue1, cplxValue2, iValue, qValue, result;
+ for(;number < quarterPoints; number++){
+ cplxValue1 = _mm_load_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ cplxValue2 = _mm_load_ps(complexVectorPtr);
+ complexVectorPtr += 4;
+
+ // Arrange in i1i2i3i4 format
+ iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
+ // Arrange in q1q2q3q4 format
+ qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
+
+ iValue = _mm_mul_ps(iValue, iValue); // Square the I values
+ qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
+
+ result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
+
+ _mm_store_ps(magnitudeVectorPtr, result);
+ magnitudeVectorPtr += 4;
+ }
+
+ number = quarterPoints * 4;
+ for(; number < num_points; number++){
+ float val1Real = *complexVectorPtr++;
+ float val1Imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
+ }
+}
+#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
+ \param complexVector The vector containing the complex input values
+ \param magnitudeVector The vector containing the real output values
+ \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+ */
+static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+ const float* complexVectorPtr = (float*)complexVector;
+ float* magnitudeVectorPtr = magnitudeVector;
+ unsigned int number = 0;
+ for(number = 0; number < num_points; number++){
+ const float real = *complexVectorPtr++;
+ const float imag = *complexVectorPtr++;
+ *magnitudeVectorPtr++ = (real*real) + (imag*imag);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_a_H */
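Reviewer note (illustration only, not part of the patch): a scalar sketch of the computation that the SSE/SSE3 paths above vectorize, assuming lv_32fc_t maps to C99 float complex; the helper name magnitude_squared is invented for this example.

#include <complex.h>
#include <stdio.h>

/* Scalar reference for |z|^2 = I^2 + Q^2 over interleaved complex samples. */
static void magnitude_squared(float* mag, const float complex* in, unsigned int num_points)
{
    const float* f = (const float*)in;          /* interleaved I/Q view */
    for (unsigned int n = 0; n < num_points; n++) {
        const float re = f[2 * n];
        const float im = f[2 * n + 1];
        mag[n] = re * re + im * im;             /* squared magnitude, no sqrt */
    }
}

int main(void)
{
    float complex in[2] = { 3.0f + 4.0f * I, 1.0f - 1.0f * I };
    float mag[2];
    magnitude_squared(mag, in, 2);
    printf("%g %g\n", mag[0], mag[1]);          /* prints: 25 2 */
    return 0;
}

The SSE3 variant gets the I^2 + Q^2 sum with a single horizontal add (_mm_hadd_ps), while the plain SSE variant first de-interleaves I and Q with shuffles and then adds the two squared vectors.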
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32f_convert_8ic.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32f_convert_8ic.h
new file mode 100644
index 000000000..9c33c9870
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32f_convert_8ic.h
@@ -0,0 +1,231 @@
+/*!
+ * \file volk_gnsssdr_32fc_s32f_convert_8ic.h
+ * \brief Volk protokernel: converts float32 complex values into 8-bit integer complex values, taking care of overflow
+ * \authors
+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
+ *
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_u_H
+#define INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_u_H
+
+#include <inttypes.h>
+#include <math.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+ \brief Converts a complex float vector (32 bits per component) into a complex 8-bit integer vector, dividing each component by scalar and saturating out-of-range values
+ \param inputVector The floating point input data buffer
+ \param outputVector The 8 bit output data buffer
+ \param scalar The value by which every input component is divided before conversion
+ \param num_points The number of complex data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_s32f_convert_8ic_u_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points){
+ const unsigned int sse_iters = num_points/8;
+
+ float* inputVectorPtr = (float*)inputVector;
+ int8_t* outputVectorPtr = (int8_t*)outputVector;
+ __m128 invScalar = _mm_set_ps1(1.0/scalar);
+
+ float min_val = -128;
+ float max_val = 127;
+
+ __m128 inputVal1, inputVal2, inputVal3, inputVal4;
+ __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
+ __m128i int8InputVal;
+ __m128 ret1, ret2, ret3, ret4;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
+
+ for(unsigned int i = 0;i < sse_iters; i++){
+ inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal3 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal4 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+
+ inputVal1 = _mm_mul_ps(inputVal1, invScalar);
+ inputVal2 = _mm_mul_ps(inputVal2, invScalar);
+ inputVal3 = _mm_mul_ps(inputVal3, invScalar);
+ inputVal4 = _mm_mul_ps(inputVal4, invScalar);
+ // Clip
+ ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
+ ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
+ ret3 = _mm_max_ps(_mm_min_ps(inputVal3, vmax_val), vmin_val);
+ ret4 = _mm_max_ps(_mm_min_ps(inputVal4, vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(ret1);
+ intInputVal2 = _mm_cvtps_epi32(ret2);
+ intInputVal3 = _mm_cvtps_epi32(ret3);
+ intInputVal4 = _mm_cvtps_epi32(ret4);
+
+ intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
+ intInputVal2 = _mm_packs_epi32(intInputVal3, intInputVal4);
+ int8InputVal = _mm_packs_epi16(intInputVal1, intInputVal2);
+
+ _mm_storeu_si128((__m128i*)outputVectorPtr, int8InputVal);
+ outputVectorPtr += 16;
+ }
+
+ float scaled = 0;
+    for(unsigned int i = 0; i < (num_points % 8) * 2; i++){  // remaining I/Q components (8 complex samples consumed per SSE iteration)
+ scaled = inputVectorPtr[i]/scalar;
+ if(scaled > max_val)
+ scaled = max_val;
+ else if(scaled < min_val)
+ scaled = min_val;
+ outputVectorPtr[i] = (int8_t)rintf(scaled);
+ }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Converts a complex float vector (32 bits per component) into a complex 8-bit integer vector, dividing each component by scalar and saturating out-of-range values
+ \param inputVector The floating point input data buffer
+ \param outputVector The 8 bit output data buffer
+ \param scalar The value by which every input component is divided before conversion
+ \param num_points The number of complex data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_s32f_convert_8ic_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points){
+ float* inputVectorPtr = (float*)inputVector;
+ int8_t* outputVectorPtr = (int8_t*)outputVector;
+ float scaled = 0;
+ float min_val = -128;
+ float max_val = 127;
+
+ for(unsigned int i = 0; i < num_points*2; i++){
+ scaled = (inputVectorPtr[i])/scalar;
+ if(scaled > max_val)
+ scaled = max_val;
+ else if(scaled < min_val)
+ scaled = min_val;
+ outputVectorPtr[i] = (int8_t)rintf(scaled);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_u_H */
+
+
+#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_a_H
+#define INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <math.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+#ifdef LV_HAVE_SSE2
+#include <emmintrin.h>
+/*!
+ \brief Converts a complex float vector (32 bits per component) into a complex 8-bit integer vector, dividing each component by scalar and saturating out-of-range values
+ \param inputVector The floating point input data buffer
+ \param outputVector The 8 bit output data buffer
+ \param scalar The value by which every input component is divided before conversion
+ \param num_points The number of complex data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_s32f_convert_8ic_a_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points){
+ const unsigned int sse_iters = num_points/8;
+
+ float* inputVectorPtr = (float*)inputVector;
+ int8_t* outputVectorPtr = (int8_t*)outputVector;
+ __m128 invScalar = _mm_set_ps1(1.0/scalar);
+
+ float min_val = -128;
+ float max_val = 127;
+
+ __m128 inputVal1, inputVal2, inputVal3, inputVal4;
+ __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
+ __m128i int8InputVal;
+ __m128 ret1, ret2, ret3, ret4;
+ __m128 vmin_val = _mm_set_ps1(min_val);
+ __m128 vmax_val = _mm_set_ps1(max_val);
+
+ for(unsigned int i = 0;i < sse_iters; i++){
+ inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal3 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+ inputVal4 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
+
+ inputVal1 = _mm_mul_ps(inputVal1, invScalar);
+ inputVal2 = _mm_mul_ps(inputVal2, invScalar);
+ inputVal3 = _mm_mul_ps(inputVal3, invScalar);
+ inputVal4 = _mm_mul_ps(inputVal4, invScalar);
+ // Clip
+ ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
+ ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
+ ret3 = _mm_max_ps(_mm_min_ps(inputVal3, vmax_val), vmin_val);
+ ret4 = _mm_max_ps(_mm_min_ps(inputVal4, vmax_val), vmin_val);
+
+ intInputVal1 = _mm_cvtps_epi32(ret1);
+ intInputVal2 = _mm_cvtps_epi32(ret2);
+ intInputVal3 = _mm_cvtps_epi32(ret3);
+ intInputVal4 = _mm_cvtps_epi32(ret4);
+
+ intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
+ intInputVal2 = _mm_packs_epi32(intInputVal3, intInputVal4);
+ int8InputVal = _mm_packs_epi16(intInputVal1, intInputVal2);
+
+ _mm_store_si128((__m128i*)outputVectorPtr, int8InputVal);
+ outputVectorPtr += 16;
+ }
+
+ float scaled = 0;
+    for(unsigned int i = 0; i < (num_points % 8) * 2; i++){  // remaining I/Q components (8 complex samples consumed per SSE iteration)
+ scaled = inputVectorPtr[i]/scalar;
+ if(scaled > max_val)
+ scaled = max_val;
+ else if(scaled < min_val)
+ scaled = min_val;
+ outputVectorPtr[i] = (int8_t)rintf(scaled);
+ }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Converts a complex float vector (32 bits per component) into a complex 8-bit integer vector, dividing each component by scalar and saturating out-of-range values
+ \param inputVector The floating point input data buffer
+ \param outputVector The 8 bit output data buffer
+ \param scalar The value by which every input component is divided before conversion
+ \param num_points The number of complex data values to be converted
+ */
+static inline void volk_gnsssdr_32fc_s32f_convert_8ic_a_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points){
+ float* inputVectorPtr = (float*)inputVector;
+ int8_t* outputVectorPtr = (int8_t*)outputVector;
+ float scaled = 0;
+ float min_val = -128;
+ float max_val = 127;
+
+ for(unsigned int i = 0; i < num_points*2; i++){
+ scaled = inputVectorPtr[i]/scalar;
+ if(scaled > max_val)
+ scaled = max_val;
+ else if(scaled < min_val)
+ scaled = min_val;
+ outputVectorPtr[i] = (int8_t)rintf(scaled);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_a_H */
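Reviewer note (illustration only, not part of the patch): the s32f variant differs from the plain conversion above in that every component is first divided by the scalar and only then saturated and rounded. A standalone per-component sketch follows; the helper name scale_to_int8 is invented for this example.

#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* One component of the s32f conversion: divide by the scalar first,
   then saturate to [-128, 127] and round. */
static int8_t scale_to_int8(float x, float scalar)
{
    float scaled = x / scalar;
    if (scaled > 127.0f) scaled = 127.0f;
    else if (scaled < -128.0f) scaled = -128.0f;
    return (int8_t)rintf(scaled);
}

int main(void)
{
    const float scalar = 4.0f;
    const float iq[4] = { 4.0f, 510.0f, -1000.0f, -7.0f };  /* interleaved I/Q components */
    for (int i = 0; i < 4; i++) printf("%d ", scale_to_int8(iq[i], scalar));
    printf("\n");                               /* prints: 1 127 -128 -2 */
    return 0;
}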
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc.h
new file mode 100644
index 000000000..0b5761176
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc.h
@@ -0,0 +1,266 @@
+/*!
+ * \file volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc
+ * \brief Volk protokernel: replaces the tracking function for update_local_code
+ * \authors
+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
+ *
+ *
+ * Volk protokernel that replaces the tracking function for update_local_code
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see .
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_H
+#define INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_H
+
+#include <inttypes.h>
+#include <math.h>
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+#ifdef LV_HAVE_SSE4_1
+#include <smmintrin.h>
+ /*!
+ \brief Regenerates the local very early code: for each output sample, selects the chip of d_ca_code addressed by the current code phase
+ \param d_very_early_code The vector where the regenerated code samples will be stored
+ \param d_very_early_late_spc_chips The very early-late spacing, in chips
+ \param code_length_half_chips The code length, in half chips
+ \param code_phase_step_half_chips The code phase increment per sample, in half chips
+ \param tcode_half_chips_input The initial code phase, in half chips
+ \param d_ca_code The local replica code
+ \param num_points The number of code samples to generate
+ */
+static inline void volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_sse4_1(lv_32fc_t* d_very_early_code, const float d_very_early_late_spc_chips, const float code_length_half_chips, const float code_phase_step_half_chips, const float tcode_half_chips_input, const lv_32fc_t* d_ca_code, unsigned int num_points){
+
+// float* pointer1 = (float*)&d_very_early_late_spc_chips;
+// *pointer1 = 1;
+// float* pointer2 = (float*)&code_length_half_chips;
+// *pointer2 = 6;
+// float* pointer3 = (float*)&code_phase_step_half_chips;
+// *pointer3 = 7;
+// float* pointer4 = (float*)&tcode_half_chips_input;
+// *pointer4 = 8;
+
+ const unsigned int sse_iters = num_points / 4;
+
+ __m128 tquot, fmod_num, fmod_result, associated_chip_index_array;
+
+ __m128 tcode_half_chips_array = _mm_set_ps (tcode_half_chips_input+3*code_phase_step_half_chips, tcode_half_chips_input+2*code_phase_step_half_chips, tcode_half_chips_input+code_phase_step_half_chips, tcode_half_chips_input);
+ __m128 code_phase_step_half_chips_array = _mm_set1_ps (code_phase_step_half_chips*4);
+ __m128 d_very_early_late_spc_chips_Multiplied_by_2 = _mm_set1_ps (2*d_very_early_late_spc_chips);
+ __m128 code_length_half_chips_array = _mm_set1_ps (code_length_half_chips);
+ __m128 twos = _mm_set1_ps (2);
+ __m128i associated_chip_index_array_int;
+
+ __VOLK_ATTR_ALIGNED(16) int32_t output[4];
+
+ for (unsigned int i = 0; i < sse_iters; i++)
+ {
+ //fmod = numer - tquot * denom; tquot = numer/denom truncated
+ //associated_chip_index = 2 + round(fmod(tcode_half_chips - 2*d_very_early_late_spc_chips, code_length_half_chips));
+ fmod_num = _mm_sub_ps (tcode_half_chips_array, d_very_early_late_spc_chips_Multiplied_by_2);
+ tquot = _mm_div_ps (fmod_num, code_length_half_chips_array);
+ tquot = _mm_round_ps (tquot, (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) );
+ fmod_result = _mm_sub_ps (fmod_num, _mm_mul_ps (tquot, code_length_half_chips_array));
+
+ associated_chip_index_array = _mm_round_ps (fmod_result, (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC));
+ associated_chip_index_array = _mm_add_ps(twos, associated_chip_index_array);
+ associated_chip_index_array_int = _mm_cvtps_epi32 (associated_chip_index_array);
+ _mm_storeu_si128 ((__m128i*)output, associated_chip_index_array_int);
+
+ //d_very_early_code[i] = d_ca_code[associated_chip_index];
+ *d_very_early_code++ = d_ca_code[output[0]];
+ *d_very_early_code++ = d_ca_code[output[1]];
+ *d_very_early_code++ = d_ca_code[output[2]];
+ *d_very_early_code++ = d_ca_code[output[3]];
+
+ //tcode_half_chips = tcode_half_chips + code_phase_step_half_chips;
+ tcode_half_chips_array = _mm_add_ps (tcode_half_chips_array, code_phase_step_half_chips_array);
+ }
+
+ if (num_points%4!=0)
+ {
+ __VOLK_ATTR_ALIGNED(16) float tcode_half_chips_stored[4];
+            _mm_storeu_ps (tcode_half_chips_stored, tcode_half_chips_array); // tcode_half_chips_array holds floats, so store it with a float store
+
+ int associated_chip_index;
+ float tcode_half_chips = tcode_half_chips_stored[0];
+ float d_very_early_late_spc_chips_multiplied_by_2 = 2*d_very_early_late_spc_chips;
+
+ for (unsigned int i = 0; i < num_points%4; i++)
+ {
+ associated_chip_index = 2 + round(fmod(tcode_half_chips - d_very_early_late_spc_chips_multiplied_by_2, code_length_half_chips));
+ d_very_early_code[i] = d_ca_code[associated_chip_index];
+ tcode_half_chips = tcode_half_chips + code_phase_step_half_chips;
+ }
+ }
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Regenerates the local very early code: for each output sample, selects the chip of d_ca_code addressed by the current code phase
+ \param d_very_early_code The vector where the regenerated code samples will be stored
+ \param d_very_early_late_spc_chips The very early-late spacing, in chips
+ \param code_length_half_chips The code length, in half chips
+ \param code_phase_step_half_chips The code phase increment per sample, in half chips
+ \param tcode_half_chips_input The initial code phase, in half chips
+ \param d_ca_code The local replica code
+ \param num_points The number of code samples to generate
+ */
+static inline void volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_generic(lv_32fc_t* d_very_early_code, const float d_very_early_late_spc_chips, const float code_length_half_chips, const float code_phase_step_half_chips, const float tcode_half_chips_input, const lv_32fc_t* d_ca_code, unsigned int num_points){
+
+    // Debug overrides of the input parameters, disabled here as in the other variants:
+//    float* pointer1 = (float*)&d_very_early_late_spc_chips;
+//    *pointer1 = 1;
+//    float* pointer2 = (float*)&code_length_half_chips;
+//    *pointer2 = 6;
+//    float* pointer3 = (float*)&code_phase_step_half_chips;
+//    *pointer3 = 7;
+//    float* pointer4 = (float*)&tcode_half_chips_input;
+//    *pointer4 = 8;
+
+ int associated_chip_index;
+ float tcode_half_chips = tcode_half_chips_input;
+ float d_very_early_late_spc_chips_multiplied_by_2 = 2*d_very_early_late_spc_chips;
+
+ for (unsigned int i = 0; i < num_points; i++)
+ {
+ associated_chip_index = 2 + round(fmod(tcode_half_chips - d_very_early_late_spc_chips_multiplied_by_2, code_length_half_chips));
+ d_very_early_code[i] = d_ca_code[associated_chip_index];
+ tcode_half_chips = tcode_half_chips + code_phase_step_half_chips;
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_H */
+#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_H
+#define INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_H
+
+#include <inttypes.h>
+#include <math.h>
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+#ifdef LV_HAVE_SSE4_1
+#include <smmintrin.h>
+ /*!
+ \brief Regenerates the local very early code: for each output sample, selects the chip of d_ca_code addressed by the current code phase
+ \param d_very_early_code The vector where the regenerated code samples will be stored
+ \param d_very_early_late_spc_chips The very early-late spacing, in chips
+ \param code_length_half_chips The code length, in half chips
+ \param code_phase_step_half_chips The code phase increment per sample, in half chips
+ \param tcode_half_chips_input The initial code phase, in half chips
+ \param d_ca_code The local replica code
+ \param num_points The number of code samples to generate
+ */
+static inline void volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_sse4_1(lv_32fc_t* d_very_early_code, const float d_very_early_late_spc_chips, const float code_length_half_chips, const float code_phase_step_half_chips, const float tcode_half_chips_input, const lv_32fc_t* d_ca_code, unsigned int num_points){
+
+ // float* pointer1 = (float*)&d_very_early_late_spc_chips;
+ // *pointer1 = 1;
+ // float* pointer2 = (float*)&code_length_half_chips;
+ // *pointer2 = 6;
+ // float* pointer3 = (float*)&code_phase_step_half_chips;
+ // *pointer3 = 7;
+ // float* pointer4 = (float*)&tcode_half_chips_input;
+ // *pointer4 = 8;
+
+ const unsigned int sse_iters = num_points / 4;
+
+ __m128 tquot, fmod_num, fmod_result, associated_chip_index_array;
+
+ __m128 tcode_half_chips_array = _mm_set_ps (tcode_half_chips_input+3*code_phase_step_half_chips, tcode_half_chips_input+2*code_phase_step_half_chips, tcode_half_chips_input+code_phase_step_half_chips, tcode_half_chips_input);
+ __m128 code_phase_step_half_chips_array = _mm_set1_ps (code_phase_step_half_chips*4);
+ __m128 d_very_early_late_spc_chips_Multiplied_by_2 = _mm_set1_ps (2*d_very_early_late_spc_chips);
+ __m128 code_length_half_chips_array = _mm_set1_ps (code_length_half_chips);
+ __m128 twos = _mm_set1_ps (2);
+ __m128i associated_chip_index_array_int;
+
+ __VOLK_ATTR_ALIGNED(16) int32_t output[4];
+
+ for (unsigned int i = 0; i < sse_iters; i++)
+ {
+ //fmod = numer - tquot * denom; tquot = numer/denom truncated
+ //associated_chip_index = 2 + round(fmod(tcode_half_chips - 2*d_very_early_late_spc_chips, code_length_half_chips));
+ fmod_num = _mm_sub_ps (tcode_half_chips_array, d_very_early_late_spc_chips_Multiplied_by_2);
+ tquot = _mm_div_ps (fmod_num, code_length_half_chips_array);
+ tquot = _mm_round_ps (tquot, (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) );
+ fmod_result = _mm_sub_ps (fmod_num, _mm_mul_ps (tquot, code_length_half_chips_array));
+
+ associated_chip_index_array = _mm_round_ps (fmod_result, (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC));
+ associated_chip_index_array = _mm_add_ps(twos, associated_chip_index_array);
+ associated_chip_index_array_int = _mm_cvtps_epi32 (associated_chip_index_array);
+ _mm_store_si128 ((__m128i*)output, associated_chip_index_array_int);
+
+ //d_very_early_code[i] = d_ca_code[associated_chip_index];
+ *d_very_early_code++ = d_ca_code[output[0]];
+ *d_very_early_code++ = d_ca_code[output[1]];
+ *d_very_early_code++ = d_ca_code[output[2]];
+ *d_very_early_code++ = d_ca_code[output[3]];
+
+ //tcode_half_chips = tcode_half_chips + code_phase_step_half_chips;
+ tcode_half_chips_array = _mm_add_ps (tcode_half_chips_array, code_phase_step_half_chips_array);
+ }
+
+ if (num_points%4!=0)
+ {
+ __VOLK_ATTR_ALIGNED(16) float tcode_half_chips_stored[4];
+            _mm_store_ps (tcode_half_chips_stored, tcode_half_chips_array); // tcode_half_chips_array holds floats, so store it with a float store
+
+ int associated_chip_index;
+ float tcode_half_chips = tcode_half_chips_stored[0];
+ float d_very_early_late_spc_chips_multiplied_by_2 = 2*d_very_early_late_spc_chips;
+
+ for (unsigned int i = 0; i < num_points%4; i++)
+ {
+ associated_chip_index = 2 + round(fmod(tcode_half_chips - d_very_early_late_spc_chips_multiplied_by_2, code_length_half_chips));
+ d_very_early_code[i] = d_ca_code[associated_chip_index];
+ tcode_half_chips = tcode_half_chips + code_phase_step_half_chips;
+ }
+ }
+
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Regenerates the local very early code: for each output sample, selects the chip of d_ca_code addressed by the current code phase
+ \param d_very_early_code The vector where the regenerated code samples will be stored
+ \param d_very_early_late_spc_chips The very early-late spacing, in chips
+ \param code_length_half_chips The code length, in half chips
+ \param code_phase_step_half_chips The code phase increment per sample, in half chips
+ \param tcode_half_chips_input The initial code phase, in half chips
+ \param d_ca_code The local replica code
+ \param num_points The number of code samples to generate
+ */
+static inline void volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_generic(lv_32fc_t* d_very_early_code, const float d_very_early_late_spc_chips, const float code_length_half_chips, const float code_phase_step_half_chips, const float tcode_half_chips_input, const lv_32fc_t* d_ca_code, unsigned int num_points){
+
+ // float* pointer1 = (float*)&d_very_early_late_spc_chips;
+ // *pointer1 = 1;
+ // float* pointer2 = (float*)&code_length_half_chips;
+ // *pointer2 = 6;
+ // float* pointer3 = (float*)&code_phase_step_half_chips;
+ // *pointer3 = 7;
+ // float* pointer4 = (float*)&tcode_half_chips_input;
+ // *pointer4 = 8;
+
+ int associated_chip_index;
+ float tcode_half_chips = tcode_half_chips_input;
+ float d_very_early_late_spc_chips_multiplied_by_2 = 2*d_very_early_late_spc_chips;
+
+ for (unsigned int i = 0; i < num_points; i++)
+ {
+ associated_chip_index = 2 + round(fmod(tcode_half_chips - d_very_early_late_spc_chips_multiplied_by_2, code_length_half_chips));
+ d_very_early_code[i] = d_ca_code[associated_chip_index];
+ tcode_half_chips = tcode_half_chips + code_phase_step_half_chips;
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_H */
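Reviewer note (illustration only, not part of the patch): the per-sample index arithmetic that both update_local_code variants implement, written in plain C so the fmod()-based wrap-around is easy to follow. The constants below are made-up values, not taken from any GNSS-SDR configuration, and the "+ 2" offset simply mirrors the kernels above.

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* Made-up values, only to exercise the formula. */
    const float very_early_late_spc_chips = 0.5f;
    const float code_length_half_chips = 2046.0f;
    const float code_phase_step_half_chips = 0.9f;
    float tcode_half_chips = 10.3f;

    for (int i = 0; i < 4; i++) {
        /* Same arithmetic as the kernels: fmod() keeps the phase inside one code period. */
        int associated_chip_index = 2 + (int)round(
            fmod(tcode_half_chips - 2.0f * very_early_late_spc_chips, code_length_half_chips));
        printf("sample %d -> chip index %d\n", i, associated_chip_index);  /* 11, 12, 13, 14 */
        /* Here the kernel would copy d_ca_code[associated_chip_index]
           into d_very_early_code[i]. */
        tcode_half_chips += code_phase_step_half_chips;
    }
    return 0;
}

The SSE4.1 paths compute the same fmod() as truncated-quotient subtraction (tquot = trunc(numer/denom), fmod = numer - tquot*denom) on four samples at a time, then gather the four code chips through the stored integer indices.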
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32fc_multiply_32fc.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32fc_multiply_32fc.h
new file mode 100644
index 000000000..d5135d89f
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32fc_multiply_32fc.h
@@ -0,0 +1,178 @@
+#ifndef INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_u_H
+#define INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <float.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+/*!
+ \brief Multiplies the input vector by a complex scalar and stores the results in the output vector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be multiplied
+ \param scalar The complex scalar to multiply aVector
+ \param num_points The number of complex values in aVector to be multiplied by the scalar and stored into cVector
+*/
+static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ __m128 x, yl, yh, z, tmp1, tmp2;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+
+ // Set up constant scalar vector
+ yl = _mm_set_ps1(lv_creal(scalar));
+ yh = _mm_set_ps1(lv_cimag(scalar));
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ _mm_storeu_ps((float*)c,z); // Store the results back into the C container
+
+ a += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = (*a) * scalar;
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Multiplies the input vector by a complex scalar and stores the results in the output vector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be multiplied
+ \param scalar The complex scalar to multiply aVector
+ \param num_points The number of complex values in aVector to be multiplied by the scalar and stored into cVector
+*/
+static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ unsigned int number = num_points;
+
+    // unrolled loop: process 8 samples per iteration
+ while (number >= 8){
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ number -= 8;
+ }
+
+ // clean up any remaining
+ while (number-- > 0)
+ *cPtr++ = *aPtr++ * scalar;
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_u_H */
+#ifndef INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_a_H
+#define INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <float.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Multiplies the input vector by a complex scalar and stores the results in the output vector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be multiplied
+ \param scalar The complex scalar to multiply aVector
+ \param num_points The number of complex values in aVector to be multiplied by the scalar and stored into cVector
+ */
+static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ __m128 x, yl, yh, z, tmp1, tmp2;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+
+ // Set up constant scalar vector
+ yl = _mm_set_ps1(lv_creal(scalar));
+ yh = _mm_set_ps1(lv_cimag(scalar));
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ _mm_store_ps((float*)c,z); // Store the results back into the C container
+
+ a += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = (*a) * scalar;
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Multiplies the input vector by a complex scalar and stores the results in the output vector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be multiplied
+ \param scalar The complex scalar to multiply aVector
+ \param num_points The number of complex values in aVector to be multiplied by the scalar and stored into cVector
+ */
+static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ unsigned int number = num_points;
+
+    // unrolled loop: process 8 samples per iteration
+ while (number >= 8){
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ *cPtr++ = (*aPtr++) * scalar;
+ number -= 8;
+ }
+
+ // clean up any remaining
+ while (number-- > 0)
+ *cPtr++ = *aPtr++ * scalar;
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+
+
+
+#endif /* INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_a_H */
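Reviewer note (illustration only, not part of the patch): what the scalar-multiply protokernel computes, written with C99 complex directly, assuming lv_32fc_t maps to float complex. A unit-magnitude scalar acts as a phase rotation, which is the typical use in carrier wipe-off. The helper name multiply_by_scalar is invented for this example.

#include <complex.h>
#include <stdio.h>

/* Scalar reference: one complex multiply per sample. */
static void multiply_by_scalar(float complex* out, const float complex* in,
                               float complex scalar, unsigned int num_points)
{
    for (unsigned int n = 0; n < num_points; n++)
        out[n] = in[n] * scalar;
}

int main(void)
{
    const float complex rot = cexpf(I * 1.5707963f);   /* roughly a 90 degree phase rotation */
    float complex in[2] = { 1.0f + 0.0f * I, 0.0f + 2.0f * I };
    float complex out[2];
    multiply_by_scalar(out, in, rot, 2);
    for (int n = 0; n < 2; n++)
        printf("(%g, %g)\n", crealf(out[n]), cimagf(out[n]));  /* ~(0, 1) and ~(-2, 0) */
    return 0;
}

The SSE3 kernels implement the same product by splitting the scalar into broadcast real and imaginary parts and combining the two partial products with _mm_addsub_ps.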
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_dot_prod_32fc.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_dot_prod_32fc.h
new file mode 100644
index 000000000..08a10aa6e
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_dot_prod_32fc.h
@@ -0,0 +1,763 @@
+#ifndef INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H
+#define INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H
+
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <stdio.h>
+#include <string.h>
+
+
+#ifdef LV_HAVE_GENERIC
+
+
+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ float * res = (float*) result;
+ float * in = (float*) input;
+ float * tp = (float*) taps;
+ unsigned int n_2_ccomplex_blocks = num_points/2;
+ unsigned int isodd = num_points & 1;
+
+ float sum0[2] = {0,0};
+ float sum1[2] = {0,0};
+ unsigned int i = 0;
+
+ for(i = 0; i < n_2_ccomplex_blocks; ++i) {
+ sum0[0] += in[0] * tp[0] - in[1] * tp[1];
+ sum0[1] += in[0] * tp[1] + in[1] * tp[0];
+ sum1[0] += in[2] * tp[2] - in[3] * tp[3];
+ sum1[1] += in[2] * tp[3] + in[3] * tp[2];
+
+ in += 4;
+ tp += 4;
+ }
+
+ res[0] = sum0[0] + sum1[0];
+ res[1] = sum0[1] + sum1[1];
+
+ // Cleanup if we had an odd number of points
+ for(i = 0; i < isodd; ++i) {
+ *result += input[num_points - 1] * taps[num_points - 1];
+ }
+}
+
+#endif /*LV_HAVE_GENERIC*/
+
+
+
+#if LV_HAVE_SSE && LV_HAVE_64
+
+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*8;
+ unsigned int isodd = num_points & 1;
+
+ asm
+ (
+ "# ccomplex_dotprod_generic (float* result, const float *input,\n\t"
+ "# const float *taps, unsigned num_bytes)\n\t"
+ "# float sum0 = 0;\n\t"
+ "# float sum1 = 0;\n\t"
+ "# float sum2 = 0;\n\t"
+ "# float sum3 = 0;\n\t"
+ "# do {\n\t"
+ "# sum0 += input[0] * taps[0] - input[1] * taps[1];\n\t"
+ "# sum1 += input[0] * taps[1] + input[1] * taps[0];\n\t"
+ "# sum2 += input[2] * taps[2] - input[3] * taps[3];\n\t"
+ "# sum3 += input[2] * taps[3] + input[3] * taps[2];\n\t"
+ "# input += 4;\n\t"
+ "# taps += 4; \n\t"
+ "# } while (--n_2_ccomplex_blocks != 0);\n\t"
+ "# result[0] = sum0 + sum2;\n\t"
+ "# result[1] = sum1 + sum3;\n\t"
+ "# TODO: prefetch and better scheduling\n\t"
+ " xor %%r9, %%r9\n\t"
+ " xor %%r10, %%r10\n\t"
+ " movq %%rcx, %%rax\n\t"
+ " movq %%rcx, %%r8\n\t"
+ " movq %[rsi], %%r9\n\t"
+ " movq %[rdx], %%r10\n\t"
+ " xorps %%xmm6, %%xmm6 # zero accumulators\n\t"
+ " movups 0(%%r9), %%xmm0\n\t"
+ " xorps %%xmm7, %%xmm7 # zero accumulators\n\t"
+ " movups 0(%%r10), %%xmm2\n\t"
+ " shr $5, %%rax # rax = n_2_ccomplex_blocks / 2\n\t"
+ " shr $4, %%r8\n\t"
+ " jmp .%=L1_test\n\t"
+ " # 4 taps / loop\n\t"
+ " # something like ?? cycles / loop\n\t"
+ ".%=Loop1: \n\t"
+ "# complex prod: C += A * B, w/ temp Z & Y (or B), xmmPN=$0x8000000080000000\n\t"
+ "# movups (%%r9), %%xmmA\n\t"
+ "# movups (%%r10), %%xmmB\n\t"
+ "# movups %%xmmA, %%xmmZ\n\t"
+ "# shufps $0xb1, %%xmmZ, %%xmmZ # swap internals\n\t"
+ "# mulps %%xmmB, %%xmmA\n\t"
+ "# mulps %%xmmZ, %%xmmB\n\t"
+ "# # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
+ "# xorps %%xmmPN, %%xmmA\n\t"
+ "# movups %%xmmA, %%xmmZ\n\t"
+ "# unpcklps %%xmmB, %%xmmA\n\t"
+ "# unpckhps %%xmmB, %%xmmZ\n\t"
+ "# movups %%xmmZ, %%xmmY\n\t"
+ "# shufps $0x44, %%xmmA, %%xmmZ # b01000100\n\t"
+ "# shufps $0xee, %%xmmY, %%xmmA # b11101110\n\t"
+ "# addps %%xmmZ, %%xmmA\n\t"
+ "# addps %%xmmA, %%xmmC\n\t"
+ "# A=xmm0, B=xmm2, Z=xmm4\n\t"
+ "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
+ " movups 16(%%r9), %%xmm1\n\t"
+ " movups %%xmm0, %%xmm4\n\t"
+ " mulps %%xmm2, %%xmm0\n\t"
+ " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
+ " movups 16(%%r10), %%xmm3\n\t"
+ " movups %%xmm1, %%xmm5\n\t"
+ " addps %%xmm0, %%xmm6\n\t"
+ " mulps %%xmm3, %%xmm1\n\t"
+ " shufps $0xb1, %%xmm5, %%xmm5 # swap internals\n\t"
+ " addps %%xmm1, %%xmm6\n\t"
+ " mulps %%xmm4, %%xmm2\n\t"
+ " movups 32(%%r9), %%xmm0\n\t"
+ " addps %%xmm2, %%xmm7\n\t"
+ " mulps %%xmm5, %%xmm3\n\t"
+ " add $32, %%r9\n\t"
+ " movups 32(%%r10), %%xmm2\n\t"
+ " addps %%xmm3, %%xmm7\n\t"
+ " add $32, %%r10\n\t"
+ ".%=L1_test:\n\t"
+ " dec %%rax\n\t"
+ " jge .%=Loop1\n\t"
+ " # We've handled the bulk of multiplies up to here.\n\t"
+ " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
+ " # If so, we've got 2 more taps to do.\n\t"
+ " and $1, %%r8\n\t"
+ " je .%=Leven\n\t"
+ " # The count was odd, do 2 more taps.\n\t"
+ " # Note that we've already got mm0/mm2 preloaded\n\t"
+ " # from the main loop.\n\t"
+ " movups %%xmm0, %%xmm4\n\t"
+ " mulps %%xmm2, %%xmm0\n\t"
+ " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
+ " addps %%xmm0, %%xmm6\n\t"
+ " mulps %%xmm4, %%xmm2\n\t"
+ " addps %%xmm2, %%xmm7\n\t"
+ ".%=Leven:\n\t"
+ " # neg inversor\n\t"
+ " xorps %%xmm1, %%xmm1\n\t"
+ " mov $0x80000000, %%r9\n\t"
+ " movd %%r9, %%xmm1\n\t"
+ " shufps $0x11, %%xmm1, %%xmm1 # b00010001 # 0 -0 0 -0\n\t"
+ " # pfpnacc\n\t"
+ " xorps %%xmm1, %%xmm6\n\t"
+ " movups %%xmm6, %%xmm2\n\t"
+ " unpcklps %%xmm7, %%xmm6\n\t"
+ " unpckhps %%xmm7, %%xmm2\n\t"
+ " movups %%xmm2, %%xmm3\n\t"
+ " shufps $0x44, %%xmm6, %%xmm2 # b01000100\n\t"
+ " shufps $0xee, %%xmm3, %%xmm6 # b11101110\n\t"
+ " addps %%xmm2, %%xmm6\n\t"
+ " # xmm6 = r1 i2 r3 i4\n\t"
+ " movhlps %%xmm6, %%xmm4 # xmm4 = r3 i4 ?? ??\n\t"
+ " addps %%xmm4, %%xmm6 # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
+ " movlps %%xmm6, (%[rdi]) # store low 2x32 bits (complex) to memory\n\t"
+ :
+ :[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" (result)
+ :"rax", "r8", "r9", "r10"
+ );
+
+
+ if(isodd) {
+ *result += input[num_points - 1] * taps[num_points - 1];
+ }
+
+ return;
+
+}
+
+#endif /* LV_HAVE_SSE && LV_HAVE_64 */
+
+
+
+
+#ifdef LV_HAVE_SSE3
+
+#include <pmmintrin.h>
+
+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ lv_32fc_t dotProduct;
+ memset(&dotProduct, 0x0, 2*sizeof(float));
+
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points/2;
+ unsigned int isodd = num_points & 1;
+
+ __m128 x, y, yl, yh, z, tmp1, tmp2, dotProdVal;
+
+ const lv_32fc_t* a = input;
+ const lv_32fc_t* b = taps;
+
+ dotProdVal = _mm_setzero_ps();
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ dotProdVal = _mm_add_ps(dotProdVal, z); // Add the complex multiplication results together
+
+ a += 2;
+ b += 2;
+ }
+
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];
+
+ _mm_storeu_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector
+
+ dotProduct += ( dotProductVector[0] + dotProductVector[1] );
+
+ if(isodd) {
+ dotProduct += input[num_points - 1] * taps[num_points - 1];
+ }
+
+ *result = dotProduct;
+}
+
+#endif /*LV_HAVE_SSE3*/
+
+#ifdef LV_HAVE_SSE4_1
+
+#include <smmintrin.h>
+
+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ unsigned int i = 0;
+ const unsigned int qtr_points = num_points/4;
+ const unsigned int isodd = num_points & 3;
+
+ __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1;
+ float *p_input, *p_taps;
+ __m64 *p_result;
+
+ p_result = (__m64*)result;
+ p_input = (float*)input;
+ p_taps = (float*)taps;
+
+ static const __m128i neg = {0x000000000000000080000000};
+
+ real0 = _mm_setzero_ps();
+ real1 = _mm_setzero_ps();
+ im0 = _mm_setzero_ps();
+ im1 = _mm_setzero_ps();
+
+ for(; i < qtr_points; ++i) {
+ xmm0 = _mm_loadu_ps(p_input);
+ xmm1 = _mm_loadu_ps(p_taps);
+
+ p_input += 4;
+ p_taps += 4;
+
+ xmm2 = _mm_loadu_ps(p_input);
+ xmm3 = _mm_loadu_ps(p_taps);
+
+ p_input += 4;
+ p_taps += 4;
+
+ xmm4 = _mm_unpackhi_ps(xmm0, xmm2);
+ xmm5 = _mm_unpackhi_ps(xmm1, xmm3);
+ xmm0 = _mm_unpacklo_ps(xmm0, xmm2);
+ xmm2 = _mm_unpacklo_ps(xmm1, xmm3);
+
+ //imaginary vector from input
+ xmm1 = _mm_unpackhi_ps(xmm0, xmm4);
+ //real vector from input
+ xmm3 = _mm_unpacklo_ps(xmm0, xmm4);
+ //imaginary vector from taps
+ xmm0 = _mm_unpackhi_ps(xmm2, xmm5);
+ //real vector from taps
+ xmm2 = _mm_unpacklo_ps(xmm2, xmm5);
+
+ xmm4 = _mm_dp_ps(xmm3, xmm2, 0xf1);
+ xmm5 = _mm_dp_ps(xmm1, xmm0, 0xf1);
+
+ xmm6 = _mm_dp_ps(xmm3, xmm0, 0xf2);
+ xmm7 = _mm_dp_ps(xmm1, xmm2, 0xf2);
+
+ real0 = _mm_add_ps(xmm4, real0);
+ real1 = _mm_add_ps(xmm5, real1);
+ im0 = _mm_add_ps(xmm6, im0);
+ im1 = _mm_add_ps(xmm7, im1);
+ }
+
+ real1 = _mm_xor_ps(real1, bit128_p(&neg)->float_vec);
+
+ im0 = _mm_add_ps(im0, im1);
+ real0 = _mm_add_ps(real0, real1);
+
+ im0 = _mm_add_ps(im0, real0);
+
+ _mm_storel_pi(p_result, im0);
+
+ for(i = num_points-isodd; i < num_points; i++) {
+ *result += input[i] * taps[i];
+ }
+}
+
+#endif /*LV_HAVE_SSE4_1*/
+
+
+
+
+#endif /*INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H*/
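Reviewer note (aside, not part of the file): all branches in this file accumulate the same quantity, the complex dot product sum(input[n] * taps[n]) with no conjugation. A standalone scalar reference, assuming lv_32fc_t maps to C99 float complex, is useful when checking the SIMD and inline-asm branches against each other; the helper name dot_prod_32fc is invented for this example.

#include <complex.h>
#include <stdio.h>

/* Scalar reference: sum of input[n] * taps[n], with no conjugation,
   exactly what the unrolled/SSE variants accumulate. */
static float complex dot_prod_32fc(const float complex* input,
                                   const float complex* taps,
                                   unsigned int num_points)
{
    float complex acc = 0.0f;
    for (unsigned int n = 0; n < num_points; n++)
        acc += input[n] * taps[n];
    return acc;
}

int main(void)
{
    float complex x[3] = { 1.0f + 1.0f * I, 2.0f - 1.0f * I, -1.0f + 0.5f * I };
    float complex t[3] = { 1.0f - 1.0f * I, 0.5f + 0.0f * I, 2.0f + 2.0f * I };
    float complex r = dot_prod_32fc(x, t, 3);
    printf("(%g, %g)\n", crealf(r), cimagf(r));   /* reference value to compare a SIMD branch against */
    return 0;
}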
+#ifndef INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H
+#define INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H
+
+#include <volk_gnsssdr/volk_gnsssdr_common.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <stdio.h>
+#include <string.h>
+
+
+#ifdef LV_HAVE_GENERIC
+
+
+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*8;
+
+ float * res = (float*) result;
+ float * in = (float*) input;
+ float * tp = (float*) taps;
+ unsigned int n_2_ccomplex_blocks = num_bytes >> 4;
+ unsigned int isodd = num_points & 1;
+
+ float sum0[2] = {0,0};
+ float sum1[2] = {0,0};
+ unsigned int i = 0;
+
+ for(i = 0; i < n_2_ccomplex_blocks; ++i) {
+ sum0[0] += in[0] * tp[0] - in[1] * tp[1];
+ sum0[1] += in[0] * tp[1] + in[1] * tp[0];
+ sum1[0] += in[2] * tp[2] - in[3] * tp[3];
+ sum1[1] += in[2] * tp[3] + in[3] * tp[2];
+
+ in += 4;
+ tp += 4;
+ }
+
+ res[0] = sum0[0] + sum1[0];
+ res[1] = sum0[1] + sum1[1];
+
+ for(i = 0; i < isodd; ++i) {
+ *result += input[num_points - 1] * taps[num_points - 1];
+ }
+}
+
+#endif /*LV_HAVE_GENERIC*/
+
+
+#if LV_HAVE_SSE && LV_HAVE_64
+
+
+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*8;
+ unsigned int isodd = num_points & 1;
+
+ asm
+ (
+ "# ccomplex_dotprod_generic (float* result, const float *input,\n\t"
+ "# const float *taps, unsigned num_bytes)\n\t"
+ "# float sum0 = 0;\n\t"
+ "# float sum1 = 0;\n\t"
+ "# float sum2 = 0;\n\t"
+ "# float sum3 = 0;\n\t"
+ "# do {\n\t"
+ "# sum0 += input[0] * taps[0] - input[1] * taps[1];\n\t"
+ "# sum1 += input[0] * taps[1] + input[1] * taps[0];\n\t"
+ "# sum2 += input[2] * taps[2] - input[3] * taps[3];\n\t"
+ "# sum3 += input[2] * taps[3] + input[3] * taps[2];\n\t"
+ "# input += 4;\n\t"
+ "# taps += 4; \n\t"
+ "# } while (--n_2_ccomplex_blocks != 0);\n\t"
+ "# result[0] = sum0 + sum2;\n\t"
+ "# result[1] = sum1 + sum3;\n\t"
+ "# TODO: prefetch and better scheduling\n\t"
+ " xor %%r9, %%r9\n\t"
+ " xor %%r10, %%r10\n\t"
+ " movq %%rcx, %%rax\n\t"
+ " movq %%rcx, %%r8\n\t"
+ " movq %[rsi], %%r9\n\t"
+ " movq %[rdx], %%r10\n\t"
+ " xorps %%xmm6, %%xmm6 # zero accumulators\n\t"
+ " movaps 0(%%r9), %%xmm0\n\t"
+ " xorps %%xmm7, %%xmm7 # zero accumulators\n\t"
+ " movaps 0(%%r10), %%xmm2\n\t"
+ " shr $5, %%rax # rax = n_2_ccomplex_blocks / 2\n\t"
+ " shr $4, %%r8\n\t"
+ " jmp .%=L1_test\n\t"
+ " # 4 taps / loop\n\t"
+ " # something like ?? cycles / loop\n\t"
+ ".%=Loop1: \n\t"
+ "# complex prod: C += A * B, w/ temp Z & Y (or B), xmmPN=$0x8000000080000000\n\t"
+ "# movaps (%%r9), %%xmmA\n\t"
+ "# movaps (%%r10), %%xmmB\n\t"
+ "# movaps %%xmmA, %%xmmZ\n\t"
+ "# shufps $0xb1, %%xmmZ, %%xmmZ # swap internals\n\t"
+ "# mulps %%xmmB, %%xmmA\n\t"
+ "# mulps %%xmmZ, %%xmmB\n\t"
+ "# # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
+ "# xorps %%xmmPN, %%xmmA\n\t"
+ "# movaps %%xmmA, %%xmmZ\n\t"
+ "# unpcklps %%xmmB, %%xmmA\n\t"
+ "# unpckhps %%xmmB, %%xmmZ\n\t"
+ "# movaps %%xmmZ, %%xmmY\n\t"
+ "# shufps $0x44, %%xmmA, %%xmmZ # b01000100\n\t"
+ "# shufps $0xee, %%xmmY, %%xmmA # b11101110\n\t"
+ "# addps %%xmmZ, %%xmmA\n\t"
+ "# addps %%xmmA, %%xmmC\n\t"
+ "# A=xmm0, B=xmm2, Z=xmm4\n\t"
+ "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
+ " movaps 16(%%r9), %%xmm1\n\t"
+ " movaps %%xmm0, %%xmm4\n\t"
+ " mulps %%xmm2, %%xmm0\n\t"
+ " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
+ " movaps 16(%%r10), %%xmm3\n\t"
+ " movaps %%xmm1, %%xmm5\n\t"
+ " addps %%xmm0, %%xmm6\n\t"
+ " mulps %%xmm3, %%xmm1\n\t"
+ " shufps $0xb1, %%xmm5, %%xmm5 # swap internals\n\t"
+ " addps %%xmm1, %%xmm6\n\t"
+ " mulps %%xmm4, %%xmm2\n\t"
+ " movaps 32(%%r9), %%xmm0\n\t"
+ " addps %%xmm2, %%xmm7\n\t"
+ " mulps %%xmm5, %%xmm3\n\t"
+ " add $32, %%r9\n\t"
+ " movaps 32(%%r10), %%xmm2\n\t"
+ " addps %%xmm3, %%xmm7\n\t"
+ " add $32, %%r10\n\t"
+ ".%=L1_test:\n\t"
+ " dec %%rax\n\t"
+ " jge .%=Loop1\n\t"
+ " # We've handled the bulk of multiplies up to here.\n\t"
+ " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
+ " # If so, we've got 2 more taps to do.\n\t"
+ " and $1, %%r8\n\t"
+ " je .%=Leven\n\t"
+ " # The count was odd, do 2 more taps.\n\t"
+ " # Note that we've already got mm0/mm2 preloaded\n\t"
+ " # from the main loop.\n\t"
+ " movaps %%xmm0, %%xmm4\n\t"
+ " mulps %%xmm2, %%xmm0\n\t"
+ " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
+ " addps %%xmm0, %%xmm6\n\t"
+ " mulps %%xmm4, %%xmm2\n\t"
+ " addps %%xmm2, %%xmm7\n\t"
+ ".%=Leven:\n\t"
+ " # neg inversor\n\t"
+ " xorps %%xmm1, %%xmm1\n\t"
+ " mov $0x80000000, %%r9\n\t"
+ " movd %%r9, %%xmm1\n\t"
+ " shufps $0x11, %%xmm1, %%xmm1 # b00010001 # 0 -0 0 -0\n\t"
+ " # pfpnacc\n\t"
+ " xorps %%xmm1, %%xmm6\n\t"
+ " movaps %%xmm6, %%xmm2\n\t"
+ " unpcklps %%xmm7, %%xmm6\n\t"
+ " unpckhps %%xmm7, %%xmm2\n\t"
+ " movaps %%xmm2, %%xmm3\n\t"
+ " shufps $0x44, %%xmm6, %%xmm2 # b01000100\n\t"
+ " shufps $0xee, %%xmm3, %%xmm6 # b11101110\n\t"
+ " addps %%xmm2, %%xmm6\n\t"
+ " # xmm6 = r1 i2 r3 i4\n\t"
+ " movhlps %%xmm6, %%xmm4 # xmm4 = r3 i4 ?? ??\n\t"
+ " addps %%xmm4, %%xmm6 # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
+ " movlps %%xmm6, (%[rdi]) # store low 2x32 bits (complex) to memory\n\t"
+ :
+ :[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" (result)
+ :"rax", "r8", "r9", "r10"
+ );
+
+
+ if(isodd) {
+ *result += input[num_points - 1] * taps[num_points - 1];
+ }
+
+ return;
+
+}
+
+#endif
+
+#if LV_HAVE_SSE && LV_HAVE_32
+
+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ volk_gnsssdr_32fc_x2_dot_prod_32fc_a_generic(result, input, taps, num_points);
+
+#if 0
+ const unsigned int num_bytes = num_points*8;
+ unsigned int isodd = num_points & 1;
+
+ asm volatile
+ (
+ " #pushl %%ebp\n\t"
+ " #movl %%esp, %%ebp\n\t"
+ " movl 12(%%ebp), %%eax # input\n\t"
+ " movl 16(%%ebp), %%edx # taps\n\t"
+ " movl 20(%%ebp), %%ecx # n_bytes\n\t"
+ " xorps %%xmm6, %%xmm6 # zero accumulators\n\t"
+ " movaps 0(%%eax), %%xmm0\n\t"
+ " xorps %%xmm7, %%xmm7 # zero accumulators\n\t"
+ " movaps 0(%%edx), %%xmm2\n\t"
+ " shrl $5, %%ecx # ecx = n_2_ccomplex_blocks / 2\n\t"
+ " jmp .%=L1_test\n\t"
+ " # 4 taps / loop\n\t"
+ " # something like ?? cycles / loop\n\t"
+ ".%=Loop1: \n\t"
+ "# complex prod: C += A * B, w/ temp Z & Y (or B), xmmPN=$0x8000000080000000\n\t"
+ "# movaps (%%eax), %%xmmA\n\t"
+ "# movaps (%%edx), %%xmmB\n\t"
+ "# movaps %%xmmA, %%xmmZ\n\t"
+ "# shufps $0xb1, %%xmmZ, %%xmmZ # swap internals\n\t"
+ "# mulps %%xmmB, %%xmmA\n\t"
+ "# mulps %%xmmZ, %%xmmB\n\t"
+ "# # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
+ "# xorps %%xmmPN, %%xmmA\n\t"
+ "# movaps %%xmmA, %%xmmZ\n\t"
+ "# unpcklps %%xmmB, %%xmmA\n\t"
+ "# unpckhps %%xmmB, %%xmmZ\n\t"
+ "# movaps %%xmmZ, %%xmmY\n\t"
+ "# shufps $0x44, %%xmmA, %%xmmZ # b01000100\n\t"
+ "# shufps $0xee, %%xmmY, %%xmmA # b11101110\n\t"
+ "# addps %%xmmZ, %%xmmA\n\t"
+ "# addps %%xmmA, %%xmmC\n\t"
+ "# A=xmm0, B=xmm2, Z=xmm4\n\t"
+ "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
+ " movaps 16(%%eax), %%xmm1\n\t"
+ " movaps %%xmm0, %%xmm4\n\t"
+ " mulps %%xmm2, %%xmm0\n\t"
+ " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
+ " movaps 16(%%edx), %%xmm3\n\t"
+ " movaps %%xmm1, %%xmm5\n\t"
+ " addps %%xmm0, %%xmm6\n\t"
+ " mulps %%xmm3, %%xmm1\n\t"
+ " shufps $0xb1, %%xmm5, %%xmm5 # swap internals\n\t"
+ " addps %%xmm1, %%xmm6\n\t"
+ " mulps %%xmm4, %%xmm2\n\t"
+ " movaps 32(%%eax), %%xmm0\n\t"
+ " addps %%xmm2, %%xmm7\n\t"
+ " mulps %%xmm5, %%xmm3\n\t"
+ " addl $32, %%eax\n\t"
+ " movaps 32(%%edx), %%xmm2\n\t"
+ " addps %%xmm3, %%xmm7\n\t"
+ " addl $32, %%edx\n\t"
+ ".%=L1_test:\n\t"
+ " decl %%ecx\n\t"
+ " jge .%=Loop1\n\t"
+ " # We've handled the bulk of multiplies up to here.\n\t"
+ " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
+ " # If so, we've got 2 more taps to do.\n\t"
+ " movl 20(%%ebp), %%ecx # n_2_ccomplex_blocks\n\t"
+ " shrl $4, %%ecx\n\t"
+ " andl $1, %%ecx\n\t"
+ " je .%=Leven\n\t"
+ " # The count was odd, do 2 more taps.\n\t"
+ " # Note that we've already got mm0/mm2 preloaded\n\t"
+ " # from the main loop.\n\t"
+ " movaps %%xmm0, %%xmm4\n\t"
+ " mulps %%xmm2, %%xmm0\n\t"
+ " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
+ " addps %%xmm0, %%xmm6\n\t"
+ " mulps %%xmm4, %%xmm2\n\t"
+ " addps %%xmm2, %%xmm7\n\t"
+ ".%=Leven:\n\t"
+ " # neg inversor\n\t"
+ " movl 8(%%ebp), %%eax \n\t"
+ " xorps %%xmm1, %%xmm1\n\t"
+ " movl $0x80000000, (%%eax)\n\t"
+ " movss (%%eax), %%xmm1\n\t"
+ " shufps $0x11, %%xmm1, %%xmm1 # b00010001 # 0 -0 0 -0\n\t"
+ " # pfpnacc\n\t"
+ " xorps %%xmm1, %%xmm6\n\t"
+ " movaps %%xmm6, %%xmm2\n\t"
+ " unpcklps %%xmm7, %%xmm6\n\t"
+ " unpckhps %%xmm7, %%xmm2\n\t"
+ " movaps %%xmm2, %%xmm3\n\t"
+ " shufps $0x44, %%xmm6, %%xmm2 # b01000100\n\t"
+ " shufps $0xee, %%xmm3, %%xmm6 # b11101110\n\t"
+ " addps %%xmm2, %%xmm6\n\t"
+ " # xmm6 = r1 i2 r3 i4\n\t"
+ " #movl 8(%%ebp), %%eax # @result\n\t"
+ " movhlps %%xmm6, %%xmm4 # xmm4 = r3 i4 ?? ??\n\t"
+ " addps %%xmm4, %%xmm6 # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
+ " movlps %%xmm6, (%%eax) # store low 2x32 bits (complex) to memory\n\t"
+ " #popl %%ebp\n\t"
+ :
+ :
+ : "eax", "ecx", "edx"
+ );
+
+
+ int getem = num_bytes % 16;
+
+ if(isodd) {
+ *result += (input[num_points - 1] * taps[num_points - 1]);
+ }
+
+ return;
+#endif
+}
+
+#endif /*LV_HAVE_SSE*/
+
+#ifdef LV_HAVE_SSE3
+
+#include <pmmintrin.h>
+
+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ const unsigned int num_bytes = num_points*8;
+ unsigned int isodd = num_points & 1;
+
+ lv_32fc_t dotProduct;
+ memset(&dotProduct, 0x0, 2*sizeof(float));
+
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_bytes >> 4;
+
+ __m128 x, y, yl, yh, z, tmp1, tmp2, dotProdVal;
+
+ const lv_32fc_t* a = input;
+ const lv_32fc_t* b = taps;
+
+ dotProdVal = _mm_setzero_ps();
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ dotProdVal = _mm_add_ps(dotProdVal, z); // Add the complex multiplication results together
+
+ a += 2;
+ b += 2;
+ }
+
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];
+
+ _mm_store_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector
+
+ dotProduct += ( dotProductVector[0] + dotProductVector[1] );
+
+ if(isodd) {
+ dotProduct += input[num_points - 1] * taps[num_points - 1];
+ }
+
+ *result = dotProduct;
+}
+
+#endif /*LV_HAVE_SSE3*/
+
+#ifdef LV_HAVE_SSE4_1
+
+#include <smmintrin.h>
+
+static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+
+ unsigned int i = 0;
+ const unsigned int qtr_points = num_points/4;
+ const unsigned int isodd = num_points & 3;
+
+ __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1;
+ float *p_input, *p_taps;
+ __m64 *p_result;
+
+ static const __m128i neg = {0x000000000000000080000000};
+
+ p_result = (__m64*)result;
+ p_input = (float*)input;
+ p_taps = (float*)taps;
+
+ real0 = _mm_setzero_ps();
+ real1 = _mm_setzero_ps();
+ im0 = _mm_setzero_ps();
+ im1 = _mm_setzero_ps();
+
+ for(; i < qtr_points; ++i) {
+ xmm0 = _mm_load_ps(p_input);
+ xmm1 = _mm_load_ps(p_taps);
+
+ p_input += 4;
+ p_taps += 4;
+
+ xmm2 = _mm_load_ps(p_input);
+ xmm3 = _mm_load_ps(p_taps);
+
+ p_input += 4;
+ p_taps += 4;
+
+ xmm4 = _mm_unpackhi_ps(xmm0, xmm2);
+ xmm5 = _mm_unpackhi_ps(xmm1, xmm3);
+ xmm0 = _mm_unpacklo_ps(xmm0, xmm2);
+ xmm2 = _mm_unpacklo_ps(xmm1, xmm3);
+
+ //imaginary vector from input
+ xmm1 = _mm_unpackhi_ps(xmm0, xmm4);
+ //real vector from input
+ xmm3 = _mm_unpacklo_ps(xmm0, xmm4);
+ //imaginary vector from taps
+ xmm0 = _mm_unpackhi_ps(xmm2, xmm5);
+ //real vector from taps
+ xmm2 = _mm_unpacklo_ps(xmm2, xmm5);
+
+ xmm4 = _mm_dp_ps(xmm3, xmm2, 0xf1);
+ xmm5 = _mm_dp_ps(xmm1, xmm0, 0xf1);
+
+ xmm6 = _mm_dp_ps(xmm3, xmm0, 0xf2);
+ xmm7 = _mm_dp_ps(xmm1, xmm2, 0xf2);
+
+ real0 = _mm_add_ps(xmm4, real0);
+ real1 = _mm_add_ps(xmm5, real1);
+ im0 = _mm_add_ps(xmm6, im0);
+ im1 = _mm_add_ps(xmm7, im1);
+ }
+
+ real1 = _mm_xor_ps(real1, bit128_p(&neg)->float_vec);
+
+ im0 = _mm_add_ps(im0, im1);
+ real0 = _mm_add_ps(real0, real1);
+
+ im0 = _mm_add_ps(im0, real0);
+
+ _mm_storel_pi(p_result, im0);
+
+ for(i = num_points-isodd; i < num_points; i++) {
+ *result += input[i] * taps[i];
+ }
+}
+
+#endif /*LV_HAVE_SSE4_1*/
+
+#endif /*INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H*/
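A minimal usage sketch for the dot-product protokernels above, driving the portable a_generic path. The header install path and the main() driver are illustrative assumptions, not part of this patch; lv_cmake is the same helper the kernels themselves use:

    #include <stdio.h>
    #include <volk_gnsssdr/volk_gnsssdr_32fc_x2_dot_prod_32fc.h>   /* assumed install path of this header */

    int main(void)
    {
        lv_32fc_t input[4], taps[4], result;

        /* Real-only test vectors 1,2,3,4 against themselves: expected dot product = 30 + 0j */
        for (unsigned int i = 0; i < 4; i++) {
            input[i] = lv_cmake((float)(i + 1), 0.0f);
            taps[i]  = lv_cmake((float)(i + 1), 0.0f);
        }

        volk_gnsssdr_32fc_x2_dot_prod_32fc_a_generic(&result, input, taps, 4);

        /* lv_32fc_t stores the real and imaginary parts as two consecutive floats */
        const float* ri = (const float*)&result;
        printf("dot product = %f %+fj\n", ri[0], ri[1]);
        return 0;
    }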
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_multiply_32fc.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_multiply_32fc.h
new file mode 100644
index 000000000..e2b17c401
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_multiply_32fc.h
@@ -0,0 +1,170 @@
+#ifndef INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H
+#define INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Multiplies the two input complex vectors and stores their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_gnsssdr_32fc_x2_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ __m128 x, y, yl, yh, z, tmp1, tmp2;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+ const lv_32fc_t* b = bVector;
+
+ for(;number < halfPoints; number++){
+
+ x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ _mm_storeu_ps((float*)c,z); // Store the results back into the C container
+
+ a += 2;
+ b += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = (*a) * (*b);
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Multiplies the two input complex vectors and stores their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_gnsssdr_32fc_x2_multiply_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ const lv_32fc_t* bPtr= bVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = (*aPtr++) * (*bPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+
+#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H */
+#ifndef INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H
+#define INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Multiplies the two input complex vectors and stores their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_gnsssdr_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ __m128 x, y, yl, yh, z, tmp1, tmp2;
+ lv_32fc_t* c = cVector;
+ const lv_32fc_t* a = aVector;
+ const lv_32fc_t* b = bVector;
+ for(;number < halfPoints; number++){
+
+ x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ _mm_store_ps((float*)c,z); // Store the results back into the C container
+
+ a += 2;
+ b += 2;
+ c += 2;
+ }
+
+ if((num_points % 2) != 0) {
+ *c = (*a) * (*b);
+ }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+ /*!
+ \brief Multiplies the two input complex vectors and stores their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_gnsssdr_32fc_x2_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+ lv_32fc_t* cPtr = cVector;
+ const lv_32fc_t* aPtr = aVector;
+ const lv_32fc_t* bPtr= bVector;
+ unsigned int number = 0;
+
+ for(number = 0; number < num_points; number++){
+ *cPtr++ = (*aPtr++) * (*bPtr++);
+ }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_ORC
+ /*!
+ \brief Multiplies the two input complex vectors and stores their results in the third vector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+extern void volk_gnsssdr_32fc_x2_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points);
+static inline void volk_gnsssdr_32fc_x2_multiply_32fc_u_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
+ volk_gnsssdr_32fc_x2_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
+
+
+
+
+#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H */
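As a rough usage sketch for the multiply protokernels above (the install paths, the __VOLK_ATTR_ALIGNED macro from volk_gnsssdr_common.h and the main() driver are assumptions for illustration): the aligned _a_ variants expect 16-byte aligned buffers, while the generic one accepts any pointer.

    #include <stdio.h>
    #include <volk_gnsssdr/volk_gnsssdr_common.h>                   /* assumed: provides __VOLK_ATTR_ALIGNED */
    #include <volk_gnsssdr/volk_gnsssdr_32fc_x2_multiply_32fc.h>    /* assumed install path of this header   */

    int main(void)
    {
        /* 16-byte aligned buffers satisfy the _a_ (aligned) protokernels */
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t a[4];
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t b[4];
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t c[4];

        for (unsigned int i = 0; i < 4; i++) {
            a[i] = lv_cmake(1.0f, 1.0f);   /* (1 + j)                      */
            b[i] = lv_cmake(0.0f, 2.0f);   /* (0 + 2j): product is -2 + 2j */
        }

    #ifdef LV_HAVE_SSE3
        volk_gnsssdr_32fc_x2_multiply_32fc_a_sse3(c, a, b, 4);
    #else
        volk_gnsssdr_32fc_x2_multiply_32fc_a_generic(c, a, b, 4);
    #endif

        for (unsigned int i = 0; i < 4; i++) {
            const float* ri = (const float*)&c[i];
            printf("c[%u] = %f %+fj\n", i, ri[0], ri[1]);
        }
        return 0;
    }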
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3.h
new file mode 100644
index 000000000..7e05be9cf
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3.h
@@ -0,0 +1,409 @@
+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H
+#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <float.h>
+
+/*!
+ * TODO: Code the SSE4 version and benchmark it
+ */
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+
+
+ /*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal input
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_sse3(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
+{
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ lv_32fc_t dotProduct_E;
+ memset(&dotProduct_E, 0x0, 2*sizeof(float));
+ lv_32fc_t dotProduct_P;
+ memset(&dotProduct_P, 0x0, 2*sizeof(float));
+ lv_32fc_t dotProduct_L;
+ memset(&dotProduct_L, 0x0, 2*sizeof(float));
+
+ // Aux vars
+ __m128 x, y, yl, yh, z, tmp1, tmp2, z_E, z_P, z_L;
+
+ z_E = _mm_setzero_ps();
+ z_P = _mm_setzero_ps();
+ z_L = _mm_setzero_ps();
+
+ //input and output vectors
+ //lv_32fc_t* _input_BB = input_BB;
+ const lv_32fc_t* _input = input;
+ const lv_32fc_t* _carrier = carrier;
+ const lv_32fc_t* _E_code = E_code;
+ const lv_32fc_t* _P_code = P_code;
+ const lv_32fc_t* _L_code = L_code;
+
+ for(;number < halfPoints; number++)
+ {
+ // carrier wipe-off (vector point-to-point product)
+ x = _mm_loadu_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_loadu_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ //_mm_storeu_ps((float*)_input_BB,z); // Store the results back into the _input_BB container
+
+ // correlation E,P,L (3x vector scalar product)
+ // Early
+ //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
+ x = z;
+
+ y = _mm_load_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together
+
+ // Prompt
+ //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_load_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x back to ar,ai,br,bi
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together
+
+ // Late
+ //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_load_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x back to ar,ai,br,bi
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together
+
+ /*pointer increment*/
+ _carrier += 2;
+ _input += 2;
+ //_input_BB += 2;
+ _E_code += 2;
+ _P_code += 2;
+ _L_code +=2;
+ }
+
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
+ //__VOLK_ATTR_ALIGNED(16) lv_32fc_t _input_BB;
+
+ _mm_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
+ _mm_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
+ _mm_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
+
+ dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
+ dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
+ dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );
+
+ if((num_points % 2) != 0)
+ {
+ //_input_BB = (*_input) * (*_carrier);
+ dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
+ dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
+ dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
+ }
+
+ *E_out = dotProduct_E;
+ *P_out = dotProduct_P;
+ *L_out = dotProduct_L;
+}
+
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal input
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
+{
+ lv_32fc_t bb_signal_sample;
+
+ bb_signal_sample = lv_cmake(0, 0);
+
+ *E_out = 0;
+ *P_out = 0;
+ *L_out = 0;
+ // perform Early, Prompt and Late correlation
+ for(int i=0; i < num_points; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = input[i] * carrier[i];
+ // Now get early, late, and prompt values for each
+ *E_out += bb_signal_sample * E_code[i];
+ *P_out += bb_signal_sample * P_code[i];
+ *L_out += bb_signal_sample * L_code[i];
+ }
+}
+
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H */
+
+
+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H
+#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+/*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal input
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_sse3(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
+{
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ lv_32fc_t dotProduct_E;
+ memset(&dotProduct_E, 0x0, 2*sizeof(float));
+ lv_32fc_t dotProduct_P;
+ memset(&dotProduct_P, 0x0, 2*sizeof(float));
+ lv_32fc_t dotProduct_L;
+ memset(&dotProduct_L, 0x0, 2*sizeof(float));
+
+ // Aux vars
+ __m128 x, y, yl, yh, z, tmp1, tmp2, z_E, z_P, z_L;
+
+ z_E = _mm_setzero_ps();
+ z_P = _mm_setzero_ps();
+ z_L = _mm_setzero_ps();
+
+ //input and output vectors
+ //lv_32fc_t* _input_BB = input_BB;
+ const lv_32fc_t* _input = input;
+ const lv_32fc_t* _carrier = carrier;
+ const lv_32fc_t* _E_code = E_code;
+ const lv_32fc_t* _P_code = P_code;
+ const lv_32fc_t* _L_code = L_code;
+
+ for(;number < halfPoints; number++)
+ {
+ // carrier wipe-off (vector point-to-point product)
+ x = _mm_load_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_load_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ //_mm_storeu_ps((float*)_input_BB,z); // Store the results back into the _input_BB container
+
+ // correlation E,P,L (3x vector scalar product)
+ // Early
+ //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
+ x = z;
+
+ y = _mm_load_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together
+
+ // Prompt
+ //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_load_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x back to ar,ai,br,bi
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together
+
+ // Late
+ //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_load_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x back to ar,ai,br,bi
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+ z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together
+
+ /*pointer increment*/
+ _carrier += 2;
+ _input += 2;
+ //_input_BB += 2;
+ _E_code += 2;
+ _P_code += 2;
+ _L_code +=2;
+ }
+
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
+ //__VOLK_ATTR_ALIGNED(16) lv_32fc_t _input_BB;
+
+ _mm_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
+ _mm_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
+ _mm_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
+
+ dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
+ dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
+ dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );
+
+ if((num_points % 2) != 0)
+ {
+ //_input_BB = (*_input) * (*_carrier);
+ dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
+ dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
+ dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
+ }
+
+ *E_out = dotProduct_E;
+ *P_out = dotProduct_P;
+ *L_out = dotProduct_L;
+}
+
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+ \param input The input signal input
+ \param carrier The carrier signal input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
+{
+ lv_32fc_t bb_signal_sample;
+
+ bb_signal_sample = lv_cmake(0, 0);
+
+ *E_out = 0;
+ *P_out = 0;
+ *L_out = 0;
+ // perform Early, Prompt and Late correlation
+ for(int i=0; i < num_points; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = input[i] * carrier[i];
+ // Now get early, late, and prompt values for each
+ *E_out += bb_signal_sample * E_code[i];
+ *P_out += bb_signal_sample * P_code[i];
+ *L_out += bb_signal_sample * L_code[i];
+ }
+}
+
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H */
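A small sketch of how the generic Early/Prompt/Late correlator above could be called from a test driver. The buffer contents, sizes and the main() wrapper are made up for illustration; a real tracking loop feeds thousands of samples plus locally generated carrier and code replicas:

    #include <stdio.h>
    #include <volk_gnsssdr/volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3.h>  /* assumed install path of this header */

    #define N_SAMPLES 8   /* deliberately tiny, just to make the expected outputs obvious */

    int main(void)
    {
        lv_32fc_t input[N_SAMPLES], carrier[N_SAMPLES];
        lv_32fc_t e_code[N_SAMPLES], p_code[N_SAMPLES], l_code[N_SAMPLES];
        lv_32fc_t E, P, L;

        for (unsigned int i = 0; i < N_SAMPLES; i++) {
            input[i]   = lv_cmake(1.0f, 0.0f);    /* toy received samples              */
            carrier[i] = lv_cmake(1.0f, 0.0f);    /* unit carrier: wipe-off is a no-op */
            e_code[i]  = lv_cmake(1.0f, 0.0f);
            p_code[i]  = lv_cmake(1.0f, 0.0f);
            l_code[i]  = lv_cmake(-1.0f, 0.0f);
        }

        /* Expected with these vectors: E = P = 8 + 0j, L = -8 + 0j */
        volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_generic(&E, &P, &L, input, carrier,
                                                         e_code, p_code, l_code, N_SAMPLES);

        const float* e = (const float*)&E;
        const float* p = (const float*)&P;
        const float* l = (const float*)&L;
        printf("E = %f %+fj  P = %f %+fj  L = %f %+fj\n", e[0], e[1], p[0], p[1], l[0], l[1]);
        return 0;
    }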
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5.h
new file mode 100644
index 000000000..7b66b6491
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5.h
@@ -0,0 +1,848 @@
+/*!
+ * \file volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5
+ * \brief Volk protokernel: performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation with 64-bit vectors
+ * \authors
+ * - Javier Arribas, 2011. jarribas(at)cttc.es
+ *
+ * - Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
+ *
+ *
+ * Volk protokernel that performs the carrier wipe-off mixing and the
+ * VE, Early, Prompt, Late and VL correlation with 64-bit vectors (32 bits for the
+ * real part and 32 bits for the imaginary part):
+ * - The carrier wipe-off is done by multiplying the input signal by the
+ *   carrier (multiplication of 64-bit vectors). It returns the input
+ *   signal in base band (BB).
+ * - VE values are calculated by multiplying the input signal in BB by the
+ *   VE code (multiplication of 64-bit vectors), accumulating the results.
+ * - Early values are calculated by multiplying the input signal in BB by the
+ *   early code (multiplication of 64-bit vectors), accumulating the results.
+ * - Prompt values are calculated by multiplying the input signal in BB by the
+ *   prompt code (multiplication of 64-bit vectors), accumulating the results.
+ * - Late values are calculated by multiplying the input signal in BB by the
+ *   late code (multiplication of 64-bit vectors), accumulating the results.
+ * - VL values are calculated by multiplying the input signal in BB by the
+ *   VL code (multiplication of 64-bit vectors), accumulating the results.
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H
+#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+ \brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
+ \param input The input signal input
+ \param carrier The carrier signal input
+ \param VE_code VE PRN code replica input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param VL_code VL PRN code replica input
+ \param VE_out VE correlation output
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param VL_out VL correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_avx(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
+{
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ lv_32fc_t dotProduct_VE;
+ lv_32fc_t dotProduct_E;
+ lv_32fc_t dotProduct_P;
+ lv_32fc_t dotProduct_L;
+ lv_32fc_t dotProduct_VL;
+
+ // Aux vars
+ __m256 x, y, yl, yh, z, tmp1, tmp2, z_VE, z_E, z_P, z_L, z_VL;
+ __m256 bb_signal_sample, bb_signal_sample_shuffled;
+
+ z_VE = _mm256_setzero_ps();
+ z_E = _mm256_setzero_ps();
+ z_P = _mm256_setzero_ps();
+ z_L = _mm256_setzero_ps();
+ z_VL = _mm256_setzero_ps();
+
+ //input and output vectors
+ const lv_32fc_t* _input = input;
+ const lv_32fc_t* _carrier = carrier;
+ const lv_32fc_t* _VE_code = VE_code;
+ const lv_32fc_t* _E_code = E_code;
+ const lv_32fc_t* _P_code = P_code;
+ const lv_32fc_t* _L_code = L_code;
+ const lv_32fc_t* _VL_code = VL_code;
+
+ for(;number < quarterPoints; number++)
+ {
+ // carrier wipe-off (vector point-to-point product)
+ x = _mm256_loadu_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm256_loadu_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm256_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm256_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm256_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ bb_signal_sample = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ bb_signal_sample_shuffled = _mm256_shuffle_ps(bb_signal_sample,bb_signal_sample,0xB1); // Re-arrange bb_signal_sample to be ai,ar,bi,br
+
+ // correlation VE,E,P,L,VL (5x vector scalar product)
+ // VE
+ y = _mm256_loadu_ps((float*)_VE_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_VE = _mm256_add_ps(z_VE, z); // Add the complex multiplication results together
+
+ // Early
+ y = _mm256_loadu_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_E = _mm256_add_ps(z_E, z); // Add the complex multiplication results together
+
+ // Prompt
+ y = _mm256_loadu_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_P = _mm256_add_ps(z_P, z); // Add the complex multiplication results together
+
+ // Late
+ y = _mm256_loadu_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_L = _mm256_add_ps(z_L, z); // Add the complex multiplication results together
+
+ // VL
+ y = _mm256_loadu_ps((float*)_VL_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_VL = _mm256_add_ps(z_VL, z); // Add the complex multiplication results together
+
+ /*pointer increment*/
+ _carrier += 4;
+ _input += 4;
+ _VE_code += 4;
+ _E_code += 4;
+ _P_code += 4;
+ _L_code += 4;
+ _VL_code += 4;
+ }
+
+ __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_VE[4];
+ __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_E[4];
+ __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_P[4];
+ __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_L[4];
+ __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_VL[4];
+
+ _mm256_storeu_ps((float*)dotProductVector_VE,z_VE); // Store the results back into the dot product vector
+ _mm256_storeu_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
+ _mm256_storeu_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
+ _mm256_storeu_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
+ _mm256_storeu_ps((float*)dotProductVector_VL,z_VL); // Store the results back into the dot product vector
+
+ dotProduct_VE = ( dotProductVector_VE[0] + dotProductVector_VE[1] + dotProductVector_VE[2] + dotProductVector_VE[3] );
+ dotProduct_E = ( dotProductVector_E[0] + dotProductVector_E[1] + dotProductVector_E[2] + dotProductVector_E[3] );
+ dotProduct_P = ( dotProductVector_P[0] + dotProductVector_P[1] + dotProductVector_P[2] + dotProductVector_P[3] );
+ dotProduct_L = ( dotProductVector_L[0] + dotProductVector_L[1] + dotProductVector_L[2] + dotProductVector_L[3] );
+ dotProduct_VL = ( dotProductVector_VL[0] + dotProductVector_VL[1] + dotProductVector_VL[2] + dotProductVector_VL[3] );
+
+ for (int i = 0; i<(num_points % 4); ++i)
+ {
+ dotProduct_VE += (*_input) * (*_VE_code++) * (*_carrier);
+ dotProduct_E += (*_input) * (*_E_code++) * (*_carrier);
+ dotProduct_P += (*_input) * (*_P_code++) * (*_carrier);
+ dotProduct_L += (*_input) * (*_L_code++) * (*_carrier);
+ dotProduct_VL += (*_input++) * (*_VL_code++) * (*_carrier++);
+ }
+
+ *VE_out = dotProduct_VE;
+ *E_out = dotProduct_E;
+ *P_out = dotProduct_P;
+ *L_out = dotProduct_L;
+ *VL_out = dotProduct_VL;
+}
+#endif /* LV_HAVE_AVX */
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>
+ /*!
+ \brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
+ \param input The input signal input
+ \param carrier The carrier signal input
+ \param VE_code VE PRN code replica input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param VL_code VL PRN code replica input
+ \param VE_out VE correlation output
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param VL_out VL correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_sse3(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
+{
+ unsigned int number = 0;
+ const unsigned int halfPoints = num_points / 2;
+
+ lv_32fc_t dotProduct_VE;
+ lv_32fc_t dotProduct_E;
+ lv_32fc_t dotProduct_P;
+ lv_32fc_t dotProduct_L;
+ lv_32fc_t dotProduct_VL;
+
+ // Aux vars
+ __m128 x, y, yl, yh, z, tmp1, tmp2, z_VE, z_E, z_P, z_L, z_VL;
+ __m128 bb_signal_sample, bb_signal_sample_shuffled;
+
+ z_VE = _mm_setzero_ps();
+ z_E = _mm_setzero_ps();
+ z_P = _mm_setzero_ps();
+ z_L = _mm_setzero_ps();
+ z_VL = _mm_setzero_ps();
+
+ //input and output vectors
+ const lv_32fc_t* _input = input;
+ const lv_32fc_t* _carrier = carrier;
+ const lv_32fc_t* _VE_code = VE_code;
+ const lv_32fc_t* _E_code = E_code;
+ const lv_32fc_t* _P_code = P_code;
+ const lv_32fc_t* _L_code = L_code;
+ const lv_32fc_t* _VL_code = VL_code;
+
+ for(;number < halfPoints; number++)
+ {
+ // carrier wipe-off (vector point-to-point product)
+ x = _mm_loadu_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_loadu_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ bb_signal_sample = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ bb_signal_sample_shuffled = _mm_shuffle_ps(bb_signal_sample,bb_signal_sample,0xB1); // Re-arrange bb_signal_sample to be ai,ar,bi,br
+
+ // correlation VE,E,P,L,VL (5x vector scalar product)
+ // VE
+ y = _mm_loadu_ps((float*)_VE_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_VE = _mm_add_ps(z_VE, z); // Add the complex multiplication results together
+
+ // Early
+ y = _mm_loadu_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together
+
+ // Prompt
+ y = _mm_loadu_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together
+
+ // Late
+ y = _mm_loadu_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together
+
+ // VL
+ //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm_loadu_ps((float*)_VL_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_VL = _mm_add_ps(z_VL, z); // Add the complex multiplication results together
+
+ /*pointer increment*/
+ _carrier += 2;
+ _input += 2;
+ _VE_code += 2;
+ _E_code += 2;
+ _P_code += 2;
+ _L_code +=2;
+ _VL_code +=2;
+ }
+
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VE[2];
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
+ __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VL[2];
+
+ _mm_storeu_ps((float*)dotProductVector_VE,z_VE); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
+ _mm_storeu_ps((float*)dotProductVector_VL,z_VL); // Store the results back into the dot product vector
+
+ dotProduct_VE = ( dotProductVector_VE[0] + dotProductVector_VE[1] );
+ dotProduct_E = ( dotProductVector_E[0] + dotProductVector_E[1] );
+ dotProduct_P = ( dotProductVector_P[0] + dotProductVector_P[1] );
+ dotProduct_L = ( dotProductVector_L[0] + dotProductVector_L[1] );
+ dotProduct_VL = ( dotProductVector_VL[0] + dotProductVector_VL[1] );
+
+ if((num_points % 2) != 0)
+ {
+ dotProduct_VE += (*_input) * (*_VE_code)*(*_carrier);
+ dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
+ dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
+ dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
+ dotProduct_VL += (*_input) * (*_VL_code)*(*_carrier);
+ }
+
+ *VE_out = dotProduct_VE;
+ *E_out = dotProduct_E;
+ *P_out = dotProduct_P;
+ *L_out = dotProduct_L;
+ *VL_out = dotProduct_VL;
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
+ \param input The input signal input
+ \param carrier The carrier signal input
+ \param VE_code VE PRN code replica input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param VL_code VL PRN code replica input
+ \param VE_out VE correlation output
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param VL_out VL correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
+{
+ lv_32fc_t bb_signal_sample;
+
+ bb_signal_sample = lv_cmake(0, 0);
+
+ *VE_out = 0;
+ *E_out = 0;
+ *P_out = 0;
+ *L_out = 0;
+ *VL_out = 0;
+ // perform VE, Early, Prompt, Late and VL correlation
+ for(int i=0; i < num_points; ++i)
+ {
+ //Perform the carrier wipe-off
+ bb_signal_sample = input[i] * carrier[i];
+ // Now get the very early, early, prompt, late and very late values
+ *VE_out += bb_signal_sample * VE_code[i];
+ *E_out += bb_signal_sample * E_code[i];
+ *P_out += bb_signal_sample * P_code[i];
+ *L_out += bb_signal_sample * L_code[i];
+ *VL_out += bb_signal_sample * VL_code[i];
+ }
+}
+
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H */
+
+
+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H
+#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <volk_gnsssdr/volk_gnsssdr_complex.h>
+#include <float.h>
+
+#ifdef LV_HAVE_AVX
+#include <immintrin.h>
+/*!
+ \brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
+ \param input The input signal input
+ \param carrier The carrier signal input
+ \param VE_code VE PRN code replica input
+ \param E_code Early PRN code replica input
+ \param P_code Prompt PRN code replica input
+ \param L_code Late PRN code replica input
+ \param VL_code VL PRN code replica input
+ \param VE_out VE correlation output
+ \param E_out Early correlation output
+ \param P_out Prompt correlation output
+ \param L_out Late correlation output
+ \param VL_out VL correlation output
+ \param num_points The number of complex values in vectors
+ */
+static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_avx(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
+{
+ unsigned int number = 0;
+ const unsigned int quarterPoints = num_points / 4;
+
+ lv_32fc_t dotProduct_VE;
+ lv_32fc_t dotProduct_E;
+ lv_32fc_t dotProduct_P;
+ lv_32fc_t dotProduct_L;
+ lv_32fc_t dotProduct_VL;
+
+ // Aux vars
+ __m256 x, y, yl, yh, z, tmp1, tmp2, z_VE, z_E, z_P, z_L, z_VL;
+ __m256 bb_signal_sample, bb_signal_sample_shuffled;
+
+ z_VE = _mm256_setzero_ps();
+ z_E = _mm256_setzero_ps();
+ z_P = _mm256_setzero_ps();
+ z_L = _mm256_setzero_ps();
+ z_VL = _mm256_setzero_ps();
+
+ //input and output vectors
+ const lv_32fc_t* _input = input;
+ const lv_32fc_t* _carrier = carrier;
+ const lv_32fc_t* _VE_code = VE_code;
+ const lv_32fc_t* _E_code = E_code;
+ const lv_32fc_t* _P_code = P_code;
+ const lv_32fc_t* _L_code = L_code;
+ const lv_32fc_t* _VL_code = VL_code;
+
+ for(;number < quarterPoints; number++)
+ {
+ // carrier wipe-off (vector point-to-point product)
+ x = _mm256_load_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
+ y = _mm256_load_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm256_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+ x = _mm256_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+ tmp2 = _mm256_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ bb_signal_sample = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ bb_signal_sample_shuffled = _mm256_shuffle_ps(bb_signal_sample,bb_signal_sample,0xB1); // Re-arrange bb_signal_sample to be ai,ar,bi,br
+
+ // correlation VE,E,P,L,VL (5x vector scalar product)
+ // VE
+ y = _mm256_load_ps((float*)_VE_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_VE = _mm256_add_ps(z_VE, z); // Add the complex multiplication results together
+
+ // Early
+ y = _mm256_load_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_E = _mm256_add_ps(z_E, z); // Add the complex multiplication results together
+
+ // Prompt
+ y = _mm256_load_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_P = _mm256_add_ps(z_P, z); // Add the complex multiplication results together
+
+ // Late
+ y = _mm256_load_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_L = _mm256_add_ps(z_L, z); // Add the complex multiplication results together
+
+ // VL
+ y = _mm256_load_ps((float*)_VL_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+ yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+ yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+ tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+ tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+ z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+ z_VL = _mm256_add_ps(z_VL, z); // Add the complex multiplication results together
+
+ /*pointer increment*/
+ _carrier += 4;
+ _input += 4;
+ _VE_code += 4;
+ _E_code += 4;
+ _P_code += 4;
+ _L_code += 4;
+ _VL_code += 4;
+ }
+
+ __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_VE[4];
+ __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_E[4];
+ __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_P[4];
+ __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_L[4];
+ __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_VL[4];
+
+ _mm256_store_ps((float*)dotProductVector_VE,z_VE); // Store the results back into the dot product vector
+ _mm256_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
+ _mm256_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
+ _mm256_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
+ _mm256_store_ps((float*)dotProductVector_VL,z_VL); // Store the results back into the dot product vector
+
+ dotProduct_VE = ( dotProductVector_VE[0] + dotProductVector_VE[1] + dotProductVector_VE[2] + dotProductVector_VE[3] );
+ dotProduct_E = ( dotProductVector_E[0] + dotProductVector_E[1] + dotProductVector_E[2] + dotProductVector_E[3] );
+ dotProduct_P = ( dotProductVector_P[0] + dotProductVector_P[1] + dotProductVector_P[2] + dotProductVector_P[3] );
+ dotProduct_L = ( dotProductVector_L[0] + dotProductVector_L[1] + dotProductVector_L[2] + dotProductVector_L[3] );
+ dotProduct_VL = ( dotProductVector_VL[0] + dotProductVector_VL[1] + dotProductVector_VL[2] + dotProductVector_VL[3] );
+
+ for (int i = 0; i<(num_points % 4); ++i)
+ {
+ dotProduct_VE += (*_input) * (*_VE_code++) * (*_carrier);
+ dotProduct_E += (*_input) * (*_E_code++) * (*_carrier);
+ dotProduct_P += (*_input) * (*_P_code++) * (*_carrier);
+ dotProduct_L += (*_input) * (*_L_code++) * (*_carrier);
+ dotProduct_VL += (*_input++) * (*_VL_code++) * (*_carrier++);
+ }
+
+ *VE_out = dotProduct_VE;
+ *E_out = dotProduct_E;
+ *P_out = dotProduct_P;
+ *L_out = dotProduct_L;
+ *VL_out = dotProduct_VL;
+}
+#endif /* LV_HAVE_AVX */
+
+#ifdef LV_HAVE_SSE3
+#include <pmmintrin.h>