Updating volk_gnsssdr to the new volk scheme

This commit is contained in:
Carles Fernandez 2016-01-12 20:15:16 +01:00
parent f4584a12c1
commit 24909510e7
91 changed files with 4643 additions and 13260 deletions

View File

@ -21,24 +21,38 @@
# Project setup
########################################################################
cmake_minimum_required(VERSION 2.6)
if(NOT DEFINED CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "Choose build type: None Debug Release RelWithDebInfo MinSizeRel")
project(volk_gnsssdr)
enable_language(CXX)
enable_language(C)
enable_testing()
set(VERSION 0.1)
set(LIBVER 0.0.0)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11 -Wall")
option(ENABLE_STRIP "Create a stripped volk_gnsssdr_profile binary (without shared libraries)" OFF)
set(CMAKE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) #allows this to be a sub-project
set(CMAKE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) #allows this to be a sub-project
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) #location for custom "Modules"
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
list(INSERT CMAKE_MODULE_PATH 0 ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules) #location for custom "Modules"
include(VolkBuildTypes)
#select the release build type by default to get optimization flags
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release")
message(STATUS "Build type not specified: defaulting to release.")
endif()
VOLK_CHECK_BUILD_TYPE(${CMAKE_BUILD_TYPE})
set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "")
message(STATUS "Build type set to ${CMAKE_BUILD_TYPE}.")
set(VERSION_INFO_MAJOR_VERSION 0)
set(VERSION_INFO_MINOR_VERSION 0)
set(VERSION_INFO_MAINT_VERSION 6)
include(VolkVersion) #setup version info
########################################################################
# Environment setup
@ -56,7 +70,9 @@ SET(CROSSCOMPILE_MULTILIB ${CROSSCOMPILE_MULTILIB} CACHE STRING "Define \"true\"
########################################################################
# Dependencies setup
########################################################################
include(GrPython) #sets PYTHON_EXECUTABLE and PYTHON_DASH_B
# Python
include(VolkPython) #sets PYTHON_EXECUTABLE and PYTHON_DASH_B
VOLK_PYTHON_CHECK_MODULE("python >= 2.5" sys "sys.version.split()[0] >= '2.5'" PYTHON_MIN_VER_FOUND)
VOLK_PYTHON_CHECK_MODULE("Cheetah >= 2.0.0" Cheetah "Cheetah.Version >= '2.0.0'" CHEETAH_FOUND)
@ -64,10 +80,12 @@ if(NOT PYTHON_MIN_VER_FOUND)
message(FATAL_ERROR "Python 2.5 or greater required to build VOLK_GNSSSDR")
endif()
# Cheetah
if(NOT CHEETAH_FOUND)
message(FATAL_ERROR "Cheetah templates required to build VOLK_GNSSSDR")
endif()
# Boost
if(MSVC)
if (NOT DEFINED BOOST_ALL_DYN_LINK)
set(BOOST_ALL_DYN_LINK TRUE)
@ -79,12 +97,14 @@ if(MSVC)
unset(BOOST_REQUIRED_COMPONENTS) #empty components list for static link
endif(BOOST_ALL_DYN_LINK)
endif(MSVC)
include(VolkBoost)
if(NOT Boost_FOUND)
message(FATAL_ERROR "VOLK_GNSSSDR Requires boost to build")
endif()
# Orc
option(ENABLE_ORC "Enable Orc" True)
if(ENABLE_ORC)
find_package(ORC)
@ -92,6 +112,25 @@ else(ENABLE_ORC)
message(STATUS "Disabling use of ORC")
endif(ENABLE_ORC)
########################################################################
# Setup doxygen
########################################################################
# Doxygen is optional: the "doc" target is only created when it is found.
find_package(Doxygen)
if(DOXYGEN_FOUND)
    # Generate the Doxyfile in the build tree; @ONLY limits substitution
    # to @VAR@ references so ${...} text in the template is left alone.
    configure_file(
        ${CMAKE_SOURCE_DIR}/Doxyfile.in
        ${CMAKE_BINARY_DIR}/Doxyfile
        @ONLY
    )

    # "make doc" runs doxygen on the generated Doxyfile from the build tree.
    add_custom_target(doc
        COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_BINARY_DIR}/Doxyfile
        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
        COMMENT "Generating documentation with Doxygen"
        VERBATIM
    )
endif()
########################################################################
# Setup the package config file
########################################################################
@ -129,6 +168,8 @@ install(FILES
${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_prefs.h
${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_complex.h
${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_common.h
${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_avx_intrinsics.h
${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_sse3_intrinsics.h
${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr.h
${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_cpu.h
${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_config_fixed.h
@ -138,6 +179,58 @@ install(FILES
COMPONENT "volk_gnsssdr_devel"
)
########################################################################
# On Apple only, set install name and use rpath correctly, if not already set
########################################################################
# Each of the three cache entries below is only written when it currently
# evaluates to false (unset/empty/OFF), so a value supplied by the user or
# by an enclosing project is kept as is.
# NOTE(review): VOLK_LIBRARY_DIR is not set in this part of the file; it is
# presumably defined by an earlier section - confirm.
if(APPLE)
    if(NOT CMAKE_INSTALL_NAME_DIR)
        set(CMAKE_INSTALL_NAME_DIR
            ${CMAKE_INSTALL_PREFIX}/${VOLK_LIBRARY_DIR}
            CACHE PATH "Library Install Name Destination Directory" FORCE)
    endif()

    if(NOT CMAKE_INSTALL_RPATH)
        set(CMAKE_INSTALL_RPATH
            ${CMAKE_INSTALL_PREFIX}/${VOLK_LIBRARY_DIR}
            CACHE PATH "Library Install RPath" FORCE)
    endif()

    if(NOT CMAKE_BUILD_WITH_INSTALL_RPATH)
        set(CMAKE_BUILD_WITH_INSTALL_RPATH ON
            CACHE BOOL "Do Build Using Library Install RPath" FORCE)
    endif()
endif()
########################################################################
# Create uninstall target
########################################################################
# Instantiate the uninstall script in the build tree (@VAR@ substitution only).
configure_file(
    ${CMAKE_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in
    ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake
    @ONLY
)

# Only add the target if there isn't one defined already
# (an enclosing project may already provide its own "uninstall").
if(NOT TARGET uninstall)
    add_custom_target(uninstall
        COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake
    )
endif()
########################################################################
# Install our Cmake modules into $prefix/lib/cmake/volk_gnsssdr
# See "Package Configuration Files" on page:
# http://www.cmake.org/Wiki/CMake/Tutorials/Packaging
########################################################################
# Generate VolkConfigVersion.cmake in the build tree from its template.
# @ONLY restricts substitution to @VAR@ references, so any ${...} text in
# the template is copied through unchanged.
configure_file(
${CMAKE_SOURCE_DIR}/cmake/Modules/VolkConfigVersion.cmake.in
${CMAKE_BINARY_DIR}/cmake/Modules/VolkConfigVersion.cmake
@ONLY)
########################################################################
# Install cmake search routine for external use
########################################################################
@ -147,30 +240,23 @@ if(NOT CMAKE_MODULES_DIR)
endif(NOT CMAKE_MODULES_DIR)
install(
FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/VolkConfig.cmake
FILES
${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/VolkConfig.cmake
${CMAKE_CURRENT_BINARY_DIR}/cmake/Modules/VolkConfigVersion.cmake
DESTINATION ${CMAKE_MODULES_DIR}/volk_gnsssdr
COMPONENT "volk_gnsssdr_devel"
)
########################################################################
# On Apple only, set install name and use rpath correctly, if not already set
# Option to enable QA testing, on by default
########################################################################
if(APPLE)
if(NOT CMAKE_INSTALL_NAME_DIR)
set(CMAKE_INSTALL_NAME_DIR
${CMAKE_INSTALL_PREFIX}/lib CACHE
PATH "Library Install Name Destination Directory" FORCE)
endif(NOT CMAKE_INSTALL_NAME_DIR)
if(NOT CMAKE_INSTALL_RPATH)
set(CMAKE_INSTALL_RPATH
${CMAKE_INSTALL_PREFIX}/lib CACHE
PATH "Library Install RPath" FORCE)
endif(NOT CMAKE_INSTALL_RPATH)
if(NOT CMAKE_BUILD_WITH_INSTALL_RPATH)
set(CMAKE_BUILD_WITH_INSTALL_RPATH ON CACHE
BOOL "Do Build Using Library Install RPath" FORCE)
endif(NOT CMAKE_BUILD_WITH_INSTALL_RPATH)
endif(APPLE)
# User-facing switch for the QA tests; the chosen state is echoed at
# configure time together with a hint on how to change it.
option(ENABLE_TESTING "Enable QA testing" ON)
if(NOT ENABLE_TESTING)
    message(STATUS "QA Testing is disabled.")
else()
    message(STATUS "QA Testing is enabled.")
endif()
message(STATUS " Modify using: -DENABLE_TESTING=ON/OFF")
########################################################################
# Setup the library
@ -183,16 +269,6 @@ add_subdirectory(lib)
add_subdirectory(apps)
add_subdirectory(python/volk_gnsssdr_modtool)
########################################################################
# Create uninstall target
########################################################################
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake
@ONLY)
add_custom_target(uninstall
COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
########################################################################
# Print summary

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,196 @@
<doxygenlayout version="1.0">
<!-- Generated by doxygen 1.8.6 -->
<!-- Navigation index tabs for HTML output -->
<navindex>
<tab type="mainpage" visible="yes" title=""/>
<tab type="pages" visible="yes" title="" intro=""/>
<tab type="modules" visible="yes" title="" intro=""/>
<!--
<tab type="namespaces" visible="yes" title="">
<tab type="namespacelist" visible="yes" title="" intro=""/>
<tab type="namespacemembers" visible="yes" title="" intro=""/>
</tab>
<tab type="classes" visible="yes" title="">
<tab type="classlist" visible="yes" title="" intro=""/>
<tab type="classindex" visible="$ALPHABETICAL_INDEX" title=""/>
<tab type="hierarchy" visible="yes" title="" intro=""/>
<tab type="classmembers" visible="yes" title="" intro=""/>
</tab>
-->
<tab type="files" visible="yes" title="">
<tab type="filelist" visible="yes" title="" intro=""/>
<tab type="globals" visible="yes" title="" intro=""/>
</tab>
<tab type="examples" visible="yes" title="" intro=""/>
</navindex>
<!-- Layout definition for a class page -->
<class>
<briefdescription visible="yes"/>
<includes visible="$SHOW_INCLUDE_FILES"/>
<inheritancegraph visible="$CLASS_GRAPH"/>
<collaborationgraph visible="$COLLABORATION_GRAPH"/>
<memberdecl>
<nestedclasses visible="yes" title=""/>
<publictypes title=""/>
<services title=""/>
<interfaces title=""/>
<publicslots title=""/>
<signals title=""/>
<publicmethods title=""/>
<publicstaticmethods title=""/>
<publicattributes title=""/>
<publicstaticattributes title=""/>
<protectedtypes title=""/>
<protectedslots title=""/>
<protectedmethods title=""/>
<protectedstaticmethods title=""/>
<protectedattributes title=""/>
<protectedstaticattributes title=""/>
<packagetypes title=""/>
<packagemethods title=""/>
<packagestaticmethods title=""/>
<packageattributes title=""/>
<packagestaticattributes title=""/>
<properties title=""/>
<events title=""/>
<privatetypes title=""/>
<privateslots title=""/>
<privatemethods title=""/>
<privatestaticmethods title=""/>
<privateattributes title=""/>
<privatestaticattributes title=""/>
<friends title=""/>
<related title="" subtitle=""/>
<membergroups visible="yes"/>
</memberdecl>
<detaileddescription title=""/>
<memberdef>
<inlineclasses title=""/>
<typedefs title=""/>
<enums title=""/>
<services title=""/>
<interfaces title=""/>
<constructors title=""/>
<functions title=""/>
<related title=""/>
<variables title=""/>
<properties title=""/>
<events title=""/>
</memberdef>
<allmemberslink visible="yes"/>
<usedfiles visible="$SHOW_USED_FILES"/>
<authorsection visible="yes"/>
</class>
<!-- Layout definition for a namespace page -->
<namespace>
<briefdescription visible="yes"/>
<memberdecl>
<nestednamespaces visible="yes" title=""/>
<constantgroups visible="yes" title=""/>
<classes visible="yes" title=""/>
<typedefs title=""/>
<enums title=""/>
<functions title=""/>
<variables title=""/>
<membergroups visible="yes"/>
</memberdecl>
<detaileddescription title=""/>
<memberdef>
<inlineclasses title=""/>
<typedefs title=""/>
<enums title=""/>
<functions title=""/>
<variables title=""/>
</memberdef>
<authorsection visible="yes"/>
</namespace>
<!-- Layout definition for a file page -->
<file>
<briefdescription visible="yes"/>
<includes visible="$SHOW_INCLUDE_FILES"/>
<includegraph visible="$INCLUDE_GRAPH"/>
<includedbygraph visible="$INCLUDED_BY_GRAPH"/>
<sourcelink visible="yes"/>
<memberdecl>
<classes visible="yes" title=""/>
<namespaces visible="yes" title=""/>
<constantgroups visible="yes" title=""/>
<defines title=""/>
<typedefs title=""/>
<enums title=""/>
<functions title=""/>
<variables title=""/>
<membergroups visible="yes"/>
</memberdecl>
<detaileddescription title=""/>
<memberdef>
<inlineclasses title=""/>
<defines title=""/>
<typedefs title=""/>
<enums title=""/>
<functions title=""/>
<variables title=""/>
</memberdef>
<authorsection/>
</file>
<!-- Layout definition for a group page -->
<group>
<briefdescription visible="yes"/>
<groupgraph visible="$GROUP_GRAPHS"/>
<memberdecl>
<nestedgroups visible="yes" title=""/>
<dirs visible="yes" title=""/>
<files visible="yes" title=""/>
<namespaces visible="yes" title=""/>
<classes visible="yes" title=""/>
<defines title=""/>
<typedefs title=""/>
<enums title=""/>
<enumvalues title=""/>
<functions title=""/>
<variables title=""/>
<signals title=""/>
<publicslots title=""/>
<protectedslots title=""/>
<privateslots title=""/>
<events title=""/>
<properties title=""/>
<friends title=""/>
<membergroups visible="yes"/>
</memberdecl>
<detaileddescription title=""/>
<memberdef>
<pagedocs/>
<inlineclasses title=""/>
<defines title=""/>
<typedefs title=""/>
<enums title=""/>
<enumvalues title=""/>
<functions title=""/>
<variables title=""/>
<signals title=""/>
<publicslots title=""/>
<protectedslots title=""/>
<privateslots title=""/>
<events title=""/>
<properties title=""/>
<friends title=""/>
</memberdef>
<authorsection visible="yes"/>
</group>
<!-- Layout definition for a directory page -->
<directory>
<briefdescription visible="yes"/>
<directorygraph visible="yes"/>
<memberdecl>
<dirs visible="yes"/>
<files visible="yes"/>
</memberdecl>
<detaileddescription title=""/>
</directory>
</doxygenlayout>

View File

@ -1,20 +1,21 @@
This is VOLK_GNSSSDR, the Vector Optimized Library of Kernels for GNSS-SDR
########################################################################
# Adding proto-kernels to the module
########################################################################
1) Add your proto-kernels inside the kernels/ folder, and the ORC implementations inside the orc/ folder. Add the macro implementations inside the /kernels/CommonMacros folder (these folders are found in the root of the volk_gnsssdr module).
2) Add one profiling line for each of the proto-kernels inside the /apps/volk_gnsssdr_profile.cc file.
3) Add one test line for each of the proto-kernels inside the /lib/testqa.cc file. ########################################################################
# Modifications to allow profiling of some proto-kernels with special parameters
######################################################################## Some of the proto-kernels that GNSS-SDR needs are not supported by the profiling environment of the volk_gnsssdr module. In order to profile them some modifications need to be done to two files: 1) src/algorithms/libs/volk_gnsssdr/lib/qa_utils.cc At the first part of this file there are defined the parameters supported by the environment. The number after run_cast_test indicates the total number of parameters passed to the proto-kernel (input +output parameters). The other part indicates the type of the data passed. Inside func(....) you will need to add the same number of buffs[ ] that the one specified after run_cast_test.
Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
2) src/algorithms/libs/volk_gnsssdr/lib/qa_utils.h In the header you will need to add typedefs for the new definitions made in the .cc file. Take care: you will need to add as many void * parameters as the number specified after run_cast_test.
This file is part of GNSS-SDR.
3) To be able to use volk_gnsssdr and default volk functions at the same time in the same file, it is required to add the template files that volk_gnsssdr module uses at build time to generate some headers.
The files are found inside tmpl/: volk_gnsssdr.tmpl.h
volk_gnsssdr_typedefs.tmpl.h
volk_gnsssdr_machines.tmpl.h
volk_gnsssdr_cpu.tmpl.h
volk_gnsssdr_config_fixed.tmpl.h
GNSS-SDR is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
GNSS-SDR is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.

View File

@ -20,8 +20,6 @@
########################################################################
# Setup profiler
########################################################################
if(Boost_FOUND)
if(MSVC)
include_directories(${CMAKE_SOURCE_DIR}/cmake/msvc)
endif(MSVC)
@ -36,21 +34,26 @@ include_directories(
${Boost_INCLUDE_DIRS}
)
# Pick the extra library the executables below must link against:
# "c++" when the C++ compiler identifies itself as Clang, nothing otherwise.
# The fallback used to be an elseif() repeating the very same condition, so
# it could never run and the variable was left undefined for other compilers.
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
    set(Clang_required_link "c++")
else()
    set(Clang_required_link "")
endif()
# MAKE volk_gnsssdr_profile
add_executable(volk_gnsssdr_profile
${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr_profile.cc
${CMAKE_SOURCE_DIR}/lib/qa_utils.cc
)
target_link_libraries(volk_gnsssdr_profile volk_gnsssdr ${Boost_LIBRARIES} ${Clang_required_link})
add_dependencies(volk_gnsssdr_profile volk_gnsssdr)
# Link against ORC only when it was found; otherwise define orc_lib as empty
# so that ${orc_lib} expands to nothing in the target_link_libraries() calls.
# The fallback used to be an elseif(ORC_FOUND), i.e. the same condition as
# the if(), which made that branch unreachable.
if(ORC_FOUND)
    set(orc_lib ${ORC_LIBRARIES})
else()
    set(orc_lib "")
endif()
target_link_libraries(volk_gnsssdr_profile volk_gnsssdr ${Boost_LIBRARIES} ${Clang_required_link} ${orc_lib})
if(ENABLE_STRIP)
if(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
@ -59,6 +62,7 @@ if(ENABLE_STRIP)
endif(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
endif(ENABLE_STRIP)
install(
TARGETS volk_gnsssdr_profile
DESTINATION bin
@ -67,13 +71,12 @@ install(
# MAKE volk_gnsssdr-config-info
add_executable(volk_gnsssdr-config-info volk_gnsssdr-config-info.cc)
target_link_libraries(volk_gnsssdr-config-info volk_gnsssdr ${Boost_LIBRARIES} ${Clang_required_link})
add_dependencies(volk_gnsssdr-config-info volk_gnsssdr)
install(
TARGETS volk_gnsssdr-config-info
DESTINATION bin
COMPONENT "volk_gnsssdr"
)
target_link_libraries(volk_gnsssdr-config-info volk_gnsssdr ${Boost_LIBRARIES} ${Clang_required_link} ${orc_lib})
#install(
# TARGETS volk_gnsssdr-config-info
# DESTINATION bin
# COMPONENT "volk_gnsssdr"
#)
endif(Boost_FOUND)

View File

@ -1,4 +1,3 @@
/* -*- c++ -*- */
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
@ -21,11 +20,10 @@
#include <config.h>
#endif
#include <iostream>
#include <boost/program_options.hpp>
#include "volk_gnsssdr/constants.h"
#include <volk_gnsssdr/constants.h>
#include "volk_gnsssdr/volk_gnsssdr.h"
#include <boost/program_options.hpp>
#include <iostream>
namespace po = boost::program_options;
@ -38,7 +36,6 @@ main(int argc, char **argv)
desc.add_options()
("help,h", "print help message")
("prefix", "print VOLK installation prefix")
("builddate", "print VOLK build date (RFC2822 format)")
("cc", "print VOLK C compiler version")
("cflags", "print VOLK CFLAGS")
("all-machines", "print VOLK machines built into library")
@ -65,9 +62,6 @@ main(int argc, char **argv)
if(vm.count("prefix"))
std::cout << volk_gnsssdr_prefix() << std::endl;
if(vm.count("builddate"))
std::cout << volk_gnsssdr_build_date() << std::endl;
if(vm.count("version"))
std::cout << volk_gnsssdr_version() << std::endl;

View File

@ -1,4 +1,3 @@
/* -*- c++ -*- */
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
@ -17,61 +16,329 @@
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#include <sys/stat.h>
#include <sys/types.h>
#include <ciso646>
#include <iostream>
#include <fstream>
#include <vector>
#include <boost/foreach.hpp>
#include <boost/filesystem.hpp>
#include <boost/program_options.hpp>
#include "qa_utils.h"
#include "kernel_tests.h"
#include "volk_gnsssdr_profile.h"
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <volk_gnsssdr/volk_gnsssdr_prefs.h>
#include "qa_utils.h"
#include <ciso646>
#include <vector>
#include <boost/filesystem.hpp>
#include <boost/program_options.hpp>
#include <boost/xpressive/xpressive.hpp>
#include <iostream>
#include <fstream>
#include <sys/stat.h>
#include <sys/types.h>
namespace fs = boost::filesystem;
int main(int argc, char *argv[]) {
// Adding program options
boost::program_options::options_description desc("Options");
desc.add_options()
("help,h", "Print help messages")
("benchmark,b",
boost::program_options::value<bool>()->default_value( false )
->implicit_value( true ),
"Run all kernels (benchmark mode)")
("tol,t",
boost::program_options::value<float>()->default_value( 1e-6 ),
"Set the default error tolerance for tests")
("vlen,v",
boost::program_options::value<int>()->default_value( 131071 ),
"Set the default vector length for tests") // default is a mersenne prime
("iter,i",
boost::program_options::value<int>()->default_value( 1987 ),
"Set the default number of test iterations per kernel")
("tests-regex,R",
boost::program_options::value<std::string>(),
"Run tests matching regular expression.")
("update,u",
boost::program_options::value<bool>()->default_value( false )
->implicit_value( true ),
"Run only kernels missing from config; use -R to further restrict the candidates")
("dry-run,n",
boost::program_options::value<bool>()->default_value( false )
->implicit_value( true ),
"Dry run. Respect other options, but don't write to file")
("json,j",
boost::program_options::value<std::string>(),
"JSON output file")
;
// Handle the options that were given
boost::program_options::variables_map vm;
bool benchmark_mode;
std::string kernel_regex;
std::ofstream json_file;
float def_tol;
lv_32fc_t def_scalar;
int def_iter;
int def_vlen;
bool def_benchmark_mode;
std::string def_kernel_regex;
bool update_mode = false;
bool dry_run = false;
// Handle the provided options
try {
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
boost::program_options::notify(vm);
benchmark_mode = vm.count("benchmark")?vm["benchmark"].as<bool>():false;
if ( vm.count("tests-regex" ) ) {
kernel_regex = vm["tests-regex"].as<std::string>();
}
else {
kernel_regex = ".*";
}
def_tol = vm["tol"].as<float>();
def_scalar = 327.0;
def_vlen = vm["vlen"].as<int>();
def_iter = vm["iter"].as<int>();
def_benchmark_mode = benchmark_mode;
def_kernel_regex = kernel_regex;
update_mode = vm["update"].as<bool>();
dry_run = vm["dry-run"].as<bool>();
}
catch (boost::program_options::error& error) {
std::cerr << "Error: " << error.what() << std::endl << std::endl;
std::cerr << desc << std::endl;
return 1;
}
/** --help option */
if ( vm.count("help") ) {
std::cout << "The VOLK profiler." << std::endl
<< desc << std::endl;
return 0;
}
if ( vm.count("json") ) {
std::string filename;
try {
filename = vm["json"].as<std::string>();
}
catch (boost::bad_any_cast& error) {
std::cerr << error.what() << std::endl;
return 1;
}
json_file.open( filename.c_str() );
}
volk_gnsssdr_test_params_t test_params(def_tol, def_scalar, def_vlen, def_iter,
def_benchmark_mode, def_kernel_regex);
// Run tests
std::vector<volk_gnsssdr_test_results_t> results;
if(update_mode) {
read_results(&results);
}
// Initialize the list of tests
// the default test parameters come from options
std::vector<volk_gnsssdr_test_case_t> test_cases = init_test_list(test_params);
boost::xpressive::sregex kernel_expression;
try {
kernel_expression = boost::xpressive::sregex::compile(kernel_regex);
}
catch (boost::xpressive::regex_error& error) {
std::cerr << "Error occured while compiling regex" << std::endl << std::endl;
return 1;
}
// Iteratate through list of tests running each one
for(unsigned int ii = 0; ii < test_cases.size(); ++ii) {
bool regex_match = true;
volk_gnsssdr_test_case_t test_case = test_cases[ii];
// if the kernel name matches regex then do the test
if(boost::xpressive::regex_search(test_case.name(), kernel_expression)) {
regex_match = true;
}
else {
regex_match = false;
}
// if we are in update mode check if we've already got results
// if we have any, then no need to test that kernel
bool update = true;
if(update_mode) {
for(unsigned int jj=0; jj < results.size(); ++jj) {
if(results[jj].name == test_case.name() ||
results[jj].name == test_case.puppet_master_name()) {
update = false;
break;
}
}
}
if( regex_match && update ) {
try {
run_volk_gnsssdr_tests(test_case.desc(), test_case.kernel_ptr(), test_case.name(),
test_case.test_parameters(), &results, test_case.puppet_master_name());
}
catch (std::string error) {
std::cerr << "Caught Exception in 'run_volk_gnsssdr_tests': " << error << std::endl;
}
}
}
// Output results according to provided options
if(vm.count("json")) {
write_json(json_file, results);
json_file.close();
}
if(!dry_run) {
write_results(&results, false);
}
else {
std::cout << "Warning: this was a dry-run. Config not generated" << std::endl;
}
}
/*
 * Load previously stored profiling results from the config file whose path
 * is returned by volk_gnsssdr_get_config_path() and append them to *results.
 *
 * A line is accepted only when it consists of exactly three non-empty
 * fields separated by single spaces; every other line (for instance the
 * '#' header lines) is ignored. The columns are interpreted in the order
 * in which write_results() emits them: name, best_arch_a, best_arch_u.
 * When the config file does not exist, *results is left untouched.
 */
void read_results(std::vector<volk_gnsssdr_test_results_t> *results)
{
    char path[1024];
    volk_gnsssdr_get_config_path(path);

    const fs::path config_path(path);
    if(!fs::exists(config_path)) {
        return;
    }

    // kernel names MUST be less than 128 chars, which is
    // a length restricted by volk_gnsssdr/volk_gnsssdr_prefs.c
    const std::size_t max_field_len = 127;

    // a config exists and we are reading results from it
    std::ifstream config(config_path.string().c_str());
    std::string config_line;
    while(std::getline(config, config_line)) {
        // Split the line on single spaces. Empty fields are kept on purpose
        // so that lines with leading, trailing or doubled spaces do not
        // count as exactly three valid fields.
        std::vector<std::string> fields;
        std::size_t field_start = 0;
        while(field_start <= config_line.size()) {
            std::size_t field_end = config_line.find(' ', field_start);
            if(field_end == std::string::npos) {
                // last field: runs up to the end of the line
                field_end = config_line.size();
            }
            std::string field = config_line.substr(field_start, field_end - field_start);
            if(field.size() > max_field_len) {
                field.resize(max_field_len);
            }
            fields.push_back(field);
            field_start = field_end + 1;
        }

        if(fields.size() == 3 &&
           !fields[0].empty() && !fields[1].empty() && !fields[2].empty()) {
            volk_gnsssdr_test_results_t kernel_result;
            kernel_result.name = fields[0];
            kernel_result.config_name = fields[0];
            // same column order as write_results(): aligned first, then unaligned
            kernel_result.best_arch_a = fields[1];
            kernel_result.best_arch_u = fields[2];
            results->push_back(kernel_result);
        }
    }
}
void write_results(const std::vector<volk_gnsssdr_test_results_t> *results, bool update_result)
{
char path[1024];
volk_gnsssdr_get_config_path(path);
const fs::path config_path(path);
// Until we can update the config on a kernel by kernel basis
// do not overwrite volk_gnsssdr_config when using a regex.
if (not fs::exists(config_path.branch_path()))
{
std::cout << "Creating " << config_path.branch_path() << "..." << std::endl;
fs::create_directories(config_path.branch_path());
}
std::ofstream config;
if(update_result) {
std::cout << "Updating " << config_path << "..." << std::endl;
config.open(config_path.string().c_str(), std::ofstream::app);
if (!config.is_open()) { //either we don't have write access or we don't have the dir yet
std::cout << "Error opening file " << config_path << std::endl;
}
}
else {
std::cout << "Writing " << config_path << "..." << std::endl;
config.open(config_path.string().c_str());
if (!config.is_open()) { //either we don't have write access or we don't have the dir yet
std::cout << "Error opening file " << config_path << std::endl;
}
config << "\
#this file is generated by volk_gnsssdr_profile.\n\
#the function name is followed by the preferred architecture.\n\
";
}
std::vector<volk_gnsssdr_test_results_t>::const_iterator profile_results;
for(profile_results = results->begin(); profile_results != results->end(); ++profile_results) {
config << profile_results->config_name << " "
<< profile_results->best_arch_a << " "
<< profile_results->best_arch_u << std::endl;
}
config.close();
}
void write_json(std::ofstream &json_file, std::vector<volk_gnsssdr_test_results_t> results)
{
json_file << "{" << std::endl;
json_file << " \"volk_tests\": [" << std::endl;
json_file << " \"volk_gnsssdr_tests\": [" << std::endl;
size_t len = results.size();
size_t i = 0;
BOOST_FOREACH(volk_gnsssdr_test_results_t &result, results) {
std::vector<volk_gnsssdr_test_results_t>::iterator result;
for(result = results.begin(); result != results.end(); ++result) {
json_file << " {" << std::endl;
json_file << " \"name\": \"" << result.name << "\"," << std::endl;
json_file << " \"vlen\": " << result.vlen << "," << std::endl;
json_file << " \"iter\": " << result.iter << "," << std::endl;
json_file << " \"best_arch_a\": \"" << result.best_arch_a
<< "\"," << std::endl;
json_file << " \"best_arch_u\": \"" << result.best_arch_u
<< "\"," << std::endl;
json_file << " \"name\": \"" << result->name << "\"," << std::endl;
json_file << " \"vlen\": " << (int)(result->vlen) << "," << std::endl;
json_file << " \"iter\": " << result->iter << "," << std::endl;
json_file << " \"best_arch_a\": \"" << result->best_arch_a
<< "\"," << std::endl;
json_file << " \"best_arch_u\": \"" << result->best_arch_u
<< "\"," << std::endl;
json_file << " \"results\": {" << std::endl;
size_t results_len = result.results.size();
size_t results_len = result->results.size();
size_t ri = 0;
typedef std::pair<std::string, volk_gnsssdr_test_time_t> tpair;
BOOST_FOREACH(tpair pair, result.results)
{
volk_gnsssdr_test_time_t time = pair.second;
std::map<std::string, volk_gnsssdr_test_time_t>::iterator kernel_time_pair;
for(kernel_time_pair = result->results.begin(); kernel_time_pair != result->results.end(); ++kernel_time_pair) {
volk_gnsssdr_test_time_t time = kernel_time_pair->second;
json_file << " \"" << time.name << "\": {" << std::endl;
json_file << " \"name\": \"" << time.name << "\"," << std::endl;
json_file << " \"time\": " << time.time << "," << std::endl;
json_file << " \"units\": \"" << time.units << "\"" << std::endl;
json_file << " }" ;
if(ri+1 != results_len)
{
json_file << ",";
}
if(ri+1 != results_len) {
json_file << ",";
}
json_file << std::endl;
ri++;
}
json_file << " }" << std::endl;
json_file << " }";
if(i+1 != len)
{
json_file << ",";
}
if(i+1 != len) {
json_file << ",";
}
json_file << std::endl;
i++;
}
@ -79,155 +346,4 @@ void write_json(std::ofstream &json_file, std::vector<volk_gnsssdr_test_results_
json_file << "}" << std::endl;
}
int main(int argc, char *argv[])
{
// Adding program options
boost::program_options::options_description desc("Options");
desc.add_options()
("help,h", "Print help messages")
("benchmark,b",
boost::program_options::value<bool>()->default_value( false )
->implicit_value( true ),
"Run all kernels (benchmark mode)")
("tests-regex,R",
boost::program_options::value<std::string>(),
"Run tests matching regular expression.")
("json,j",
boost::program_options::value<std::string>(),
"JSON output file")
;
// Handle the options that were given
boost::program_options::variables_map vm;
bool benchmark_mode;
std::string kernel_regex;
bool store_results = true;
std::ofstream json_file;
try {
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
boost::program_options::notify(vm);
benchmark_mode = vm.count("benchmark")?vm["benchmark"].as<bool>():false;
if ( vm.count("tests-regex" ) )
{
kernel_regex = vm["tests-regex"].as<std::string>();
store_results = false;
std::cout << "Warning: using a regexp will not save results to a config" << std::endl;
}
else
{
kernel_regex = ".*";
store_results = true;
}
} catch (boost::program_options::error& error)
{
std::cerr << "Error: " << error.what() << std::endl << std::endl;
std::cerr << desc << std::endl;
return 1;
}
/** --help option
*/
if ( vm.count("help") )
{
std::cout << "The GNSS-SDR VOLK profiler." << std::endl
<< desc << std::endl;
return 0;
}
if ( vm.count("json") )
{
json_file.open( vm["json"].as<std::string>().c_str() );
}
// Run tests
std::vector<volk_gnsssdr_test_results_t> results;
//VOLK_PROFILE(volk_gnsssdr_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_16i_branch_4_state_8, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_16i_max_star_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_16i_max_star_horizontal_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_16i_permute_and_scalar_add, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_32fc_s32f_x2_power_spectral_density_32f, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_32f_s32f_32f_fm_detect_32f, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_32u_popcnt, 0, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_64u_popcnt, 0, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, lv_32fc_t(1.0, 0.5), 204602, 1000, &results, benchmark_mode, kernel_regex);
//GNSS-SDR PROTO-KERNELS
//lv_32fc_t sfv = lv_cmake((float)1, (float)2);
//example: VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, sfv, 204602, 1000, &results, benchmark_mode, kernel_regex);
//CAN NOT BE TESTED YET BECAUSE VOLK MODULE DOES NOT SUPPORT IT:
//VOLK_PROFILE(volk_gnsssdr_s32f_x2_update_local_carrier_32fc, 1e-4, 0, 16007, 1, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc, 1e-4, 0, 7, 1, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_TEST_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_convert_16ic, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_convert_8ic, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_s32f_convert_8ic, 1e-4, 5, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8i_accumulator_s8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8i_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8i_max_s8i, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8i_x2_add_8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_conjugate_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_magnitude_squared_8i, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x2_dot_prod_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x2_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8u_x2_multiply_8u, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_64f_accumulator_64f, 1e-4, 0, 16000, 1000, &results, benchmark_mode, kernel_regex);
// Until we can update the config on a kernel by kernel basis
// do not overwrite volk_config when using a regex.
if(store_results)
{
char path[1024];
volk_gnsssdr_get_config_path(path);
const fs::path config_path(path);
if (not fs::exists(config_path.branch_path()))
{
std::cout << "Creating " << config_path.branch_path() << "..." << std::endl;
fs::create_directories(config_path.branch_path());
}
std::cout << "Writing " << config_path << "..." << std::endl;
std::ofstream config(config_path.string().c_str());
if(!config.is_open())
{ //either we don't have write access or we don't have the dir yet
std::cout << "Error opening file " << config_path << std::endl;
}
config << " # this file is generated by volk_gnsssdr_profile.\n # the function name is followed by the preferred architecture.\n";
BOOST_FOREACH(volk_gnsssdr_test_results_t result, results)
{
config << result.config_name << " "
<< result.best_arch_a << " "
<< result.best_arch_u << std::endl;
}
config.close();
}
else
{
std::cout << "Warning: config not generated" << std::endl;
}
}

View File

@ -0,0 +1,34 @@
/*!
* \file volk_gnsssdr_profile.h
* \author Carles Fernandez-Prades, 2015. cfernandez(at)cttc.es
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
/*!
 * Prototypes of the profiler's result-handling helpers; their definitions
 * are not part of this header.
 */

/*! NOTE(review): presumably fills *results with previously stored profiling
 *  results -- confirm against the definition. */
void read_results(std::vector<volk_gnsssdr_test_results_t> *results);

/*! NOTE(review): presumably persists *results; the exact effect of
 *  update_result is not visible from this header -- confirm against the
 *  definition. */
void write_results(const std::vector<volk_gnsssdr_test_results_t> *results, bool update_result);

/*! Streams the given results to json_file as JSON. Note that the results
 *  vector is taken by value. */
void write_json(std::ofstream &json_file, std::vector<volk_gnsssdr_test_results_t> results);

View File

@ -1,6 +1,9 @@
# Ask pkg-config about ORC (newer than 0.4.11); the results are stored in PC_ORC_*.
find_package(PkgConfig)
pkg_check_modules(PC_ORC "orc-0.4 > 0.4.11")

# Locate the orcc compiler: try the tools directory reported by pkg-config
# first, then ORC_ROOT and the install prefix.
find_program(ORCC_EXECUTABLE
    orcc
    HINTS
        ${PC_ORC_TOOLSDIR}
    PATHS
        ${ORC_ROOT}/bin
        ${CMAKE_INSTALL_PREFIX}/bin
)

View File

@ -0,0 +1,205 @@
# Copyright 2015 Free Software Foundation, Inc.
#
# This file is part of Volk
#
# Volk is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# Volk is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
# License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Volk; see the file COPYING. If not, write to the Free
# Software Foundation, Inc., 51 Franklin Street, Boston, MA
# 02110-1301, USA.
# Include guard: the first inclusion records the marker variable, any later
# inclusion leaves this file immediately.
if(NOT DEFINED __INCLUDED_VOLK_ADD_TEST)
    set(__INCLUDED_VOLK_ADD_TEST TRUE)
else()
    return()
endif()
########################################################################
# Add a unit test and setup the environment for it.
# Encloses ADD_TEST, with additional functionality to create a shell
# script that sets the environment to gain access to in-build binaries
# properly. The following variables are used to pass in settings:
#
# NAME - the test name
# SOURCES - sources for the test
# TARGET_DEPS - build target dependencies (e.g., libraries)
# EXTRA_LIB_DIRS - other directories for the library path
# ENVIRONS - other environment key/value pairs
# ARGS - arguments for the test
########################################################################
function(VOLK_ADD_TEST test_name)
    # Builds the executable <test_name> from SOURCES, links it against
    # TARGET_DEPS and registers a CTest test that runs it through a generated
    # wrapper script (shell script on UNIX, batch file on WIN32) which first
    # exports the requested environment.
    # NOTE(review): the test is registered as "qa_<test_name>" on UNIX but as
    # "<test_name>" on WIN32; kept as is so existing test names do not change.

    #parse the arguments for component names
    include(CMakeParseArgumentsCopy)
    CMAKE_PARSE_ARGUMENTS(VOLK_TEST "" "" "SOURCES;TARGET_DEPS;EXTRA_LIB_DIRS;ENVIRONS;ARGS" ${ARGN})

    #set the initial environs to use
    set(environs ${VOLK_TEST_ENVIRONS})

    #create the initial library path
    file(TO_NATIVE_PATH "${VOLK_TEST_EXTRA_LIB_DIRS}" libpath)

    #set the source directory, which is mostly FYI
    file(TO_NATIVE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" srcdir)
    list(APPEND environs "srcdir=\"${srcdir}\"")

    #http://www.cmake.org/pipermail/cmake/2009-May/029464.html
    #Replaced this add test + set environs code with the shell script generation.
    #Its nicer to be able to manually run the shell script to diagnose problems.
    if(UNIX)
        if(APPLE)
            set(LD_PATH_VAR "DYLD_LIBRARY_PATH")
        else()
            set(LD_PATH_VAR "LD_LIBRARY_PATH")
        endif()

        #create a list of target directories to be determined by the
        #"add_test" command, via the $<FOO:BAR> operator; make sure the
        #test's directory is first, since it ($1) is prepended to PATH.
        unset(TARGET_DIR_LIST)
        foreach(target ${test_name} ${VOLK_TEST_TARGET_DEPS})
            list(APPEND TARGET_DIR_LIST "\$<TARGET_FILE_DIR:${target}>")
        endforeach()

        #augment the PATH to start with the directory of the test
        set(binpath "\"$1:\$PATH\"")
        list(APPEND environs "PATH=${binpath}")

        #set the shell to use
        if(CMAKE_CROSSCOMPILING)
            set(SHELL "/bin/sh")
        else()
            find_program(SHELL sh)
        endif()

        #check to see if the shell supports "$*" expansion with IFS
        if(NOT TESTED_SHELL_SUPPORTS_IFS)
            set(TESTED_SHELL_SUPPORTS_IFS TRUE CACHE BOOL "")
            set(sh_file ${CMAKE_CURRENT_BINARY_DIR}/ifs_test.sh)
            file(WRITE ${sh_file} "#!${SHELL}\n")
            file(APPEND ${sh_file} "export IFS=:\n")
            file(APPEND ${sh_file} "echo \"$*\"\n")
            #make the shell file executable
            execute_process(COMMAND chmod +x ${sh_file})

            #execute the shell script
            execute_process(COMMAND ${sh_file} "a" "b" "c"
                OUTPUT_VARIABLE output OUTPUT_STRIP_TRAILING_WHITESPACE
            )

            #check the output to see if it is correct; "${output}" is quoted
            #so that an empty result (script could not run) compares as
            #"not equal" instead of aborting with a wrong argument count.
            string(COMPARE EQUAL "${output}" "a:b:c" SHELL_SUPPORTS_IFS)
            set(SHELL_SUPPORTS_IFS ${SHELL_SUPPORTS_IFS} CACHE BOOL
                "Set this value to TRUE if the shell supports IFS argument expansion"
            )
        endif()

        unset(testlibpath)
        if(SHELL_SUPPORTS_IFS)
            #"$*" expands in the shell into a list of all of the arguments
            #to the shell script, concatenated using the character provided
            #in ${IFS}.
            list(APPEND testlibpath "$*")
        else()
            #shell does not support IFS expansion; use a loop instead
            list(APPEND testlibpath "\${LL}")
        endif()

        #finally: add in the current library path variable
        list(INSERT libpath 0 ${testlibpath})
        list(APPEND libpath "$${LD_PATH_VAR}")

        #replace list separator with the path separator
        string(REPLACE ";" ":" libpath "${libpath}")
        list(APPEND environs "${LD_PATH_VAR}=\"${libpath}\"")

        #generate a shell script file that sets the environment and runs the test
        set(sh_file ${CMAKE_CURRENT_BINARY_DIR}/${test_name}_test.sh)
        file(WRITE ${sh_file} "#!${SHELL}\n")
        if(SHELL_SUPPORTS_IFS)
            file(APPEND ${sh_file} "export IFS=:\n")
        else()
            file(APPEND ${sh_file} "LL=\"$1\" && for tf in \"\$@\"; do LL=\"\${LL}:\${tf}\"; done\n")
        endif()

        #each line sets an environment variable
        foreach(environ ${environs})
            file(APPEND ${sh_file} "export ${environ}\n")
        endforeach()

        #redo the test args to have a space between each
        string(REPLACE ";" " " VOLK_TEST_ARGS "${VOLK_TEST_ARGS}")

        #finally: append the test name to execute
        file(APPEND ${sh_file} ${test_name} " " ${VOLK_TEST_ARGS} "\n")

        #make the shell file executable
        execute_process(COMMAND chmod +x ${sh_file})

        add_executable(${test_name} ${VOLK_TEST_SOURCES})
        target_link_libraries(${test_name} ${VOLK_TEST_TARGET_DEPS})

        #add the shell file as the test to execute;
        #use the form that allows for $<FOO:BAR> substitutions,
        #then combine the script arguments inside the script.
        add_test(NAME qa_${test_name}
            COMMAND ${SHELL} ${sh_file} ${TARGET_DIR_LIST}
        )
    endif()

    if(WIN32)
        #In the land of windows, all libraries must be in the PATH. Since
        #the dependent libraries are not yet installed, we must manually
        #set them in the PATH to run tests. The following appends the
        #path of a target dependency.
        #
        #NOTE: get_target_property LOCATION is being deprecated as of
        #CMake 3.2.0, which just prints a warning & notes that this
        #functionality will be removed in the future. Leave it here for
        #now until someone can figure out how to do this in Windows.
        foreach(target ${test_name} ${VOLK_TEST_TARGET_DEPS})
            get_target_property(location ${target} LOCATION)
            if(location)
                get_filename_component(path "${location}" PATH)
                #substitute the "$(...)" configuration placeholder; both
                #values are quoted so that an empty CMAKE_BUILD_TYPE cannot
                #shift the arguments of string(REGEX REPLACE).
                string(REGEX REPLACE "\\$\\(.*\\)" "${CMAKE_BUILD_TYPE}" path "${path}")
                list(APPEND libpath ${path})
            endif()
        endforeach()

        list(APPEND libpath ${DLL_PATHS} "%PATH%")

        #replace list separator with the path separator (escaped)
        string(REPLACE ";" "\\;" libpath "${libpath}")
        list(APPEND environs "PATH=${libpath}")

        #generate a bat file that sets the environment and runs the test
        set(bat_file ${CMAKE_CURRENT_BINARY_DIR}/${test_name}_test.bat)
        file(WRITE ${bat_file} "@echo off\n")

        #each line sets an environment variable
        foreach(environ ${environs})
            file(APPEND ${bat_file} "SET ${environ}\n")
        endforeach()

        #redo the test args to have a space between each
        string(REPLACE ";" " " VOLK_TEST_ARGS "${VOLK_TEST_ARGS}")

        #finally: append the test name to execute
        file(APPEND ${bat_file} ${test_name} " " ${VOLK_TEST_ARGS} "\n")
        file(APPEND ${bat_file} "\n")

        add_executable(${test_name} ${VOLK_TEST_SOURCES})
        target_link_libraries(${test_name} ${VOLK_TEST_TARGET_DEPS})

        add_test(${test_name} ${bat_file})
    endif()
endfunction()

View File

@ -86,7 +86,7 @@ set(Boost_NOGO_VERSIONS
)
foreach(ver ${Boost_NOGO_VERSIONS})
if(${Boost_VERSION} EQUAL ${ver})
if("${Boost_VERSION}" STREQUAL "${ver}")
if(NOT ENABLE_BAD_BOOST)
MESSAGE(STATUS "WARNING: Found a known bad version of Boost (v${Boost_VERSION}). Disabling.")
set(Boost_FOUND FALSE)
@ -94,5 +94,5 @@ foreach(ver ${Boost_NOGO_VERSIONS})
MESSAGE(STATUS "WARNING: Found a known bad version of Boost (v${Boost_VERSION}). Continuing anyway.")
set(Boost_FOUND TRUE)
endif(NOT ENABLE_BAD_BOOST)
endif(${Boost_VERSION} EQUAL ${ver})
endif("${Boost_VERSION}" STREQUAL "${ver}")
endforeach(ver)

View File

@ -0,0 +1,189 @@
# Copyright 2014 Free Software Foundation, Inc.
#
# This file is part of VOLK
#
# VOLK is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# VOLK is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Radio; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street,
# Boston, MA 02110-1301, USA.
# Include guard: the first inclusion records the marker variable, any later
# inclusion leaves this file immediately.
if(NOT DEFINED __INCLUDED_VOLK_BUILD_TYPES_CMAKE)
    set(__INCLUDED_VOLK_BUILD_TYPES_CMAKE TRUE)
else()
    return()
endif()
# Standard CMake Build Types and their basic CFLAGS:
# - None: nothing set
# - Debug: -O2 -g
# - Release: -O3
# - RelWithDebInfo: -O3 -g
# - MinSizeRel: -Os
# Additional Build Types, defined below:
# - NoOptWithASM: -O0 -g -save-temps
# - O2WithASM: -O2 -g -save-temps
# - O3WithASM: -O3 -g -save-temps
# - DebugParanoid: -O0 -g -Wall -Wextra
# Defines the list of acceptable cmake build types. When adding a new
# build type below, make sure to add it to this list.
# Stock CMake build types ...
list(APPEND AVAIL_BUILDTYPES None Debug Release RelWithDebInfo MinSizeRel)
# ... followed by the additional build types defined in this file.
list(APPEND AVAIL_BUILDTYPES DebugParanoid NoOptWithASM O2WithASM O3WithASM)
########################################################################
# VOLK_CHECK_BUILD_TYPE(build type)
#
# Use this to check that the build type set in CMAKE_BUILD_TYPE on the
# commandline is one of the valid build types used by this project. It
# checks the value set in the cmake interface against the list of
# known build types in AVAIL_BUILDTYPES. If the build type is found,
# the function exits immediately. If nothing is found by the end of
# checking all available build types, we exit with an error and list
# the available build types.
########################################################################
function(VOLK_CHECK_BUILD_TYPE settype)
    # Validates <settype> against AVAIL_BUILDTYPES, ignoring case.
    # Returns silently when the type is known; otherwise stops the
    # configuration with a FATAL_ERROR that lists the accepted types.
    string(TOUPPER "${settype}" _settype)

    # Upper-case every known build type once.
    set(_known_types)
    foreach(btype ${AVAIL_BUILDTYPES})
        string(TOUPPER "${btype}" _btype)
        list(APPEND _known_types "${_btype}")
    endforeach()

    # list(FIND) compares the literal text. An unquoted
    # if(${a} STREQUAL ${b}) would instead re-dereference any operand that
    # happens to name an existing variable and could accept a bogus type.
    list(FIND _known_types "${_settype}" _type_index)
    if(_type_index EQUAL -1)
        # Build type not found; error out
        message(FATAL_ERROR "Build type '${settype}' not valid, must be one of: ${AVAIL_BUILDTYPES}")
    endif()
endfunction()
########################################################################
# For GCC and Clang, we can set a build type:
#
# -DCMAKE_BUILD_TYPE=DebugParanoid
#
# This type uses no optimization (-O0), outputs debug symbols (-g) and
# enables extensive warnings (-Wall -Wextra); warnings do not stop the build.
# NOTE: This is not defined on Windows systems.
########################################################################
if(NOT WIN32)
    # Compiler flags of the DebugParanoid build type: no optimization,
    # debug symbols and extensive warnings.
    set(CMAKE_CXX_FLAGS_DEBUGPARANOID "-Wall -Wextra -g -O0" CACHE STRING
        "Flags used by the C++ compiler during DebugParanoid builds." FORCE)
    set(CMAKE_C_FLAGS_DEBUGPARANOID "-Wall -Wextra -g -O0" CACHE STRING
        "Flags used by the C compiler during DebugParanoid builds." FORCE)

    # Linker flags of the DebugParanoid build type (the help strings used to
    # name NoOptWithASM by mistake).
    set(CMAKE_EXE_LINKER_FLAGS_DEBUGPARANOID
        "-Wl,--warn-unresolved-symbols,--warn-once" CACHE STRING
        "Flags used for linking binaries during DebugParanoid builds." FORCE)
    set(CMAKE_SHARED_LINKER_FLAGS_DEBUGPARANOID
        "-Wl,--warn-unresolved-symbols,--warn-once" CACHE STRING
        "Flags used by the shared lib linker during DebugParanoid builds." FORCE)

    # Hide all four entries from the default cache view. The shared-linker
    # entry was previously misspelled (missing "_FLAGS") and therefore never
    # marked as advanced.
    mark_as_advanced(
        CMAKE_CXX_FLAGS_DEBUGPARANOID
        CMAKE_C_FLAGS_DEBUGPARANOID
        CMAKE_EXE_LINKER_FLAGS_DEBUGPARANOID
        CMAKE_SHARED_LINKER_FLAGS_DEBUGPARANOID)
endif()
########################################################################
# For GCC and Clang, we can set a build type:
#
# -DCMAKE_BUILD_TYPE=NoOptWithASM
#
# This type uses no optimization (-O0), outputs debug symbols (-g) and
# outputs all intermediary files the build system produces, including
# all assembly (.s) files. Look in the build directory for these
# files.
# NOTE: This is not defined on Windows systems.
########################################################################
if(NOT WIN32)
    # Compiler flags: keep intermediate files, debug symbols, no optimization.
    set(CMAKE_C_FLAGS_NOOPTWITHASM "-save-temps -g -O0"
        CACHE STRING "Flags used by the C compiler during NoOptWithASM builds." FORCE)
    set(CMAKE_CXX_FLAGS_NOOPTWITHASM "-save-temps -g -O0"
        CACHE STRING "Flags used by the C++ compiler during NoOptWithASM builds." FORCE)

    # Linker flags for shared libraries and executables.
    set(CMAKE_SHARED_LINKER_FLAGS_NOOPTWITHASM "-Wl,--warn-unresolved-symbols,--warn-once"
        CACHE STRING "Flags used by the shared lib linker during NoOptWithASM builds." FORCE)
    set(CMAKE_EXE_LINKER_FLAGS_NOOPTWITHASM "-Wl,--warn-unresolved-symbols,--warn-once"
        CACHE STRING "Flags used for linking binaries during NoOptWithASM builds." FORCE)

    # Keep the four entries out of the default cache view.
    mark_as_advanced(
        CMAKE_C_FLAGS_NOOPTWITHASM
        CMAKE_CXX_FLAGS_NOOPTWITHASM
        CMAKE_SHARED_LINKER_FLAGS_NOOPTWITHASM
        CMAKE_EXE_LINKER_FLAGS_NOOPTWITHASM
    )
endif()
########################################################################
# For GCC and Clang, we can set a build type:
#
# -DCMAKE_BUILD_TYPE=O2WithASM
#
# This type uses level 2 optimization (-O2), outputs debug symbols
# (-g) and outputs all intermediary files the build system produces,
# including all assembly (.s) files. Look in the build directory for
# these files.
# NOTE: This is not defined on Windows systems.
########################################################################
if(NOT WIN32)
    # Compiler flags: keep intermediate files, debug symbols, -O2.
    set(CMAKE_C_FLAGS_O2WITHASM "-save-temps -g -O2"
        CACHE STRING "Flags used by the C compiler during O2WithASM builds." FORCE)
    set(CMAKE_CXX_FLAGS_O2WITHASM "-save-temps -g -O2"
        CACHE STRING "Flags used by the C++ compiler during O2WithASM builds." FORCE)

    # Linker flags for shared libraries and executables.
    set(CMAKE_SHARED_LINKER_FLAGS_O2WITHASM "-Wl,--warn-unresolved-symbols,--warn-once"
        CACHE STRING "Flags used by the shared lib linker during O2WithASM builds." FORCE)
    set(CMAKE_EXE_LINKER_FLAGS_O2WITHASM "-Wl,--warn-unresolved-symbols,--warn-once"
        CACHE STRING "Flags used for linking binaries during O2WithASM builds." FORCE)

    # Keep the four entries out of the default cache view.
    mark_as_advanced(
        CMAKE_C_FLAGS_O2WITHASM
        CMAKE_CXX_FLAGS_O2WITHASM
        CMAKE_SHARED_LINKER_FLAGS_O2WITHASM
        CMAKE_EXE_LINKER_FLAGS_O2WITHASM
    )
endif()
########################################################################
# For GCC and Clang, we can set a build type:
#
# -DCMAKE_BUILD_TYPE=O3WithASM
#
# This type uses level 3 optimization (-O3), outputs debug symbols
# (-g) and outputs all intermediary files the build system produces,
# including all assembly (.s) files. Look in the build directory for
# these files.
# NOTE: This is not defined on Windows systems.
########################################################################
if(NOT WIN32)
    # Compiler flags: keep intermediate files, debug symbols, -O3.
    set(CMAKE_C_FLAGS_O3WITHASM "-save-temps -g -O3"
        CACHE STRING "Flags used by the C compiler during O3WithASM builds." FORCE)
    set(CMAKE_CXX_FLAGS_O3WITHASM "-save-temps -g -O3"
        CACHE STRING "Flags used by the C++ compiler during O3WithASM builds." FORCE)

    # Linker flags for shared libraries and executables.
    set(CMAKE_SHARED_LINKER_FLAGS_O3WITHASM "-Wl,--warn-unresolved-symbols,--warn-once"
        CACHE STRING "Flags used by the shared lib linker during O3WithASM builds." FORCE)
    set(CMAKE_EXE_LINKER_FLAGS_O3WITHASM "-Wl,--warn-unresolved-symbols,--warn-once"
        CACHE STRING "Flags used for linking binaries during O3WithASM builds." FORCE)

    # Keep the four entries out of the default cache view.
    mark_as_advanced(
        CMAKE_C_FLAGS_O3WITHASM
        CMAKE_CXX_FLAGS_O3WITHASM
        CMAKE_SHARED_LINKER_FLAGS_O3WITHASM
        CMAKE_EXE_LINKER_FLAGS_O3WITHASM
    )
endif()

View File

@ -1,9 +1,9 @@
INCLUDE(FindPkgConfig)
PKG_CHECK_MODULES(PC_VOLK volk_gnsssdr)
PKG_CHECK_MODULES(PC_VOLK volk_gnsssdr_module)
FIND_PATH(
VOLK_INCLUDE_DIRS
NAMES volk_gnsssdr/volk_gnsssdr.h
NAMES volk_gnsssdr_module/volk_gnsssdr_module.h
HINTS $ENV{VOLK_DIR}/include
${PC_VOLK_INCLUDEDIR}
PATHS /usr/local/include
@ -12,7 +12,7 @@ FIND_PATH(
FIND_LIBRARY(
VOLK_LIBRARIES
NAMES volk_gnsssdr
NAMES volk_gnsssdr_module
HINTS $ENV{VOLK_DIR}/lib
${PC_VOLK_LIBDIR}
PATHS /usr/local/lib

View File

@ -0,0 +1,34 @@
# Copyright 2014 Free Software Foundation, Inc.
#
# This file is part of VOLK.
#
# VOLK is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# VOLK is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with VOLK; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street,
# Boston, MA 02110-1301, USA.
# Version of this package; the @...@ placeholders are substituted when this
# template is configured.
set(MAJOR_VERSION @VERSION_INFO_MAJOR_VERSION@)
set(MINOR_VERSION @VERSION_INFO_MINOR_VERSION@)
set(MAINT_VERSION @VERSION_INFO_MAINT_VERSION@)
set(PACKAGE_VERSION
    ${MAJOR_VERSION}.${MINOR_VERSION}.${MAINT_VERSION})

# The package is accepted when the requested major and minor versions equal
# the ones above and the requested patch level is not newer than
# MAINT_VERSION. Operands are quoted so that an empty value cannot break the
# if() syntax, and the endif() calls are bare because their former arguments
# did not match the conditions they closed.
# NOTE(review): PACKAGE_VERSION_EXACT is also set when the requested patch
# level is lower than MAINT_VERSION, so find_package(... EXACT) accepts such
# requests -- confirm that this is intended.
if("${PACKAGE_FIND_VERSION_MAJOR}" EQUAL "${MAJOR_VERSION}")
    if("${PACKAGE_FIND_VERSION_MINOR}" EQUAL "${MINOR_VERSION}")
        if(NOT "${PACKAGE_FIND_VERSION_PATCH}" GREATER "${MAINT_VERSION}")
            set(PACKAGE_VERSION_EXACT 1) # exact match for API version
            set(PACKAGE_VERSION_COMPATIBLE 1) # compat for minor/patch version
        endif()
    endif()
endif()

View File

@ -71,7 +71,7 @@ endif(PYTHON_EXECUTABLE)
# - have the result variable to set
########################################################################
macro(VOLK_PYTHON_CHECK_MODULE desc mod cmd have)
#message(STATUS "")
message(STATUS "")
message(STATUS "Python checking for ${desc}")
execute_process(
COMMAND ${PYTHON_EXECUTABLE} -c "
@ -97,11 +97,13 @@ endmacro(VOLK_PYTHON_CHECK_MODULE)
########################################################################
# Sets the python installation directory VOLK_PYTHON_DIR
########################################################################
# Ask the Python interpreter for its library directory relative to an empty
# prefix, unless the caller has already defined VOLK_PYTHON_DIR.
# NOTE(review): the embedded script uses the Python 2 print statement.
if(NOT DEFINED VOLK_PYTHON_DIR)
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "
from distutils import sysconfig
print sysconfig.get_python_lib(plat_specific=True, prefix='')
" OUTPUT_VARIABLE VOLK_PYTHON_DIR OUTPUT_STRIP_TRAILING_WHITESPACE
)
endif()
# Quoted so that an empty result (e.g. the interpreter could not be run) does
# not abort with "wrong number of arguments".
file(TO_CMAKE_PATH "${VOLK_PYTHON_DIR}" VOLK_PYTHON_DIR)
########################################################################

View File

@ -0,0 +1,89 @@
# Copyright 2014 Free Software Foundation, Inc.
#
# This file is part of VOLK.
#
# VOLK is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# VOLK is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with VOLK; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street,
# Boston, MA 02110-1301, USA.
# Include guard: the first inclusion records the marker variable, any later
# inclusion leaves this file immediately.
if(NOT DEFINED __INCLUDED_VOLK_VERSION_CMAKE)
    set(__INCLUDED_VOLK_VERSION_CMAKE TRUE)
else()
    return()
endif()
#eventually, replace version.sh and fill in the variables below
# Local copies of the VERSION_INFO_* components.
set(MAJOR_VERSION ${VERSION_INFO_MAJOR_VERSION})
set(MINOR_VERSION ${VERSION_INFO_MINOR_VERSION})
set(MAINT_VERSION ${VERSION_INFO_MAINT_VERSION})

########################################################################
# Determine GIT_DESCRIBE: from "git describe" when building inside a git
# checkout, otherwise from VOLK_GIT_COUNT / VOLK_GIT_HASH with defaults.
########################################################################
find_package(Git)

if(NOT GIT_FOUND OR NOT EXISTS ${CMAKE_SOURCE_DIR}/.git)
    # No git executable or no checkout: assemble a describe-like string.
    if(NOT VOLK_GIT_COUNT)
        set(VOLK_GIT_COUNT "0")
    endif()
    if(NOT VOLK_GIT_HASH)
        set(VOLK_GIT_HASH "unknown")
    endif()
    set(GIT_DESCRIBE "v${MAJOR_VERSION}.${MINOR_VERSION}-${VOLK_GIT_COUNT}-${VOLK_GIT_HASH}")
else()
    message(STATUS "Extracting version information from git describe...")
    execute_process(
        COMMAND ${GIT_EXECUTABLE} describe --always --abbrev=8 --long
        WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
        OUTPUT_VARIABLE GIT_DESCRIBE
        OUTPUT_STRIP_TRAILING_WHITESPACE
    )
endif()
########################################################################
# Use the logic below to set the version constants
########################################################################
if("${MINOR_VERSION}" STREQUAL "git")
    # Snapshot on top of a major version.
    #   VERSION: 1.0git-xxx-gxxxxxxxx
    #   DOCVER:  1.0git
    #   LIBVER:  1.0git
    set(DOCVER "${MAJOR_VERSION}.0${MINOR_VERSION}")
    set(LIBVER "${DOCVER}")
    set(VERSION "${GIT_DESCRIBE}")
    set(RC_MINOR_VERSION "0")
    set(RC_MAINT_VERSION "0")
elseif("${MAINT_VERSION}" STREQUAL "git")
    # Snapshot on top of a minor version.
    #   VERSION: 1.xgit-xxx-gxxxxxxxx
    #   DOCVER:  1.xgit
    #   LIBVER:  1.xgit
    set(DOCVER "${MAJOR_VERSION}.${MINOR_VERSION}${MAINT_VERSION}")
    set(LIBVER "${DOCVER}")
    set(VERSION "${GIT_DESCRIBE}")
    math(EXPR RC_MINOR_VERSION "${MINOR_VERSION} - 1")
    set(RC_MAINT_VERSION "0")
else()
    # Numbered release; the maintenance component is appended only when it
    # is not "0".
    #   VERSION: 1.1{.x}
    #   DOCVER:  1.1{.x}
    #   LIBVER:  1.1{.x}
    set(VERSION "${MAJOR_VERSION}.${MINOR_VERSION}")
    if(NOT "${MAINT_VERSION}" STREQUAL "0")
        set(VERSION "${VERSION}.${MAINT_VERSION}")
    endif()
    set(DOCVER "${VERSION}")
    set(LIBVER "${VERSION}")
    set(RC_MINOR_VERSION ${MINOR_VERSION})
    set(RC_MAINT_VERSION ${MAINT_VERSION})
endif()

View File

@ -4,12 +4,6 @@
<arch name="generic"> <!-- name is required-->
</arch>
<arch name="altivec">
<flag compiler="gnu">-maltivec</flag>
<alignment>16</alignment>
<check name="has_ppc"></check>
</arch>
<arch name="softfp">
<flag compiler="gnu">-mfloat-abi=softfp</flag>
</arch>
@ -87,6 +81,16 @@
<alignment>8</alignment>
</arch>
<arch name="fma">
<check name="cpuid_x86_bit">
<param>2</param>
<param>0x00000001</param>
<param>12</param>
</check>
<flag compiler="gnu">-mfma</flag>
<alignment>32</alignment>
</arch>
<arch name="sse">
<check name="cpuid_x86_bit">
<param>3</param>
@ -201,4 +205,24 @@
<alignment>32</alignment>
</arch>
<arch name="avx2">
<check name="cpuid_count_x86_bit">
<param>7</param>
<param>0</param>
<param>1</param>
<param>5</param>
</check>
<!-- check to make sure that xgetbv is enabled in OS -->
<check name="cpuid_x86_bit">
<param>2</param>
<param>0x00000001</param>
<param>27</param>
</check>
<!-- check to see that the OS has enabled AVX2 -->
<check name="get_avx2_enabled"></check>
<flag compiler="gnu">-mavx2</flag>
<flag compiler="msvc">/arch:AVX2</flag>
<alignment>32</alignment>
</arch>
</grammar>

View File

@ -4,16 +4,6 @@
<archs>generic orc|</archs>
</machine>
<!--
<machine name="mmx">
<archs>generic 32|64 mmx orc|</archs>
</machine>
<machine name="sse">
<archs>generic 32|64| mmx| sse orc|</archs>
</machine>
-->
<machine name="neon">
<archs>generic neon softfp|hardfp orc|</archs>
</machine>
@ -48,8 +38,9 @@
<archs>generic 32|64| mmx| sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount avx orc|</archs>
</machine>
<machine name="altivec">
<archs>generic altivec</archs>
<!-- trailing | bar means generate without either for MSVC -->
<machine name="avx2">
<archs>generic 32|64| mmx| sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount avx fma avx2 orc|</archs>
</machine>
</grammar>

View File

@ -34,7 +34,7 @@ class machine_class:
self.archs.append(arch)
self.arch_names.append(arch_name)
self.alignment = max(map(lambda a: a.alignment, self.archs))
def __repr__(self): return self.name
def register_machine(name, archs):

View File

@ -53,10 +53,10 @@ def __parse_tmpl(_tmpl, **kwargs):
defs.update(kwargs)
_tmpl = __escape_pre_processor(_tmpl)
_tmpl = """
/* this file was generated by volk_gnsssdr template utils, do not edit! */
""" + _tmpl
/* this file was generated by volk_gnsssdr template utils, do not edit! */
""" + _tmpl
return str(Template.Template(_tmpl, defs))
def main():
@ -64,9 +64,10 @@ def main():
parser.add_option('--input', type='string')
parser.add_option('--output', type='string')
(opts, args) = parser.parse_args()
output = __parse_tmpl(open(opts.input).read(), args=args)
if opts.output: open(opts.output, 'w').write(output)
else: print output
if __name__ == '__main__': main()

View File

@ -1,35 +1,33 @@
/*!
* \file constants.h
* \brief volk_gnsssdr constants
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
/* -*- c++ -*- */
/*
* Copyright 2006,2009,2013 Free Software Foundation, Inc.
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
* This file is part of GNU Radio
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/
#ifndef GNSS_SDR_VOLK_GNSSSDR_CONSTANTS_H
#define GNSS_SDR_VOLK_GNSSSDR_CONSTANTS_H
#ifndef INCLUDED_VOLK_GNSSSDR_CONSTANTS_H
#define INCLUDED_VOLK_GNSSSDR_CONSTANTS_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
__VOLK_DECL_BEGIN
VOLK_API char* volk_gnsssdr_prefix();
VOLK_API char* volk_gnsssdr_build_date();
VOLK_API char* volk_gnsssdr_version();
VOLK_API char* volk_gnsssdr_c_compiler();
VOLK_API char* volk_gnsssdr_compiler_flags();
@ -37,4 +35,4 @@ VOLK_API char* volk_gnsssdr_available_machines();
__VOLK_DECL_END
#endif /* GNSS_SDR_VOLK_GNSSSDR_CONSTANTS_H */
#endif /* INCLUDED_VOLK_GNSSSDR_CONSTANTS_H */

View File

@ -0,0 +1,71 @@
/*!
* \file volk_gnsssdr_avx_intrinsics.h
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* This file is intended to hold AVX intrinsics of intrinsics.
* They should be used in VOLK kernels to avoid copy-pasta.
*/
#ifndef INCLUDE_VOLK_VOLK_AVX_INTRINSICS_H_
#define INCLUDE_VOLK_VOLK_AVX_INTRINSICS_H_
#include <immintrin.h>
static inline __m256
_mm256_complexmul_ps(__m256 x, __m256 y)
{
__m256 yl, yh, tmp1, tmp2;
yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr ...
yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di ...
tmp1 = _mm256_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr ...
x = _mm256_shuffle_ps(x, x, 0xB1); // Re-arrange x to be ai,ar,bi,br ...
tmp2 = _mm256_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
return _mm256_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
}
static inline __m256
_mm256_conjugate_ps(__m256 x){
const __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
return _mm256_xor_ps(x, conjugator); // conjugate y
}
static inline __m256
_mm256_complexconjugatemul_ps(__m256 x, __m256 y){
y = _mm256_conjugate_ps(y);
return _mm256_complexmul_ps(x, y);
}
static inline __m256
_mm256_magnitudesquared_ps(__m256 cplxValue1, __m256 cplxValue2){
__m256 complex1, complex2;
cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
return _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
}
static inline __m256
_mm256_magnitude_ps(__m256 cplxValue1, __m256 cplxValue2){
return _mm256_sqrt_ps(_mm256_magnitudesquared_ps(cplxValue1, cplxValue2));
}
#endif /* INCLUDE_VOLK_VOLK_AVX_INTRINSICS_H_ */

View File

@ -91,14 +91,11 @@
#include <inttypes.h>
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
#endif
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
#include <x86intrin.h>
#endif
union bit128{
uint8_t i8[16];
uint16_t i16[8];
uint32_t i[4];
float f[4];
@ -114,6 +111,21 @@ union bit128{
#endif
};
/*
 * 32 bytes of storage that can be viewed as integer lanes of several widths,
 * as floats or doubles, and (only when LV_HAVE_AVX is defined) as the AVX
 * vector types. All members overlay the same bytes.
 */
union bit256{
uint8_t i8[32];   /* 32 x 8-bit lanes */
uint16_t i16[16]; /* 16 x 16-bit lanes */
uint32_t i[8];    /* 8 x 32-bit lanes */
float f[8];       /* 8 x single precision */
double d[4];      /* 4 x double precision */
#ifdef LV_HAVE_AVX
__m256 float_vec;   /* AVX single-precision vector view */
__m256i int_vec;    /* AVX integer vector view */
__m256d double_vec; /* AVX double-precision vector view */
#endif
};
/* Cast an arbitrary pointer x to a pointer to the corresponding lane-view union. */
#define bit128_p(x) ((union bit128 *)(x))
#define bit256_p(x) ((union bit256 *)(x))
#endif /* INCLUDED_LIBVOLK_COMMON_H */

View File

@ -109,6 +109,7 @@ typedef double complex lv_64fc_t;
#endif /* __cplusplus */
#endif /* INCLUDED_VOLK_COMPLEX_H */
#endif /* INCLUDE_VOLK_COMPLEX_H */

View File

@ -25,9 +25,8 @@
#ifndef INCLUDED_VOLK_MALLOC_H
#define INCLUDED_VOLK_MALLOC_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <stdlib.h>
#include "volk_gnsssdr/volk_gnsssdr_common.h"
__VOLK_DECL_BEGIN
@ -39,7 +38,7 @@ __VOLK_DECL_BEGIN
* memory that are guaranteed to be on an alignment, VOLK handles this
* itself. The volk_gnsssdr_malloc function behaves like malloc in that it
* returns a pointer to the allocated memory. However, it also takes
* in an alignment specification, which is usually something like 16 or
* in an alignment specification, which is usually something like 16 or
* 32 to ensure that the aligned memory is located on a particular
* byte boundary for use with SIMD.
*
@ -60,7 +59,7 @@ VOLK_API void *volk_gnsssdr_malloc(size_t size, size_t alignment);
/*!
* \brief Free's memory allocated by volk_gnsssdr_malloc.
* \param aptr The aligned pointer allocated by volk_gnsssdr_malloc.
* \param aptr The aligned pointer allocated by volk_gnsssdr_malloc.
*/
VOLK_API void volk_gnsssdr_free(void *aptr);

View File

@ -37,14 +37,15 @@ typedef struct volk_gnsssdr_arch_pref
char impl_u[128]; //best unaligned impl
} volk_gnsssdr_arch_pref_t;
/*!
* \brief get path to volk_gnsssdr_config profiling info
*/
////////////////////////////////////////////////////////////////////////
// get path to volk_gnsssdr_config profiling info;
// returns \0 in the argument on failure.
////////////////////////////////////////////////////////////////////////
VOLK_API void volk_gnsssdr_get_config_path(char *);
/*!
* \brief load prefs into global prefs struct
*/
////////////////////////////////////////////////////////////////////////
// load prefs into global prefs struct
////////////////////////////////////////////////////////////////////////
VOLK_API size_t volk_gnsssdr_load_preferences(volk_gnsssdr_arch_pref_t **);
__VOLK_DECL_END

View File

@ -0,0 +1,65 @@
/*!
* \file volk_gnsssdr_sse3_intrinsics.h
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* This file is intended to hold SSE3 intrinsics of intrinsics.
* They should be used in VOLK kernels to avoid copy-pasta.
*/
#ifndef INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_
#define INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_
#include <pmmintrin.h>
static inline __m128
_mm_complexmul_ps(__m128 x, __m128 y)
{
__m128 yl, yh, tmp1, tmp2;
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x, x, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
return _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
}
/*!
 * \brief Multiplies x by the complex conjugate of y (two interleaved complex floats each).
 * \param x First factor.
 * \param y Second factor; its imaginary lanes are sign-flipped before the product.
 * \return x * conj(y), interleaved.
 */
static inline __m128
_mm_complexconjugatemul_ps(__m128 x, __m128 y)
{
    /* Sign bit set only in the odd (imaginary) lanes. */
    const __m128 imag_sign_mask = _mm_setr_ps(0.f, -0.f, 0.f, -0.f);
    const __m128 y_conjugated = _mm_xor_ps(y, imag_sign_mask);
    return _mm_complexmul_ps(x, y_conjugated);
}
static inline __m128
_mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2){
cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
return _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
}
static inline __m128
_mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2){
return _mm_sqrt_ps(_mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2));
}
#endif /* INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_ */

View File

@ -0,0 +1,49 @@
/*!
* \file volk_gnsssdr_sse_intrinsics.h
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* This file is intended to hold SSE intrinsics of intrinsics.
* They should be used in VOLK kernels to avoid copy-pasta.
*/
#ifndef INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_
#define INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_
#include <xmmintrin.h>
/*!
 * \brief Squared magnitude of four interleaved complex floats held in two registers (SSE).
 * \param cplxValue1 First two complex values (re, im interleaved).
 * \param cplxValue2 Next two complex values (re, im interleaved).
 * \return re^2 + im^2 for the four values, in input order.
 */
static inline __m128
_mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2)
{
    /* De-interleave: even lanes are the in-phase parts, odd lanes the quadrature parts. */
    const __m128 in_phase = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
    const __m128 quadrature = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));

    const __m128 in_phase_sq = _mm_mul_ps(in_phase, in_phase);
    const __m128 quadrature_sq = _mm_mul_ps(quadrature, quadrature);

    return _mm_add_ps(in_phase_sq, quadrature_sq); /* I^2 + Q^2 */
}
static inline __m128
_mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2){
return _mm_sqrt_ps(_mm_magnitudesquared_ps(cplxValue1, cplxValue2));
}
#endif /* INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_ */

View File

@ -1,174 +0,0 @@
/*!
* \file CommonMacros.h
* \brief Common macros used inside the volk protokernels.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_CommonMacros_u_H
#define INCLUDED_gnsssdr_CommonMacros_u_H
#ifdef LV_HAVE_SSE4_1
/*!
\brief Macros for U_SSE4_1
*/
/*!
 * \brief Statement macro: regroups the 16-bit lanes of input1 and input2 into real and imag.
 *
 * Blend mask 85 (0b01010101) takes the even 16-bit lanes from the second blend operand:
 *  - imag: even lanes = input1 shifted right by 2 bytes, odd lanes = input2
 *          (i.e. the odd lanes of input1 and input2, interleaved)
 *  - real: even lanes = input1, odd lanes = input2 shifted left by 2 bytes
 *          (i.e. the even lanes of input1 and input2, interleaved)
 * Assumes samples are stored as interleaved (re, im) 16-bit pairs -- TODO confirm with callers.
 * All four arguments must be __m128i variables; real and imag are overwritten.
 */
#ifndef CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1
#define CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(input1, input2, real, imag)\
imag = _mm_srli_si128 (input1, 2);\
imag = _mm_blend_epi16 (input2, imag, 85);\
real = _mm_slli_si128 (input2, 2);\
real = _mm_blend_epi16 (real, input1, 85);
#endif /* CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1 */
/*!
 * \brief Statement macro: folds eight signed 16-bit lanes into four float partial sums.
 *
 * Sign-extends the low four 16-bit lanes of input to 32 bits (input_i_1), shifts
 * input right by 8 bytes and sign-extends the next four (input_i_2), adds them
 * lane-wise into output_i32 and converts that to float in output_ps.
 * NOTE: input is modified (shifted) by this macro; input_i_1, input_i_2 and
 * output_i32 are caller-provided scratch variables.
 */
#ifndef CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1
#define CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(input, input_i_1, input_i_2, output_i32, output_ps)\
input_i_1 = _mm_cvtepi16_epi32(input);\
input = _mm_srli_si128 (input, 8);\
input_i_2 = _mm_cvtepi16_epi32(input);\
output_i32 = _mm_add_epi32 (input_i_1, input_i_2);\
output_ps = _mm_cvtepi32_ps(output_i32);
#endif /* CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1 */
/*!
 * \brief Statement macro: folds sixteen signed 8-bit lanes into four float partial sums.
 *
 * The 16 bytes of input are consumed in four groups of four bytes. Each group is
 * sign-extended to 32 bits; groups 0+1 are summed into output_i32_1, groups 2+3
 * into output_i32_2, both are added into output_i32 and converted to float in output_ps.
 * NOTE: input is shifted right by 4 bytes four times, so its original contents are
 * gone afterwards. The input_i_* and output_i32* arguments are caller-provided scratch.
 */
#ifndef CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1
#define CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1(input, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)\
input_i_1 = _mm_cvtepi8_epi32(input);\
input = _mm_srli_si128 (input, 4);\
input_i_2 = _mm_cvtepi8_epi32(input);\
input = _mm_srli_si128 (input, 4);\
output_i32_1 = _mm_add_epi32 (input_i_1, input_i_2);\
input_i_1 = _mm_cvtepi8_epi32(input);\
input = _mm_srli_si128 (input, 4);\
input_i_2 = _mm_cvtepi8_epi32(input);\
input = _mm_srli_si128 (input, 4);\
output_i32_2 = _mm_add_epi32 (input_i_1, input_i_2);\
output_i32 = _mm_add_epi32 (output_i32_1, output_i32_2);\
output_ps = _mm_cvtepi32_ps(output_i32);
#endif /* CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1 */
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE2
/*!
\brief Macros for U_SSE2
*/
#ifdef LV_HAVE_SSSE3
/*!
\brief Macros for U_SSSE3
*/
/*!
 * \brief Statement macro: 8-bit complex product of x and y, widened to 16-bit lanes.
 *
 * Real part: y gets the signs of x applied, then the signs of check_sign_sequence,
 * and is multiplied byte-wise by x_abs with adjacent products added (saturating)
 * into 16-bit lanes of real_output.
 * Imaginary part: y is permuted by rearrange_sequence, gets the signs of x applied,
 * and is multiplied/added against x_abs the same way into imag_output.
 * NOTE: x_abs is only read here, so the caller must fill it beforehand
 * (presumably with the byte-wise absolute value of x -- confirm at the call site).
 * y_aux is caller-provided scratch.
 */
#ifndef CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3
#define CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, x, check_sign_sequence, rearrange_sequence, y_aux, x_abs, real_output, imag_output)\
y_aux = _mm_sign_epi8 (y, x);\
y_aux = _mm_sign_epi8 (y_aux, check_sign_sequence);\
real_output = _mm_maddubs_epi16 (x_abs, y_aux);\
\
y_aux = _mm_shuffle_epi8 (y, rearrange_sequence);\
y_aux = _mm_sign_epi8 (y_aux, x);\
imag_output = _mm_maddubs_epi16 (x_abs, y_aux);
#endif /* CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3 */
#endif /* LV_HAVE_SSSE3 */
/*!
 * \brief Statement macro: lane-wise 16-bit complex product from split real/imag vectors.
 *
 *   real_output = realx*realy - imagx*imagy
 *   imag_output = realx*imagy + imagx*realy
 * Products keep only the low 16 bits (_mm_mullo_epi16) and the add/sub wrap around,
 * so results are exact only while they fit in 16 bits.
 * The four *_mult_* arguments are caller-provided scratch variables.
 */
#ifndef CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2
#define CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)\
realx_mult_realy = _mm_mullo_epi16 (realx, realy);\
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);\
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);\
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);\
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);\
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
#endif /* CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2 */
/*!
 * \brief Statement macro: splits interleaved 8-bit samples into masked real and imag vectors.
 *
 *   imag = (input >> 1 byte) & mult1
 *   real = input & mult1
 * mult1 is a caller-supplied byte mask (presumably 0x00FF in every 16-bit lane,
 * so each result lane holds one zero-extended byte -- confirm at the call site).
 */
#ifndef CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2
#define CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(input, mult1, real, imag)\
imag = _mm_srli_si128 (input, 1);\
imag = _mm_and_si128 (imag, mult1);\
real = _mm_and_si128 (input, mult1);
#endif /* CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2 */
/*!
 * \brief Statement macro: SSE2-only widening of sixteen signed bytes into two float vectors.
 *
 * Without SSE4.1 sign-extension instructions, each byte is unpacked behind zeros so it
 * lands in the top byte of a 32-bit lane, then shifted right arithmetically by 24 to
 * sign-extend it.
 *  - output_ps_1 = float(bytes 0..3 + bytes 4..7)   (low half of input)
 *  - output_ps_2 = float(bytes 8..11 + bytes 12..15) (high half of input)
 * Unlike the SSE4.1 variant, input is left untouched and the two partial sums are
 * not added together here. input_i_1, input_i_2 and output_i32 are scratch.
 */
#ifndef CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2
#define CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2(input, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)\
input_i_1 = _mm_unpacklo_epi8(_mm_setzero_si128(), input);\
input_i_2 = _mm_unpacklo_epi16(_mm_setzero_si128(), input_i_1);\
input_i_1 = _mm_unpackhi_epi16(_mm_setzero_si128(), input_i_1);\
input_i_1 = _mm_srai_epi32(input_i_1, 24);\
input_i_2 = _mm_srai_epi32(input_i_2, 24);\
output_i32 = _mm_add_epi32(input_i_1, input_i_2);\
output_ps_1 = _mm_cvtepi32_ps(output_i32);\
\
input_i_1 = _mm_unpackhi_epi8(_mm_setzero_si128(), input);\
input_i_2 = _mm_unpacklo_epi16(_mm_setzero_si128(), input_i_1);\
input_i_1 = _mm_unpackhi_epi16(_mm_setzero_si128(), input_i_1);\
input_i_1 = _mm_srai_epi32(input_i_1, 24);\
input_i_2 = _mm_srai_epi32(input_i_2, 24);\
output_i32 = _mm_add_epi32(input_i_1, input_i_2);\
output_ps_2 = _mm_cvtepi32_ps(output_i32);
#endif /* CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2 */
/*!
 * \brief Statement macro: bumps every byte of y that equals minus128 up by one.
 *
 * The comparison writes 0xFF (-1) into minus128control where y == minus128 and 0
 * elsewhere; subtracting that from y adds 1 to exactly the matching bytes.
 * y is modified in place. Presumably minus128 holds -128 in every byte, so that
 * -128 becomes -127 and can later be negated safely -- confirm at the call site.
 */
#ifndef CM_8IC_CONTROLMINUS128_8IC_U_SSE2
#define CM_8IC_CONTROLMINUS128_8IC_U_SSE2(y, minus128, minus128control)\
minus128control = _mm_cmpeq_epi8 (y, minus128);\
y = _mm_sub_epi8 (y, minus128control);
#endif /* CM_8IC_CONTROLMINUS128_8IC_U_SSE2 */
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Macros for U_GENERIC
*/
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_CommonMacros_u_H */
#ifndef INCLUDED_gnsssdr_CommonMacros_a_H
#define INCLUDED_gnsssdr_CommonMacros_a_H
#ifdef LV_HAVE_SSE4_1
/*!
\brief Macros for A_SSE4_1
*/
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE2
/*!
\brief Macros for A_SSE2
*/
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Macros for A_GENERIC
*/
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_CommonMacros_a_H */

View File

@ -1,76 +0,0 @@
/*!
* \file CommonMacros_16ic_cw_corr_32fc.h
* \brief Common macros used inside the 16ic_cw_corr_32fc volk protokernels.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_u_H
#define INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_u_H
#include "CommonMacros/CommonMacros.h"
#ifdef LV_HAVE_SSE4_1
/*!
\brief Macros for U_SSE4_1
*/
/*!
 * \brief Statement macro: one 16-bit correlation step, composed from CommonMacros.h.
 *
 * Expands, in this order, to:
 *  1. split y1/y2 into realy/imagy,
 *  2. complex-multiply (real_bb_signal_sample, imag_bb_signal_sample) by (realy, imagy)
 *     into real_output/imag_output (16-bit, wrapping arithmetic),
 *  3. widen and fold real_output into real_output_ps,
 *  4. widen and fold imag_output into imag_output_ps.
 * real_output and imag_output are shifted by steps 3 and 4. Every other argument
 * besides the inputs and the two *_ps outputs is caller-provided scratch.
 */
#ifndef CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1
#define CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)\
CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)\
CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)\
CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)\
CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
#endif /* CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1 */
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Macros for U_GENERIC
*/
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_u_H */
#ifndef INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_a_H
#define INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_a_H
#ifdef LV_HAVE_SSE4_1
/*!
\brief Macros for A_SSE4_1
*/
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Macros for A_GENERIC
*/
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_CommonMacros_16ic_cw_corr_32fc_a_H */

View File

@ -1,115 +0,0 @@
/*!
* \file CommonMacros_8ic_cw_corr_32fc.h
* \brief Common macros used inside the 8ic_cw_corr_32fc volk protokernels.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_u_H
#define INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_u_H
#include "CommonMacros/CommonMacros.h"
#ifdef LV_HAVE_SSE4_1
/*!
\brief Macros for U_SSE4_1
*/
/*!
 * \brief Statement macro: one 8-bit correlation step (SSE4.1), composed from CommonMacros.h.
 *
 * Splits y into realy/imagy with mask mult1, complex-multiplies by the baseband
 * sample, then packs the result back into bytes: imag_output is shifted left by one
 * byte and real_output bytes are selected wherever the top bit of the mult1 byte is
 * set. The packed vector is widened and folded into output_ps.
 *
 * NOTE(review): the packed vector is stored in a variable named `output` that is NOT
 * a macro parameter; the call site must declare an __m128i named exactly `output`.
 * `output` is consumed (shifted out) by the final conversion step.
 */
#ifndef CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1
#define CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)\
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)\
CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)\
\
imag_output = _mm_slli_si128 (imag_output, 1);\
output = _mm_blendv_epi8 (imag_output, real_output, mult1);\
\
CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1(output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)
#endif /* CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1 */
/*!
 * \brief Statement macro: "safe" 8-bit correlation step using the SSSE3 product.
 *
 * Same sequence as the UNSAFE variant, but first runs CM_8IC_CONTROLMINUS128_8IC_U_SSE2
 * on y, which rewrites y in place wherever it equals minus128. The product is then
 * widened and folded into real_output_ps and imag_output_ps.
 * bb_signal_sample_aux_abs is only read and must be prepared by the caller.
 */
#ifndef CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1
#define CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, minus128, minus128control, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)\
CM_8IC_CONTROLMINUS128_8IC_U_SSE2(y, minus128, minus128control)\
CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output)\
CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)\
CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
#endif /* CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1 */
/*!
 * \brief Statement macro: "unsafe" 8-bit correlation step using the SSSE3 product.
 *
 * Runs the SSSE3 8-bit complex product on y and bb_signal_sample_aux, then widens and
 * folds real_output/imag_output into real_output_ps/imag_output_ps. Unlike the SAFE
 * variant it does not pre-adjust y, so y is left unmodified.
 * bb_signal_sample_aux_abs is only read and must be prepared by the caller.
 */
#ifndef CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1
#define CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)\
CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output)\
CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(real_output, input_i_1, input_i_2, output_i32, real_output_ps)\
CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(imag_output, input_i_1, input_i_2, output_i32, imag_output_ps)
#endif /* CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1 */
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE2
/*!
\brief Macros for U_SSE2
*/
/*!
 * \brief Statement macro: one 8-bit correlation step using only SSE2.
 *
 * Splits y into realy/imagy with mask mult1, complex-multiplies by the baseband
 * sample, masks both results with mult1, shifts imag_output left by one byte and ORs
 * the two together. The packed bytes are then widened into two float vectors,
 * output_ps_1 (low 8 bytes) and output_ps_2 (high 8 bytes).
 *
 * NOTE(review): the packed vector is stored in a variable named `output` that is NOT
 * a macro parameter; the call site must declare an __m128i named exactly `output`.
 */
#ifndef CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2
#define CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)\
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)\
CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(real_bb_signal_sample, imag_bb_signal_sample, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)\
\
real_output = _mm_and_si128 (real_output, mult1);\
imag_output = _mm_and_si128 (imag_output, mult1);\
imag_output = _mm_slli_si128 (imag_output, 1);\
output = _mm_or_si128 (real_output, imag_output);\
\
CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2(output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
#endif /* CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2 */
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Macros for U_GENERIC
*/
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_u_H */
#ifndef INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_a_H
#define INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_a_H
#ifdef LV_HAVE_SSE4_1
/*!
\brief Macros for A_SSE4_1
*/
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Macros for A_GENERIC
*/
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_CommonMacros_8ic_cw_corr_32fc_a_H */

View File

@ -1,34 +0,0 @@
####################################################################
Common Macros inside volk_gnsssdr module
####################################################################
First of all, sorry for making you need to read this: macros are evil, they can not be debugged, you do not know where the errors come from, syntax is annoying.. BUT this is the only way I found that allows to share one piece of code between various proto-kernels without performance penalties.
Inline functions have been tested, and they introduce a really small time penalty, but it becomes huge because of long loops, with thousands of samples.
####################################################################
Syntax
####################################################################
In order to allow better understanding of the code we created the macros with a specific syntax:
1) Inside CommonMacros.h you will find macros for common operations. we will explain the syntax with an example:
example: CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)
First of all, you find the characters "CM", which stand for CommonMacros. After that come the type and the number of inputs: "_16IC_X4" (16-bit complex integers, four inputs). The syntax for the type is the same as the one used with volk protokernels; refer to the GNU Radio documentation for more help. Then comes the name of the macro ("_SCALAR_PRODUCT"), followed by the type and the number of outputs ("_16IC_X2"). Finally comes the minimum SSE version needed to run it ("_U_SSE2"). In the arguments you will find (from left to right) the inputs (four inputs: realx, imagx, realy, imagy), some variables that the macro needs to work (realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy) and finally the outputs (two outputs: real_output, imag_output).
The variables that the macro needs are specified when calling it in order to avoid after-compile problems: if you want to use a macro you will need to declare all the variables it needs before, or you will not be able to compile.
2) Inside all the other headers, CommonMacros_XXXXXX.h you will find macros for a specific group of proto-kernels. The syntax is the same as the CommonMacros.h
####################################################################
Workflow
####################################################################
In order to use the macros easily, we usually test the code without macros inside a testing proto-kernel, where you are able to test it, debug it and use breakpoints.
When it works, we place the code inside a macro and test it again.
####################################################################
Why macros
####################################################################
1) They are the only way we could find for sharing code between proto-kernels without performance penalty.
2) It is true that they are really difficult to debug, but if you work with them responsibly it is not so hard. Volk_gnsssdr checks all the SSE proto-kernels implementations results against the generic implementation results, so if your macro is not working you will appreciate it after profiling it.

View File

@ -1,67 +0,0 @@
########################################################################
# How to create custom kernel dispatchers
########################################################################
A kernel dispatcher is a kernel implementation that calls other kernel implementations.
By default, a dispatcher is generated by the build system for every kernel such that:
* the best aligned implementation is called when all pointer arguments are aligned,
* and otherwise the best unaligned implementation is called.
The author of a VOLK kernel may create a custom dispatcher,
to be called in place of the automatically generated one.
A custom dispatcher may be useful to handle head and tail cases,
or to implement different alignment and bounds checking logic.
########################################################################
# Code for an example dispatcher w/ tail case
########################################################################
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#ifdef LV_HAVE_DISPATCHER
/* Example dispatcher: bulk of the work goes to the aligned or unaligned kernel,
 * the remaining 0-3 points go to the generic kernel. */
static inline void volk_gnsssdr_32f_x2_add_32f_dispatcher(float* cVector, const float* aVector, const float* bVector, unsigned int num_points)
{
    /* Split into a body whose length is a multiple of 4 and a short tail. */
    const unsigned int tail_len = num_points % 4;
    const unsigned int body_len = num_points - tail_len;

    if (!volk_gnsssdr_is_aligned(VOLK_OR_PTR(cVector, VOLK_OR_PTR(aVector, bVector))))
    {
        /* At least one pointer is not aligned. */
        volk_gnsssdr_32f_x2_add_32f_u(cVector, aVector, bVector, body_len);
    }
    else
    {
        volk_gnsssdr_32f_x2_add_32f_a(cVector, aVector, bVector, body_len);
    }

    /* Tail case handled by the generic implementation. */
    volk_gnsssdr_32f_x2_add_32f_g(cVector + body_len, aVector + body_len, bVector + body_len, tail_len);
}
#endif //LV_HAVE_DISPATCHER
########################################################################
# Code for an example dispatcher w/ tail case and accumulator
########################################################################
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#ifdef LV_HAVE_DISPATCHER
/* Example dispatcher with an accumulator: the aligned or unaligned kernel writes the
 * partial result for the body, and the generic kernel's tail result is added to it. */
static inline void volk_gnsssdr_32f_x2_dot_prod_32f_dispatcher(float * result, const float * input, const float * taps, unsigned int num_points)
{
    /* Split into a body whose length is a multiple of 16 and a short tail. */
    const unsigned int tail_len = num_points % 16;
    const unsigned int body_len = num_points - tail_len;
    float tail_sum = 0;

    if (!volk_gnsssdr_is_aligned(VOLK_OR_PTR(input, taps)))
    {
        /* At least one pointer is not aligned. */
        volk_gnsssdr_32f_x2_dot_prod_32f_u(result, input, taps, body_len);
    }
    else
    {
        volk_gnsssdr_32f_x2_dot_prod_32f_a(result, input, taps, body_len);
    }

    /* Tail case handled by the generic implementation, then accumulated. */
    volk_gnsssdr_32f_x2_dot_prod_32f_g(&tail_sum, input + body_len, taps + body_len, tail_len);
    *result += tail_sum;
}
#endif //LV_HAVE_DISPATCHER

View File

@ -1,461 +0,0 @@
/*!
* \file volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3.h
* \brief Volk protokernel: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 32 bits vectors
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that performs the carrier wipe-off mixing and the
* Early, Prompt, and Late correlation with 32 bits vectors (16 bits the
* real part and 16 bits the imaginary part):
* - The carrier wipe-off is done by multiplying the input signal by the
* carrier (multiplication of 32 bits vectors) It returns the input
* signal in base band (BB)
* - Early values are calculated by multiplying the input signal in BB by the
* early code (multiplication of 32 bits vectors), accumulating the results
* - Prompt values are calculated by multiplying the input signal in BB by the
* prompt code (multiplication of 32 bits vectors), accumulating the results
* - Late values are calculated by multiplying the input signal in BB by the
* late code (multiplication of 32 bits vectors), accumulating the results
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_u_H
#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
#include "CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
    \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (SSE4.1, unaligned loads)

    Samples are consumed eight at a time by the vector loop; the remaining
    num_points % 8 samples are handled by a scalar loop whose products are
    added on top of the vector result.

    \param E_out Early correlation output (a single complex value is written)
    \param P_out Prompt correlation output (a single complex value is written)
    \param L_out Late correlation output (a single complex value is written)
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_u_sse4_1(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
{
    const unsigned int sse_iters = num_points / 8;

    // Scratch registers; they are handed to the CM_* macros by name and reused
    // by every macro invocation, so the order of the statements below matters.
    __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
    __m128i realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
    __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
    __m128i input_i_1, input_i_2, output_i32;
    __m128 real_output_ps, imag_output_ps;

    float E_out_real = 0;
    float E_out_imag = 0;
    float P_out_real = 0;
    float P_out_imag = 0;
    float L_out_real = 0;
    float L_out_imag = 0;

    const lv_16sc_t* input_ptr = input;
    const lv_16sc_t* carrier_ptr = carrier;

    const lv_16sc_t* E_code_ptr = E_code;
    lv_32fc_t* E_out_ptr = E_out;
    const lv_16sc_t* L_code_ptr = L_code;
    lv_32fc_t* L_out_ptr = L_out;
    const lv_16sc_t* P_code_ptr = P_code;
    lv_32fc_t* P_out_ptr = P_out;

    *E_out_ptr = 0;
    *P_out_ptr = 0;
    *L_out_ptr = 0;

    real_E_code_acc = _mm_setzero_ps();
    imag_E_code_acc = _mm_setzero_ps();
    real_P_code_acc = _mm_setzero_ps();
    imag_P_code_acc = _mm_setzero_ps();
    real_L_code_acc = _mm_setzero_ps();
    imag_L_code_acc = _mm_setzero_ps();

    if (sse_iters > 0)
        {
            for (unsigned int number = 0; number < sse_iters; number++)
                {
                    // Perform the carrier wipe-off.
                    // Each vector is read with two 128-bit loads, 4 elements apart.
                    x1 = _mm_lddqu_si128((const __m128i*)input_ptr);
                    input_ptr += 4;
                    x2 = _mm_lddqu_si128((const __m128i*)input_ptr);

                    y1 = _mm_lddqu_si128((const __m128i*)carrier_ptr);
                    carrier_ptr += 4;
                    y2 = _mm_lddqu_si128((const __m128i*)carrier_ptr);

                    // NOTE(review): the CM_* macros are defined in CommonMacros/; judging by
                    // their names they split the data into real/imag parts and form the
                    // complex product - confirm against the macro definitions.
                    CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
                    CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
                    CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)

                    // Get early values
                    y1 = _mm_lddqu_si128((const __m128i*)E_code_ptr);
                    E_code_ptr += 4;
                    y2 = _mm_lddqu_si128((const __m128i*)E_code_ptr);

                    CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

                    // Adds the float 32 results
                    real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
                    imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);

                    // Get prompt values
                    y1 = _mm_lddqu_si128((const __m128i*)P_code_ptr);
                    P_code_ptr += 4;
                    y2 = _mm_lddqu_si128((const __m128i*)P_code_ptr);

                    CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

                    real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
                    imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);

                    // Get late values
                    y1 = _mm_lddqu_si128((const __m128i*)L_code_ptr);
                    L_code_ptr += 4;
                    y2 = _mm_lddqu_si128((const __m128i*)L_code_ptr);

                    CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

                    real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
                    imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);

                    // Step past the second half of the 8 samples consumed in this iteration
                    input_ptr += 4;
                    carrier_ptr += 4;
                    E_code_ptr += 4;
                    P_code_ptr += 4;
                    L_code_ptr += 4;
                }

            __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];

            // Spill the six accumulators so their four lanes can be summed in scalar code
            _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc);
            _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc);
            _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc);
            _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc);
            _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc);
            _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc);

            for (int i = 0; i < 4; ++i)
                {
                    E_out_real += real_E_dotProductVector[i];
                    E_out_imag += imag_E_dotProductVector[i];
                    P_out_real += real_P_dotProductVector[i];
                    P_out_imag += imag_P_dotProductVector[i];
                    L_out_real += real_L_dotProductVector[i];
                    L_out_imag += imag_L_dotProductVector[i];
                }
            *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
            *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
            *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
        }

    // Scalar tail: the num_points % 8 samples the vector loop did not reach
    lv_16sc_t bb_signal_sample;
    for (unsigned int i = 0; i < num_points % 8; ++i)
        {
            // Perform the carrier wipe-off
            bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
            // Now get early, prompt, and late values for each
            *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
            *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
            *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
        }
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Carrier wipe-off followed by Early, Prompt, and Late correlation (portable C version)
    \param E_out Early correlation output (a single complex value is written)
    \param P_out Prompt correlation output (a single complex value is written)
    \param L_out Late correlation output (a single complex value is written)
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
{
    unsigned int n;

    // The three outputs are accumulators: clear them before summing
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;

    for (n = 0; n < num_points; n++)
        {
            // Carrier wipe-off: bring the sample to baseband
            const lv_16sc_t baseband = input[n] * carrier[n];

            // Every product is stored in a 16-bit complex before being widened to float
            const lv_16sc_t early = baseband * E_code[n];
            const lv_16sc_t prompt = baseband * P_code[n];
            const lv_16sc_t late = baseband * L_code[n];

            // Accumulate the correlator outputs
            *E_out += (lv_32fc_t)early;
            *P_out += (lv_32fc_t)prompt;
            *L_out += (lv_32fc_t)late;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_u_H */
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_H
#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
#include "CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
    \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (SSE4.1, aligned loads)

    Samples are consumed eight at a time by the vector loop; the remaining
    num_points % 8 samples are handled by a scalar loop whose products are
    added on top of the vector result. The input, carrier and code vectors are
    read with _mm_load_si128, so they must be 16-byte aligned.

    \param E_out Early correlation output (a single complex value is written)
    \param P_out Prompt correlation output (a single complex value is written)
    \param L_out Late correlation output (a single complex value is written)
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_sse4_1(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
{
    const unsigned int sse_iters = num_points / 8;

    // Scratch registers; they are handed to the CM_* macros by name and reused
    // by every macro invocation, so the order of the statements below matters.
    __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
    __m128i realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
    __m128 real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc;
    __m128i input_i_1, input_i_2, output_i32;
    __m128 real_output_ps, imag_output_ps;

    float E_out_real = 0;
    float E_out_imag = 0;
    float P_out_real = 0;
    float P_out_imag = 0;
    float L_out_real = 0;
    float L_out_imag = 0;

    const lv_16sc_t* input_ptr = input;
    const lv_16sc_t* carrier_ptr = carrier;

    const lv_16sc_t* E_code_ptr = E_code;
    lv_32fc_t* E_out_ptr = E_out;
    const lv_16sc_t* L_code_ptr = L_code;
    lv_32fc_t* L_out_ptr = L_out;
    const lv_16sc_t* P_code_ptr = P_code;
    lv_32fc_t* P_out_ptr = P_out;

    *E_out_ptr = 0;
    *P_out_ptr = 0;
    *L_out_ptr = 0;

    real_E_code_acc = _mm_setzero_ps();
    imag_E_code_acc = _mm_setzero_ps();
    real_P_code_acc = _mm_setzero_ps();
    imag_P_code_acc = _mm_setzero_ps();
    real_L_code_acc = _mm_setzero_ps();
    imag_L_code_acc = _mm_setzero_ps();

    if (sse_iters > 0)
        {
            for (unsigned int number = 0; number < sse_iters; number++)
                {
                    // Perform the carrier wipe-off.
                    // Each vector is read with two 128-bit aligned loads, 4 elements apart.
                    x1 = _mm_load_si128((const __m128i*)input_ptr);
                    input_ptr += 4;
                    x2 = _mm_load_si128((const __m128i*)input_ptr);

                    y1 = _mm_load_si128((const __m128i*)carrier_ptr);
                    carrier_ptr += 4;
                    y2 = _mm_load_si128((const __m128i*)carrier_ptr);

                    // NOTE(review): the CM_* macros are defined in CommonMacros/; judging by
                    // their names they split the data into real/imag parts and form the
                    // complex product - confirm against the macro definitions.
                    CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
                    CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
                    CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)

                    // Get early values
                    y1 = _mm_load_si128((const __m128i*)E_code_ptr);
                    E_code_ptr += 4;
                    y2 = _mm_load_si128((const __m128i*)E_code_ptr);

                    CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

                    // Adds the float 32 results
                    real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
                    imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);

                    // Get prompt values
                    y1 = _mm_load_si128((const __m128i*)P_code_ptr);
                    P_code_ptr += 4;
                    y2 = _mm_load_si128((const __m128i*)P_code_ptr);

                    CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

                    real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
                    imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);

                    // Get late values
                    y1 = _mm_load_si128((const __m128i*)L_code_ptr);
                    L_code_ptr += 4;
                    y2 = _mm_load_si128((const __m128i*)L_code_ptr);

                    CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

                    real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
                    imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);

                    // Step past the second half of the 8 samples consumed in this iteration
                    input_ptr += 4;
                    carrier_ptr += 4;
                    E_code_ptr += 4;
                    P_code_ptr += 4;
                    L_code_ptr += 4;
                }

            __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];

            // Spill the six accumulators so their four lanes can be summed in scalar code
            _mm_store_ps((float*)real_E_dotProductVector,real_E_code_acc);
            _mm_store_ps((float*)imag_E_dotProductVector,imag_E_code_acc);
            _mm_store_ps((float*)real_P_dotProductVector,real_P_code_acc);
            _mm_store_ps((float*)imag_P_dotProductVector,imag_P_code_acc);
            _mm_store_ps((float*)real_L_dotProductVector,real_L_code_acc);
            _mm_store_ps((float*)imag_L_dotProductVector,imag_L_code_acc);

            for (int i = 0; i < 4; ++i)
                {
                    E_out_real += real_E_dotProductVector[i];
                    E_out_imag += imag_E_dotProductVector[i];
                    P_out_real += real_P_dotProductVector[i];
                    P_out_imag += imag_P_dotProductVector[i];
                    L_out_real += real_L_dotProductVector[i];
                    L_out_imag += imag_L_dotProductVector[i];
                }
            *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
            *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
            *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
        }

    // Scalar tail: the num_points % 8 samples the vector loop did not reach
    lv_16sc_t bb_signal_sample;
    for (unsigned int i = 0; i < num_points % 8; ++i)
        {
            // Perform the carrier wipe-off
            bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
            // Now get early, prompt, and late values for each
            *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
            *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
            *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
        }
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Carrier wipe-off followed by Early, Prompt, and Late correlation (portable C version, aligned entry point)
    \param E_out Early correlation output (a single complex value is written)
    \param P_out Prompt correlation output (a single complex value is written)
    \param L_out Late correlation output (a single complex value is written)
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, unsigned int num_points)
{
    unsigned int n;

    // The three outputs are accumulators: clear them before summing
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;

    for (n = 0; n < num_points; n++)
        {
            // Carrier wipe-off: bring the sample to baseband
            const lv_16sc_t baseband = input[n] * carrier[n];

            // Every product is stored in a 16-bit complex before being widened to float
            const lv_16sc_t early = baseband * E_code[n];
            const lv_16sc_t prompt = baseband * P_code[n];
            const lv_16sc_t late = baseband * L_code[n];

            // Accumulate the correlator outputs
            *E_out += (lv_32fc_t)early;
            *P_out += (lv_32fc_t)prompt;
            *L_out += (lv_32fc_t)late;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3_a_H */

View File

@ -1,595 +0,0 @@
/*!
* \file volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5.h
* \brief Volk protokernel: performs the carrier wipe-off mixing and the Very early, Early, Prompt, Late and very late correlation with 32 bits vectors and returns float32 values.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that performs the carrier wipe-off mixing and the
* Very Early, Early, Prompt, Late and Very Late correlation with 32 bits vectors (16 bits the
* real part and 16 bits the imaginary part) and accumulates into float32 values, returning them:
* - The carrier wipe-off is done by multiplying the input signal by the
* carrier (multiplication of 32-bit vectors). It returns the input
* signal in baseband (BB).
* - Very Early values are calculated by multiplying the input signal in BB by the
* very early code (multiplication of 32 bits vectors), converting that to float32 and accumulating the results
* - Early values are calculated by multiplying the input signal in BB by the
* early code (multiplication of 32 bits vectors), converting that to float32 and accumulating the results
* - Prompt values are calculated by multiplying the input signal in BB by the
* prompt code (multiplication of 32 bits vectors), converting that to float32 and accumulating the results
* - Late values are calculated by multiplying the input signal in BB by the
* late code (multiplication of 32 bits vectors), converting that to float32 and accumulating the results
* - Very Late values are calculated by multiplying the input signal in BB by the
* very late code (multiplication of 32 bits vectors), converting that to float32 and accumulating the results
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_u_H
#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
#include "CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
    \brief Carrier wipe-off followed by Very Early, Early, Prompt, Late and Very Late correlation (SSE4.1, unaligned loads)

    The vector loop consumes eight samples per iteration; the remaining
    num_points % 8 samples are processed by a scalar loop and added to the
    vector result.

    \param VE_out Very Early correlation output (a single complex value is written)
    \param E_out Early correlation output (a single complex value is written)
    \param P_out Prompt correlation output (a single complex value is written)
    \param L_out Late correlation output (a single complex value is written)
    \param VL_out Very Late correlation output (a single complex value is written)
    \param input The input signal input
    \param carrier The carrier signal input
    \param VE_code Very Early PRN code replica input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param VL_code Very Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_u_sse4_1(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* VE_code, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, const lv_16sc_t* VL_code, unsigned int num_points)
{
    const unsigned int blocks_of_8 = num_points / 8;
    const unsigned int leftover = num_points % 8;

    // Scratch registers handed by name to the CM_* macros (defined in CommonMacros/)
    __m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
    __m128i realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
    __m128i input_i_1, input_i_2, output_i32;
    __m128 real_output_ps, imag_output_ps;

    // Read cursors; each one moves forward 8 elements per vector iteration
    const lv_16sc_t* input_ptr = input;
    const lv_16sc_t* carrier_ptr = carrier;
    const lv_16sc_t* VE_code_ptr = VE_code;
    const lv_16sc_t* E_code_ptr = E_code;
    const lv_16sc_t* P_code_ptr = P_code;
    const lv_16sc_t* L_code_ptr = L_code;
    const lv_16sc_t* VL_code_ptr = VL_code;

    *VE_out = 0;
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;
    *VL_out = 0;

    // Four-lane float accumulators, one real/imag pair per correlator
    __m128 real_VE_code_acc = _mm_setzero_ps();
    __m128 imag_VE_code_acc = _mm_setzero_ps();
    __m128 real_E_code_acc = _mm_setzero_ps();
    __m128 imag_E_code_acc = _mm_setzero_ps();
    __m128 real_P_code_acc = _mm_setzero_ps();
    __m128 imag_P_code_acc = _mm_setzero_ps();
    __m128 real_L_code_acc = _mm_setzero_ps();
    __m128 imag_L_code_acc = _mm_setzero_ps();
    __m128 real_VL_code_acc = _mm_setzero_ps();
    __m128 imag_VL_code_acc = _mm_setzero_ps();

    if (blocks_of_8 > 0)
        {
            unsigned int blk;
            for (blk = 0; blk < blocks_of_8; ++blk)
                {
                    // Carrier wipe-off on 8 samples (two 128-bit loads per vector)
                    x1 = _mm_lddqu_si128((__m128i*)input_ptr);
                    x2 = _mm_lddqu_si128((__m128i*)(input_ptr + 4));
                    y1 = _mm_lddqu_si128((__m128i*)carrier_ptr);
                    y2 = _mm_lddqu_si128((__m128i*)(carrier_ptr + 4));

                    CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
                    CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
                    CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)

                    // Very Early correlator
                    y1 = _mm_lddqu_si128((__m128i*)VE_code_ptr);
                    y2 = _mm_lddqu_si128((__m128i*)(VE_code_ptr + 4));
                    CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
                    real_VE_code_acc = _mm_add_ps(real_VE_code_acc, real_output_ps);
                    imag_VE_code_acc = _mm_add_ps(imag_VE_code_acc, imag_output_ps);

                    // Early correlator
                    y1 = _mm_lddqu_si128((__m128i*)E_code_ptr);
                    y2 = _mm_lddqu_si128((__m128i*)(E_code_ptr + 4));
                    CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
                    real_E_code_acc = _mm_add_ps(real_E_code_acc, real_output_ps);
                    imag_E_code_acc = _mm_add_ps(imag_E_code_acc, imag_output_ps);

                    // Prompt correlator
                    y1 = _mm_lddqu_si128((__m128i*)P_code_ptr);
                    y2 = _mm_lddqu_si128((__m128i*)(P_code_ptr + 4));
                    CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
                    real_P_code_acc = _mm_add_ps(real_P_code_acc, real_output_ps);
                    imag_P_code_acc = _mm_add_ps(imag_P_code_acc, imag_output_ps);

                    // Late correlator
                    y1 = _mm_lddqu_si128((__m128i*)L_code_ptr);
                    y2 = _mm_lddqu_si128((__m128i*)(L_code_ptr + 4));
                    CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
                    real_L_code_acc = _mm_add_ps(real_L_code_acc, real_output_ps);
                    imag_L_code_acc = _mm_add_ps(imag_L_code_acc, imag_output_ps);

                    // Very Late correlator
                    y1 = _mm_lddqu_si128((__m128i*)VL_code_ptr);
                    y2 = _mm_lddqu_si128((__m128i*)(VL_code_ptr + 4));
                    CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
                    real_VL_code_acc = _mm_add_ps(real_VL_code_acc, real_output_ps);
                    imag_VL_code_acc = _mm_add_ps(imag_VL_code_acc, imag_output_ps);

                    // Advance every cursor past the 8 samples just processed
                    input_ptr += 8;
                    carrier_ptr += 8;
                    VE_code_ptr += 8;
                    E_code_ptr += 8;
                    P_code_ptr += 8;
                    L_code_ptr += 8;
                    VL_code_ptr += 8;
                }

            // Spill the accumulators and add up their four lanes in scalar code
            __VOLK_ATTR_ALIGNED(16) float real_VE_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float imag_VE_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float real_VL_dotProductVector[4];
            __VOLK_ATTR_ALIGNED(16) float imag_VL_dotProductVector[4];

            _mm_storeu_ps((float*)real_VE_dotProductVector, real_VE_code_acc);
            _mm_storeu_ps((float*)imag_VE_dotProductVector, imag_VE_code_acc);
            _mm_storeu_ps((float*)real_E_dotProductVector, real_E_code_acc);
            _mm_storeu_ps((float*)imag_E_dotProductVector, imag_E_code_acc);
            _mm_storeu_ps((float*)real_P_dotProductVector, real_P_code_acc);
            _mm_storeu_ps((float*)imag_P_dotProductVector, imag_P_code_acc);
            _mm_storeu_ps((float*)real_L_dotProductVector, real_L_code_acc);
            _mm_storeu_ps((float*)imag_L_dotProductVector, imag_L_code_acc);
            _mm_storeu_ps((float*)real_VL_dotProductVector, real_VL_code_acc);
            _mm_storeu_ps((float*)imag_VL_dotProductVector, imag_VL_code_acc);

            float VE_out_real = 0;
            float VE_out_imag = 0;
            float E_out_real = 0;
            float E_out_imag = 0;
            float P_out_real = 0;
            float P_out_imag = 0;
            float L_out_real = 0;
            float L_out_imag = 0;
            float VL_out_real = 0;
            float VL_out_imag = 0;

            int lane;
            for (lane = 0; lane < 4; ++lane)
                {
                    VE_out_real += real_VE_dotProductVector[lane];
                    VE_out_imag += imag_VE_dotProductVector[lane];
                    E_out_real += real_E_dotProductVector[lane];
                    E_out_imag += imag_E_dotProductVector[lane];
                    P_out_real += real_P_dotProductVector[lane];
                    P_out_imag += imag_P_dotProductVector[lane];
                    L_out_real += real_L_dotProductVector[lane];
                    L_out_imag += imag_L_dotProductVector[lane];
                    VL_out_real += real_VL_dotProductVector[lane];
                    VL_out_imag += imag_VL_dotProductVector[lane];
                }

            *VE_out = lv_cmake(VE_out_real, VE_out_imag);
            *E_out = lv_cmake(E_out_real, E_out_imag);
            *P_out = lv_cmake(P_out_real, P_out_imag);
            *L_out = lv_cmake(L_out_real, L_out_imag);
            *VL_out = lv_cmake(VL_out_real, VL_out_imag);
        }

    // Scalar tail: the samples left over after the last full block of 8
    unsigned int k;
    for (k = 0; k < leftover; ++k)
        {
            // Carrier wipe-off
            const lv_16sc_t wiped = input_ptr[k] * carrier_ptr[k];
            // Correlate against each code replica and accumulate
            *VE_out += (lv_32fc_t) (wiped * VE_code_ptr[k]);
            *E_out += (lv_32fc_t) (wiped * E_code_ptr[k]);
            *P_out += (lv_32fc_t) (wiped * P_code_ptr[k]);
            *L_out += (lv_32fc_t) (wiped * L_code_ptr[k]);
            *VL_out += (lv_32fc_t) (wiped * VL_code_ptr[k]);
        }
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Carrier wipe-off followed by Very Early, Early, Prompt, Late and Very Late correlation (portable C version)
    \param VE_out Very Early correlation output (a single complex value is written)
    \param E_out Early correlation output (a single complex value is written)
    \param P_out Prompt correlation output (a single complex value is written)
    \param L_out Late correlation output (a single complex value is written)
    \param VL_out Very Late correlation output (a single complex value is written)
    \param input The input signal input
    \param carrier The carrier signal input
    \param VE_code Very Early PRN code replica input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param VL_code Very Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* VE_code, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, const lv_16sc_t* VL_code, unsigned int num_points)
{
    unsigned int n;

    // The five outputs are accumulators: clear them before summing
    *VE_out = 0;
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;
    *VL_out = 0;

    for (n = 0; n < num_points; n++)
        {
            // Carrier wipe-off: bring the sample to baseband
            const lv_16sc_t baseband = input[n] * carrier[n];

            // Every product is stored in a 16-bit complex before being widened to float
            const lv_16sc_t very_early = baseband * VE_code[n];
            const lv_16sc_t early = baseband * E_code[n];
            const lv_16sc_t prompt = baseband * P_code[n];
            const lv_16sc_t late = baseband * L_code[n];
            const lv_16sc_t very_late = baseband * VL_code[n];

            // Accumulate the correlator outputs
            *VE_out += (lv_32fc_t)very_early;
            *E_out += (lv_32fc_t)early;
            *P_out += (lv_32fc_t)prompt;
            *L_out += (lv_32fc_t)late;
            *VL_out += (lv_32fc_t)very_late;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_u_H */
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_H
#define INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
#include "CommonMacros/CommonMacros_16ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
\brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Late correlation (SSE4.1 version using aligned 128-bit loads)

The five outputs are first set to zero. When num_points >= 8, the vectorised
loop handles 8 complex samples per iteration and its horizontally-summed result
is written to the outputs; the remaining num_points % 8 samples are then added
by a scalar loop.

All input buffers are read with _mm_load_si128, which requires 16-byte aligned
addresses.

\param VE_out Very Early correlation output (single complex value)
\param E_out Early correlation output (single complex value)
\param P_out Prompt correlation output (single complex value)
\param L_out Late correlation output (single complex value)
\param VL_out Very Late correlation output (single complex value)
\param input The input signal input
\param carrier The carrier signal input
\param VE_code Very Early PRN code replica input
\param E_code Early PRN code replica input
\param P_code Prompt PRN code replica input
\param L_code Late PRN code replica input
\param VL_code Very Late PRN code replica input
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_sse4_1(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* VE_code, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, const lv_16sc_t* VL_code, unsigned int num_points)
{
// Each pass of the vector loop consumes 8 complex samples (two 128-bit loads per buffer)
const unsigned int sse_iters = num_points / 8;
// Scratch registers handed to the CM_* macros (defined in the CommonMacros headers)
__m128i x1, x2, y1, y2, real_bb_signal_sample, imag_bb_signal_sample;
__m128i realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output;
// Four-lane float accumulators, one real/imag pair per correlator
__m128 real_VE_code_acc, imag_VE_code_acc, real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc, real_VL_code_acc, imag_VL_code_acc;
__m128i input_i_1, input_i_2, output_i32;
__m128 real_output_ps, imag_output_ps;
// Scalar totals obtained by summing the four lanes of each accumulator
float VE_out_real = 0;
float VE_out_imag = 0;
float E_out_real = 0;
float E_out_imag = 0;
float P_out_real = 0;
float P_out_imag = 0;
float L_out_real = 0;
float L_out_imag = 0;
float VL_out_real = 0;
float VL_out_imag = 0;
// Working cursors; the caller's pointers themselves are left untouched
const lv_16sc_t* input_ptr = input;
const lv_16sc_t* carrier_ptr = carrier;
const lv_16sc_t* VE_code_ptr = VE_code;
lv_32fc_t* VE_out_ptr = VE_out;
const lv_16sc_t* E_code_ptr = E_code;
lv_32fc_t* E_out_ptr = E_out;
const lv_16sc_t* L_code_ptr = L_code;
lv_32fc_t* L_out_ptr = L_out;
const lv_16sc_t* P_code_ptr = P_code;
lv_32fc_t* P_out_ptr = P_out;
const lv_16sc_t* VL_code_ptr = VL_code;
lv_32fc_t* VL_out_ptr = VL_out;
// Outputs start at zero so the scalar tail can accumulate even when no vector pass runs
*VE_out_ptr = 0;
*E_out_ptr = 0;
*P_out_ptr = 0;
*L_out_ptr = 0;
*VL_out_ptr = 0;
real_VE_code_acc = _mm_setzero_ps();
imag_VE_code_acc = _mm_setzero_ps();
real_E_code_acc = _mm_setzero_ps();
imag_E_code_acc = _mm_setzero_ps();
real_P_code_acc = _mm_setzero_ps();
imag_P_code_acc = _mm_setzero_ps();
real_L_code_acc = _mm_setzero_ps();
imag_L_code_acc = _mm_setzero_ps();
real_VL_code_acc = _mm_setzero_ps();
imag_VL_code_acc = _mm_setzero_ps();
if (sse_iters>0)
{
for(unsigned int number = 0;number < sse_iters; number++){
//Perform the carrier wipe-off
// Every buffer is read as two consecutive aligned 128-bit loads: the cursor is
// advanced by 4 elements between them and by 4 more at the bottom of the loop.
x1 = _mm_load_si128((__m128i*)input_ptr);
input_ptr += 4;
x2 = _mm_load_si128((__m128i*)input_ptr);
y1 = _mm_load_si128((__m128i*)carrier_ptr);
carrier_ptr += 4;
y2 = _mm_load_si128((__m128i*)carrier_ptr);
// NOTE(review): judging by their names, the macros below split the loaded samples into
// real/imag vectors and multiply input by carrier; confirm against CommonMacros.h.
CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(x1, x2, realx, imagx)
CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(y1, y2, realy, imagy)
CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
//Get very early values
y1 = _mm_load_si128((__m128i*)VE_code_ptr);
VE_code_ptr += 4;
y2 = _mm_load_si128((__m128i*)VE_code_ptr);
// The macro leaves its float results in real_output_ps / imag_output_ps, which are accumulated below
CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
real_VE_code_acc = _mm_add_ps (real_VE_code_acc, real_output_ps);
imag_VE_code_acc = _mm_add_ps (imag_VE_code_acc, imag_output_ps);
//Get early values
y1 = _mm_load_si128((__m128i*)E_code_ptr);
E_code_ptr += 4;
y2 = _mm_load_si128((__m128i*)E_code_ptr);
CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
//Get prompt values
y1 = _mm_load_si128((__m128i*)P_code_ptr);
P_code_ptr += 4;
y2 = _mm_load_si128((__m128i*)P_code_ptr);
CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
//Get late values
y1 = _mm_load_si128((__m128i*)L_code_ptr);
L_code_ptr += 4;
y2 = _mm_load_si128((__m128i*)L_code_ptr);
CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
//Get very late values
y1 = _mm_load_si128((__m128i*)VL_code_ptr);
VL_code_ptr += 4;
y2 = _mm_load_si128((__m128i*)VL_code_ptr);
CM_16IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y1, y2, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
real_VL_code_acc = _mm_add_ps (real_VL_code_acc, real_output_ps);
imag_VL_code_acc = _mm_add_ps (imag_VL_code_acc, imag_output_ps);
// Second half of the 8-element stride: step every cursor past the data read by the second load
input_ptr += 4;
carrier_ptr += 4;
VE_code_ptr += 4;
E_code_ptr += 4;
P_code_ptr += 4;
L_code_ptr += 4;
VL_code_ptr += 4;
}
// Spill each accumulator to an aligned float[4] so that its lanes can be summed in scalar code
__VOLK_ATTR_ALIGNED(16) float real_VE_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float imag_VE_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float real_VL_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float imag_VL_dotProductVector[4];
_mm_store_ps((float*)real_VE_dotProductVector,real_VE_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)imag_VE_dotProductVector,imag_VE_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)real_VL_dotProductVector,real_VL_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)imag_VL_dotProductVector,imag_VL_code_acc); // Store the results back into the dot product vector
// Horizontal sum: add the four lanes of every accumulator
for (int i = 0; i<4; ++i)
{
VE_out_real += real_VE_dotProductVector[i];
VE_out_imag += imag_VE_dotProductVector[i];
E_out_real += real_E_dotProductVector[i];
E_out_imag += imag_E_dotProductVector[i];
P_out_real += real_P_dotProductVector[i];
P_out_imag += imag_P_dotProductVector[i];
L_out_real += real_L_dotProductVector[i];
L_out_imag += imag_L_dotProductVector[i];
VL_out_real += real_VL_dotProductVector[i];
VL_out_imag += imag_VL_dotProductVector[i];
}
// Overwrite the zeroed outputs with the vectorised partial results
*VE_out_ptr = lv_cmake(VE_out_real, VE_out_imag);
*E_out_ptr = lv_cmake(E_out_real, E_out_imag);
*P_out_ptr = lv_cmake(P_out_real, P_out_imag);
*L_out_ptr = lv_cmake(L_out_real, L_out_imag);
*VL_out_ptr = lv_cmake(VL_out_real, VL_out_imag);
}
// Scalar tail: the num_points % 8 samples left over by the vector loop
lv_16sc_t bb_signal_sample;
for(unsigned int i=0; i < num_points%8; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
// Accumulate the very early, early, prompt, late and very late correlations
*VE_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VE_code_ptr++));
*E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
*P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
*L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
*VL_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VL_code_ptr++));
}
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Late correlation (portable C version)

    Every output is reset to zero and then accumulates, sample by sample, the
    product of the carrier-mixed input with the matching code replica.

    \param VE_out Very Early correlation output (single complex value)
    \param E_out Early correlation output (single complex value)
    \param P_out Prompt correlation output (single complex value)
    \param L_out Late correlation output (single complex value)
    \param VL_out Very Late correlation output (single complex value)
    \param input The input signal input
    \param carrier The carrier signal input
    \param VE_code Very Early PRN code replica input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param VL_code Very Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_16sc_t* input, const lv_16sc_t* carrier, const lv_16sc_t* VE_code, const lv_16sc_t* E_code, const lv_16sc_t* P_code, const lv_16sc_t* L_code, const lv_16sc_t* VL_code, unsigned int num_points)
{
    unsigned int n;

    // Start every correlator from zero
    *VE_out = 0;
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;
    *VL_out = 0;

    for (n = 0; n < num_points; ++n)
        {
            // Carrier wipe-off: mix the incoming sample down with the local carrier
            lv_16sc_t baseband = input[n] * carrier[n];

            // Products are kept in 16-bit complex variables before being widened to float
            lv_16sc_t very_early = baseband * VE_code[n];
            lv_16sc_t early = baseband * E_code[n];
            lv_16sc_t prompt = baseband * P_code[n];
            lv_16sc_t late = baseband * L_code[n];
            lv_16sc_t very_late = baseband * VL_code[n];

            // Accumulate the five correlator outputs
            *VE_out += (lv_32fc_t)very_early;
            *E_out += (lv_32fc_t)early;
            *P_out += (lv_32fc_t)prompt;
            *L_out += (lv_32fc_t)late;
            *VL_out += (lv_32fc_t)very_late;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5_a_H */

View File

@ -1,303 +0,0 @@
/*!
* \file volk_gnsssdr_32fc_convert_16ic.h
* \brief Volk protokernel: converts 32-bit float complex values to 16-bit integer complex values, taking care of overflow
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#include <inttypes.h>
#include <stdio.h>
#include <math.h>
#include "volk_gnsssdr/volk_gnsssdr_complex.h"
#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H
#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
    \brief Converts a vector of complex floats (32 bits per component) into a vector of complex 16-bit integers, clipping every component to [-32768, 32767] (SSE2, unaligned buffers)
    \param outputVector The 16-bit integer complex output data buffer
    \param inputVector The floating point complex input data buffer; it is only read, never modified
    \param num_points The number of complex values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
    // One vector pass converts 8 floats, i.e. 4 complex samples
    const unsigned int sse_iters = num_points / 4;

    const float* inputVectorPtr = (const float*)inputVector;
    int16_t* outputVectorPtr = (int16_t*)outputVector;

    const float min_val = -32768;
    const float max_val = 32767;

    __m128 inputVal1, inputVal2;
    __m128i intInputVal1, intInputVal2;
    __m128 ret1, ret2;
    const __m128 vmin_val = _mm_set_ps1(min_val);
    const __m128 vmax_val = _mm_set_ps1(max_val);

    for (unsigned int i = 0; i < sse_iters; i++)
        {
            inputVal1 = _mm_loadu_ps(inputVectorPtr);
            inputVectorPtr += 4;
            inputVal2 = _mm_loadu_ps(inputVectorPtr);
            inputVectorPtr += 4;

            // Clip
            ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
            ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);

            // Convert to 32-bit integers, then pack both halves into eight 16-bit integers
            intInputVal1 = _mm_cvtps_epi32(ret1);
            intInputVal2 = _mm_cvtps_epi32(ret2);
            intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);

            _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
            outputVectorPtr += 8;
        }

    // Scalar tail: two floats for each of the num_points % 4 remaining complex samples.
    // The value is clipped in a local copy so that the caller's const input is left untouched.
    for (unsigned int i = 0; i < (num_points % 4) * 2; i++)
        {
            float aux = inputVectorPtr[i];
            if (aux > max_val)
                aux = max_val;
            else if (aux < min_val)
                aux = min_val;
            outputVectorPtr[i] = (int16_t)rintf(aux);
        }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
    \brief Converts a vector of complex floats (32 bits per component) into a vector of complex 16-bit integers, clipping every component to [-32768, 32767] (unaligned buffers)

    NOTE(review): although this variant is guarded by LV_HAVE_SSE, its body relies on
    __m128i, _mm_cvtps_epi32, _mm_packs_epi32 and _mm_storeu_si128, which are SSE2
    intrinsics; confirm that it is never built for an SSE-only target.

    \param outputVector The 16-bit integer complex output data buffer
    \param inputVector The floating point complex input data buffer; it is only read, never modified
    \param num_points The number of complex values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
    // One vector pass converts 8 floats, i.e. 4 complex samples
    const unsigned int sse_iters = num_points / 4;

    const float* inputVectorPtr = (const float*)inputVector;
    int16_t* outputVectorPtr = (int16_t*)outputVector;

    const float min_val = -32768;
    const float max_val = 32767;

    __m128 inputVal1, inputVal2;
    __m128i intInputVal1, intInputVal2;
    __m128 ret1, ret2;
    const __m128 vmin_val = _mm_set_ps1(min_val);
    const __m128 vmax_val = _mm_set_ps1(max_val);

    for (unsigned int i = 0; i < sse_iters; i++)
        {
            inputVal1 = _mm_loadu_ps(inputVectorPtr);
            inputVectorPtr += 4;
            inputVal2 = _mm_loadu_ps(inputVectorPtr);
            inputVectorPtr += 4;

            // Clip
            ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
            ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);

            // Convert to 32-bit integers, then pack both halves into eight 16-bit integers
            intInputVal1 = _mm_cvtps_epi32(ret1);
            intInputVal2 = _mm_cvtps_epi32(ret2);
            intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);

            _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
            outputVectorPtr += 8;
        }

    // Scalar tail: two floats for each of the num_points % 4 remaining complex samples.
    // The value is clipped in a local copy so that the caller's const input is left untouched.
    for (unsigned int i = 0; i < (num_points % 4) * 2; i++)
        {
            float aux = inputVectorPtr[i];
            if (aux > max_val)
                aux = max_val;
            else if (aux < min_val)
                aux = min_val;
            outputVectorPtr[i] = (int16_t)rintf(aux);
        }
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Converts a vector of complex floats (32 bits per component) into a vector of complex 16-bit integers, clipping every component to [-32768, 32767] (portable C version)
    \param outputVector The 16-bit integer complex output data buffer
    \param inputVector The floating point complex input data buffer; it is only read, never modified
    \param num_points The number of complex values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
    const float* inputVectorPtr = (const float*)inputVector;
    int16_t* outputVectorPtr = (int16_t*)outputVector;
    const float min_val = -32768;
    const float max_val = 32767;

    // Real and imaginary parts are handled as a flat array of 2 * num_points floats
    for (unsigned int i = 0; i < num_points * 2; i++)
        {
            // Clip a local copy so that the caller's const input is left untouched
            float aux = inputVectorPtr[i];
            if (aux > max_val)
                aux = max_val;
            else if (aux < min_val)
                aux = min_val;
            outputVectorPtr[i] = (int16_t)rintf(aux);
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H */
#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H
#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
    \brief Converts a vector of complex floats (32 bits per component) into a vector of complex 16-bit integers, clipping every component to [-32768, 32767] (SSE2, aligned buffers)

    Both buffers are accessed with aligned load/store intrinsics and therefore
    must be 16-byte aligned.

    \param outputVector The 16-bit integer complex output data buffer
    \param inputVector The floating point complex input data buffer; it is only read, never modified
    \param num_points The number of complex values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
    // One vector pass converts 8 floats, i.e. 4 complex samples
    const unsigned int sse_iters = num_points / 4;

    const float* inputVectorPtr = (const float*)inputVector;
    int16_t* outputVectorPtr = (int16_t*)outputVector;

    const float min_val = -32768;
    const float max_val = 32767;

    __m128 inputVal1, inputVal2;
    __m128i intInputVal1, intInputVal2;
    __m128 ret1, ret2;
    const __m128 vmin_val = _mm_set_ps1(min_val);
    const __m128 vmax_val = _mm_set_ps1(max_val);

    for (unsigned int i = 0; i < sse_iters; i++)
        {
            inputVal1 = _mm_load_ps(inputVectorPtr);
            inputVectorPtr += 4;
            inputVal2 = _mm_load_ps(inputVectorPtr);
            inputVectorPtr += 4;

            // Clip
            ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
            ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);

            // Convert to 32-bit integers, then pack both halves into eight 16-bit integers
            intInputVal1 = _mm_cvtps_epi32(ret1);
            intInputVal2 = _mm_cvtps_epi32(ret2);
            intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);

            _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
            outputVectorPtr += 8;
        }

    // Scalar tail: two floats for each of the num_points % 4 remaining complex samples.
    // The value is clipped in a local copy so that the caller's const input is left untouched.
    for (unsigned int i = 0; i < (num_points % 4) * 2; i++)
        {
            float aux = inputVectorPtr[i];
            if (aux > max_val)
                aux = max_val;
            else if (aux < min_val)
                aux = min_val;
            outputVectorPtr[i] = (int16_t)rintf(aux);
        }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
    \brief Converts a vector of complex floats (32 bits per component) into a vector of complex 16-bit integers, clipping every component to [-32768, 32767] (aligned buffers)

    Both buffers are accessed with aligned load/store intrinsics and therefore
    must be 16-byte aligned.

    NOTE(review): although this variant is guarded by LV_HAVE_SSE, its body relies on
    __m128i, _mm_cvtps_epi32, _mm_packs_epi32 and _mm_store_si128, which are SSE2
    intrinsics; confirm that it is never built for an SSE-only target.

    \param outputVector The 16-bit integer complex output data buffer
    \param inputVector The floating point complex input data buffer; it is only read, never modified
    \param num_points The number of complex values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
    // One vector pass converts 8 floats, i.e. 4 complex samples
    const unsigned int sse_iters = num_points / 4;

    const float* inputVectorPtr = (const float*)inputVector;
    int16_t* outputVectorPtr = (int16_t*)outputVector;

    const float min_val = -32768;
    const float max_val = 32767;

    __m128 inputVal1, inputVal2;
    __m128i intInputVal1, intInputVal2;
    __m128 ret1, ret2;
    const __m128 vmin_val = _mm_set_ps1(min_val);
    const __m128 vmax_val = _mm_set_ps1(max_val);

    for (unsigned int i = 0; i < sse_iters; i++)
        {
            inputVal1 = _mm_load_ps(inputVectorPtr);
            inputVectorPtr += 4;
            inputVal2 = _mm_load_ps(inputVectorPtr);
            inputVectorPtr += 4;

            // Clip
            ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
            ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);

            // Convert to 32-bit integers, then pack both halves into eight 16-bit integers
            intInputVal1 = _mm_cvtps_epi32(ret1);
            intInputVal2 = _mm_cvtps_epi32(ret2);
            intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);

            _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
            outputVectorPtr += 8;
        }

    // Scalar tail: two floats for each of the num_points % 4 remaining complex samples.
    // The value is clipped in a local copy so that the caller's const input is left untouched.
    for (unsigned int i = 0; i < (num_points % 4) * 2; i++)
        {
            float aux = inputVectorPtr[i];
            if (aux > max_val)
                aux = max_val;
            else if (aux < min_val)
                aux = min_val;
            outputVectorPtr[i] = (int16_t)rintf(aux);
        }
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Converts a vector of complex floats (32 bits per component) into a vector of complex 16-bit integers, clipping every component to [-32768, 32767] (portable C version)
    \param outputVector The 16-bit integer complex output data buffer
    \param inputVector The floating point complex input data buffer; it is only read, never modified
    \param num_points The number of complex values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_a_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
    const float* inputVectorPtr = (const float*)inputVector;
    int16_t* outputVectorPtr = (int16_t*)outputVector;
    const float min_val = -32768;
    const float max_val = 32767;

    // Real and imaginary parts are handled as a flat array of 2 * num_points floats
    for (unsigned int i = 0; i < num_points * 2; i++)
        {
            // Clip a local copy so that the caller's const input is left untouched
            float aux = inputVectorPtr[i];
            if (aux > max_val)
                aux = max_val;
            else if (aux < min_val)
                aux = min_val;
            outputVectorPtr[i] = (int16_t)rintf(aux);
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H */

View File

@ -1,240 +0,0 @@
/*!
* \file volk_gnsssdr_32fc_s32f_convert_8ic.h
* \brief Volk protokernel: converts float32 complex values to 8 integer complex values taking care of overflow
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#include <inttypes.h>
#include <stdio.h>
#include <math.h>
#include "volk_gnsssdr/volk_gnsssdr_complex.h"
#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_u_H
#define INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_u_H
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
    \brief Scales a vector of complex floats (32 bits per component) by 1/scalar and converts it into a vector of complex 8-bit integers, clipping every component to [-128, 127] (SSE2, unaligned buffers)
    \param outputVector The 8-bit integer complex output data buffer
    \param inputVector The floating point complex input data buffer
    \param scalar The value every input component is divided by before the conversion
    \param num_points The number of complex values to be converted
*/
static inline void volk_gnsssdr_32fc_s32f_convert_8ic_u_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points)
{
    // One vector pass converts 16 floats, i.e. 8 complex samples
    const unsigned int sse_iters = num_points / 8;
    // Floats left for the scalar tail: two for each of the num_points % 8 remaining complex samples
    const unsigned int tail_floats = (num_points % 8) * 2;

    const float* inputVectorPtr = (const float*)inputVector;
    int8_t* outputVectorPtr = (int8_t*)outputVector;
    __m128 invScalar = _mm_set_ps1(1.0/scalar);

    const float min_val = -128;
    const float max_val = 127;

    __m128 inputVal1, inputVal2, inputVal3, inputVal4;
    __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
    __m128i int8InputVal;
    __m128 ret1, ret2, ret3, ret4;
    const __m128 vmin_val = _mm_set_ps1(min_val);
    const __m128 vmax_val = _mm_set_ps1(max_val);

    for (unsigned int i = 0; i < sse_iters; i++)
        {
            inputVal1 = _mm_loadu_ps(inputVectorPtr);
            inputVectorPtr += 4;
            inputVal2 = _mm_loadu_ps(inputVectorPtr);
            inputVectorPtr += 4;
            inputVal3 = _mm_loadu_ps(inputVectorPtr);
            inputVectorPtr += 4;
            inputVal4 = _mm_loadu_ps(inputVectorPtr);
            inputVectorPtr += 4;

            // Scale
            inputVal1 = _mm_mul_ps(inputVal1, invScalar);
            inputVal2 = _mm_mul_ps(inputVal2, invScalar);
            inputVal3 = _mm_mul_ps(inputVal3, invScalar);
            inputVal4 = _mm_mul_ps(inputVal4, invScalar);

            // Clip
            ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
            ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
            ret3 = _mm_max_ps(_mm_min_ps(inputVal3, vmax_val), vmin_val);
            ret4 = _mm_max_ps(_mm_min_ps(inputVal4, vmax_val), vmin_val);

            // Convert to 32-bit integers, then pack down to 16-bit and finally to sixteen 8-bit integers
            intInputVal1 = _mm_cvtps_epi32(ret1);
            intInputVal2 = _mm_cvtps_epi32(ret2);
            intInputVal3 = _mm_cvtps_epi32(ret3);
            intInputVal4 = _mm_cvtps_epi32(ret4);
            intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
            intInputVal2 = _mm_packs_epi32(intInputVal3, intInputVal4);
            int8InputVal = _mm_packs_epi16(intInputVal1, intInputVal2);

            _mm_storeu_si128((__m128i*)outputVectorPtr, int8InputVal);
            outputVectorPtr += 16;
        }

    // Scalar tail. The bound must match the 8-sample stride of the vector loop:
    // the former (num_points % 4) * 4 either skipped samples or ran past the buffers.
    float scaled = 0;
    for (unsigned int i = 0; i < tail_floats; i++)
        {
            scaled = inputVectorPtr[i]/scalar;
            if (scaled > max_val)
                scaled = max_val;
            else if (scaled < min_val)
                scaled = min_val;
            outputVectorPtr[i] = (int8_t)rintf(scaled);
        }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Divides every component of a complex float vector by a scalar and stores it as a complex 8-bit integer vector, clipping to [-128, 127] (portable C version)
    \param outputVector The 8-bit integer complex output data buffer
    \param inputVector The floating point complex input data buffer
    \param scalar The value every input component is divided by before the conversion
    \param num_points The number of complex values to be converted
*/
static inline void volk_gnsssdr_32fc_s32f_convert_8ic_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points)
{
    const float* src = (const float*)inputVector;
    int8_t* dst = (int8_t*)outputVector;
    const float lower_limit = -128;
    const float upper_limit = 127;
    // Real and imaginary parts are processed as a flat run of floats
    const unsigned int total_components = num_points * 2;
    unsigned int k;

    for (k = 0; k < total_components; k++)
        {
            float value = src[k] / scalar;
            // Clip to the int8_t range before rounding
            value = (value > upper_limit) ? upper_limit : ((value < lower_limit) ? lower_limit : value);
            dst[k] = (int8_t)rintf(value);
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_u_H */
#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_a_H
#define INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_a_H
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
    \brief Scales a vector of complex floats (32 bits per component) by 1/scalar and converts it into a vector of complex 8-bit integers, clipping every component to [-128, 127] (SSE2, aligned buffers)

    Both buffers are accessed with aligned load/store intrinsics and therefore
    must be 16-byte aligned.

    \param outputVector The 8-bit integer complex output data buffer
    \param inputVector The floating point complex input data buffer
    \param scalar The value every input component is divided by before the conversion
    \param num_points The number of complex values to be converted
*/
static inline void volk_gnsssdr_32fc_s32f_convert_8ic_a_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points)
{
    // One vector pass converts 16 floats, i.e. 8 complex samples
    const unsigned int sse_iters = num_points / 8;
    // Floats left for the scalar tail: two for each of the num_points % 8 remaining complex samples
    const unsigned int tail_floats = (num_points % 8) * 2;

    const float* inputVectorPtr = (const float*)inputVector;
    int8_t* outputVectorPtr = (int8_t*)outputVector;
    __m128 invScalar = _mm_set_ps1(1.0/scalar);

    const float min_val = -128;
    const float max_val = 127;

    __m128 inputVal1, inputVal2, inputVal3, inputVal4;
    __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
    __m128i int8InputVal;
    __m128 ret1, ret2, ret3, ret4;
    const __m128 vmin_val = _mm_set_ps1(min_val);
    const __m128 vmax_val = _mm_set_ps1(max_val);

    for (unsigned int i = 0; i < sse_iters; i++)
        {
            inputVal1 = _mm_load_ps(inputVectorPtr);
            inputVectorPtr += 4;
            inputVal2 = _mm_load_ps(inputVectorPtr);
            inputVectorPtr += 4;
            inputVal3 = _mm_load_ps(inputVectorPtr);
            inputVectorPtr += 4;
            inputVal4 = _mm_load_ps(inputVectorPtr);
            inputVectorPtr += 4;

            // Scale
            inputVal1 = _mm_mul_ps(inputVal1, invScalar);
            inputVal2 = _mm_mul_ps(inputVal2, invScalar);
            inputVal3 = _mm_mul_ps(inputVal3, invScalar);
            inputVal4 = _mm_mul_ps(inputVal4, invScalar);

            // Clip
            ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
            ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
            ret3 = _mm_max_ps(_mm_min_ps(inputVal3, vmax_val), vmin_val);
            ret4 = _mm_max_ps(_mm_min_ps(inputVal4, vmax_val), vmin_val);

            // Convert to 32-bit integers, then pack down to 16-bit and finally to sixteen 8-bit integers
            intInputVal1 = _mm_cvtps_epi32(ret1);
            intInputVal2 = _mm_cvtps_epi32(ret2);
            intInputVal3 = _mm_cvtps_epi32(ret3);
            intInputVal4 = _mm_cvtps_epi32(ret4);
            intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
            intInputVal2 = _mm_packs_epi32(intInputVal3, intInputVal4);
            int8InputVal = _mm_packs_epi16(intInputVal1, intInputVal2);

            _mm_store_si128((__m128i*)outputVectorPtr, int8InputVal);
            outputVectorPtr += 16;
        }

    // Scalar tail. The bound must match the 8-sample stride of the vector loop:
    // the former (num_points % 4) * 4 either skipped samples or ran past the buffers.
    float scaled = 0;
    for (unsigned int i = 0; i < tail_floats; i++)
        {
            scaled = inputVectorPtr[i]/scalar;
            if (scaled > max_val)
                scaled = max_val;
            else if (scaled < min_val)
                scaled = min_val;
            outputVectorPtr[i] = (int8_t)rintf(scaled);
        }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Divides every component of a complex float vector by a scalar and stores it as a complex 8-bit integer vector, clipping to [-128, 127] (portable C version)
    \param outputVector The 8-bit integer complex output data buffer
    \param inputVector The floating point complex input data buffer
    \param scalar The value every input component is divided by before the conversion
    \param num_points The number of complex values to be converted
*/
static inline void volk_gnsssdr_32fc_s32f_convert_8ic_a_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points)
{
    const float* components_in = (const float*)inputVector;
    int8_t* components_out = (int8_t*)outputVector;
    const float floor_val = -128;
    const float ceil_val = 127;
    // Two floats (real, imaginary) per complex sample
    const unsigned int count = num_points * 2;
    unsigned int idx = 0;

    while (idx < count)
        {
            float quotient = components_in[idx] / scalar;
            // Clip to the int8_t range before rounding
            if (quotient > ceil_val)
                {
                    quotient = ceil_val;
                }
            else if (quotient < floor_val)
                {
                    quotient = floor_val;
                }
            components_out[idx] = (int8_t)rintf(quotient);
            ++idx;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_a_H */

View File

@ -1,231 +0,0 @@
/*!
* \file volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc.h
* \brief Volk protokernel: replaces the tracking function for update_local_code
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that replaces the tracking function for update_local_code
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_H
#define INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_H
#include <float.h>
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
#include "volk_gnsssdr/volk_gnsssdr_complex.h"
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Fills the local very early code replica by resampling d_ca_code (SSE4.1 version)

For every output sample the chip index
2 + round(fmod(tcode_half_chips - 2*d_very_early_late_spc_chips, code_length_half_chips))
is computed and d_ca_code[index] is copied to the output; the code phase then
advances by code_phase_step_half_chips. Four indices are computed per vector pass,
and the last num_points % 4 samples are handled by a scalar loop.

\param d_very_early_code Output buffer receiving num_points code samples
\param d_very_early_late_spc_chips Spacing value; twice this amount is subtracted from the code phase
\param code_length_half_chips Modulus applied to the shifted code phase
\param code_phase_step_half_chips Code phase increment between consecutive output samples
\param tcode_half_chips_input Code phase of the first output sample
\param d_ca_code Source code table indexed by the computed chip index
\param num_points The number of output samples to generate
*/
static inline void volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_sse4_1(lv_32fc_t* d_very_early_code, const float d_very_early_late_spc_chips, const float code_length_half_chips, const float code_phase_step_half_chips, const float tcode_half_chips_input, const lv_32fc_t* d_ca_code, unsigned int num_points)
{
// Four output samples are produced per vector pass
const unsigned int sse_iters = num_points / 4;
__m128 tquot, fmod_num, fmod_result, associated_chip_index_array;
// _mm_set_ps takes its arguments from the highest lane down, so lane k holds the phase of sample k (k = 0..3)
__m128 tcode_half_chips_array = _mm_set_ps (tcode_half_chips_input+3*code_phase_step_half_chips, tcode_half_chips_input+2*code_phase_step_half_chips, tcode_half_chips_input+code_phase_step_half_chips, tcode_half_chips_input);
// Per-pass phase advance: four samples' worth of step
__m128 code_phase_step_half_chips_array = _mm_set1_ps (code_phase_step_half_chips*4);
__m128 d_very_early_late_spc_chips_Multiplied_by_2 = _mm_set1_ps (2*d_very_early_late_spc_chips);
__m128 code_length_half_chips_array = _mm_set1_ps (code_length_half_chips);
__m128 twos = _mm_set1_ps (2);
__m128i associated_chip_index_array_int;
// Scratch buffer used to read the four computed indices back as scalars
__VOLK_ATTR_ALIGNED(16) int32_t output[4];
for (unsigned int i = 0; i < sse_iters; i++)
{
//fmod = numer - tquot * denom; tquot = numer/denom truncated
//associated_chip_index = 2 + round(fmod(tcode_half_chips - 2*d_very_early_late_spc_chips, code_length_half_chips));
fmod_num = _mm_sub_ps (tcode_half_chips_array, d_very_early_late_spc_chips_Multiplied_by_2);
tquot = _mm_div_ps (fmod_num, code_length_half_chips_array);
tquot = _mm_round_ps (tquot, (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) );
fmod_result = _mm_sub_ps (fmod_num, _mm_mul_ps (tquot, code_length_half_chips_array));
// NOTE(review): _MM_FROUND_TO_NEAREST_INT rounds ties to even, whereas the scalar tail
// uses round(), which rounds ties away from zero; confirm that exact .5 values cannot occur.
associated_chip_index_array = _mm_round_ps (fmod_result, (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC));
associated_chip_index_array = _mm_add_ps(twos, associated_chip_index_array);
associated_chip_index_array_int = _mm_cvtps_epi32 (associated_chip_index_array);
_mm_storeu_si128 ((__m128i*)output, associated_chip_index_array_int);
//d_very_early_code[i] = d_ca_code[associated_chip_index];
// The table look-up is done one sample at a time using the indices stored above
*d_very_early_code++ = d_ca_code[output[0]];
*d_very_early_code++ = d_ca_code[output[1]];
*d_very_early_code++ = d_ca_code[output[2]];
*d_very_early_code++ = d_ca_code[output[3]];
//tcode_half_chips = tcode_half_chips + code_phase_step_half_chips;
tcode_half_chips_array = _mm_add_ps (tcode_half_chips_array, code_phase_step_half_chips_array);
}
// Scalar tail for the num_points % 4 samples that do not fill a whole vector
if (num_points % 4 != 0)
{
__VOLK_ATTR_ALIGNED(16) float tcode_half_chips_stored[4];
_mm_storeu_ps ((float*)tcode_half_chips_stored, tcode_half_chips_array);
int associated_chip_index;
// Lane 0 holds the code phase of the first sample not yet written
float tcode_half_chips = tcode_half_chips_stored[0];
float d_very_early_late_spc_chips_multiplied_by_2 = 2*d_very_early_late_spc_chips;
for (unsigned int i = 0; i < num_points%4; i++)
{
associated_chip_index = 2 + round(fmod(tcode_half_chips - d_very_early_late_spc_chips_multiplied_by_2, code_length_half_chips));
// d_very_early_code was advanced by the vector loop, so index i continues right after its output
d_very_early_code[i] = d_ca_code[associated_chip_index];
tcode_half_chips = tcode_half_chips + code_phase_step_half_chips;
}
}
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Resamples a local code replica (generic version). Output sample i is d_ca_code[2 + round(fmod(tcode_i - 2*d_very_early_late_spc_chips, code_length_half_chips))], where tcode_0 = tcode_half_chips_input and tcode_(i+1) = tcode_i + code_phase_step_half_chips
    \param d_very_early_code Vector where the num_points resampled code values will be stored
    \param d_very_early_late_spc_chips Spacing term; twice this value is subtracted from the running code phase
    \param code_length_half_chips Modulus applied to the shifted code phase
    \param code_phase_step_half_chips Increment added to the running code phase after every output sample
    \param tcode_half_chips_input Code phase used for the first output sample
    \param d_ca_code Code table that is read (never written) at the computed indices
    \param num_points The number of complex values to be stored into d_very_early_code
*/
static inline void volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_generic(lv_32fc_t* d_very_early_code, const float d_very_early_late_spc_chips, const float code_length_half_chips, const float code_phase_step_half_chips, const float tcode_half_chips_input, const lv_32fc_t* d_ca_code, unsigned int num_points)
{
    const float spacing_times_two = 2*d_very_early_late_spc_chips;
    float code_phase = tcode_half_chips_input;
    unsigned int n = 0;

    while (n < num_points)
        {
            // fmod() keeps the sign of its first argument, so the index can fall below 2
            // NOTE(review): presumably d_ca_code is padded so that such indices are valid -- confirm against the caller
            const int chip_index = (int)(2 + round(fmod(code_phase - spacing_times_two, code_length_half_chips)));
            d_very_early_code[n] = d_ca_code[chip_index];
            code_phase = code_phase + code_phase_step_half_chips;
            ++n;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_H */
#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_H
#define INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
    \brief Resamples a local code replica (SSE4.1 version, four output samples per iteration). Output sample i is d_ca_code[2 + round(fmod(tcode_i - 2*d_very_early_late_spc_chips, code_length_half_chips))], where tcode_i is the running code phase
    \param d_very_early_code Vector where the num_points resampled code values will be stored
    \param d_very_early_late_spc_chips Spacing term; twice this value is subtracted from the running code phase
    \param code_length_half_chips Modulus applied to the shifted code phase
    \param code_phase_step_half_chips Code phase increment between consecutive output samples
    \param tcode_half_chips_input Code phase used for the first output sample
    \param d_ca_code Code table that is read (never written) at the computed indices
    \param num_points The number of complex values to be stored into d_very_early_code
*/
static inline void volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_sse4_1(lv_32fc_t* d_very_early_code, const float d_very_early_late_spc_chips, const float code_length_half_chips, const float code_phase_step_half_chips, const float tcode_half_chips_input, const lv_32fc_t* d_ca_code, unsigned int num_points)
{
    const unsigned int quad_count = num_points / 4;
    const unsigned int leftover = num_points % 4;

    // Loop invariants, broadcast to the four lanes
    const __m128 spacing_x2 = _mm_set1_ps (2*d_very_early_late_spc_chips);
    const __m128 code_length = _mm_set1_ps (code_length_half_chips);
    const __m128 phase_advance = _mm_set1_ps (code_phase_step_half_chips*4);
    const __m128 index_offset = _mm_set1_ps (2);

    // Lane k carries the code phase of output sample k (_mm_set_ps takes lane 3 first, lane 0 last)
    __m128 phase = _mm_set_ps (tcode_half_chips_input+3*code_phase_step_half_chips, tcode_half_chips_input+2*code_phase_step_half_chips, tcode_half_chips_input+code_phase_step_half_chips, tcode_half_chips_input);

    __VOLK_ATTR_ALIGNED(16) int32_t chip_index[4];

    for (unsigned int n = 0; n < quad_count; n++)
        {
            // fmod(num, den) is built as num - trunc(num / den) * den
            const __m128 fmod_numer = _mm_sub_ps (phase, spacing_x2);
            const __m128 truncated_quot = _mm_round_ps (_mm_div_ps (fmod_numer, code_length), (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC));
            const __m128 fmod_value = _mm_sub_ps (fmod_numer, _mm_mul_ps (truncated_quot, code_length));

            // index = 2 + round(fmod value), converted to 32-bit integers
            const __m128 rounded = _mm_round_ps (fmod_value, (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC));
            _mm_store_si128 ((__m128i*)chip_index, _mm_cvtps_epi32 (_mm_add_ps (index_offset, rounded)));

            // The table lookup is done one lane at a time
            for (unsigned int lane = 0; lane < 4; lane++)
                {
                    *d_very_early_code++ = d_ca_code[chip_index[lane]];
                }

            // Every lane moves forward by four samples
            phase = _mm_add_ps (phase, phase_advance);
        }

    if (leftover != 0)
        {
            // Lane 0 holds the code phase of the first sample that has not been produced yet
            __VOLK_ATTR_ALIGNED(16) float phase_lanes[4];
            _mm_storeu_ps ((float*)phase_lanes, phase);

            const float spacing_times_two = 2*d_very_early_late_spc_chips;
            float code_phase = phase_lanes[0];

            // d_very_early_code already points past the vectorized output, so indexing restarts at 0
            for (unsigned int k = 0; k < leftover; k++)
                {
                    const int tail_index = (int)(2 + round(fmod(code_phase - spacing_times_two, code_length_half_chips)));
                    d_very_early_code[k] = d_ca_code[tail_index];
                    code_phase = code_phase + code_phase_step_half_chips;
                }
        }
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Resamples a local code replica (generic version). Output sample i is d_ca_code[2 + round(fmod(tcode_i - 2*d_very_early_late_spc_chips, code_length_half_chips))], where tcode_0 = tcode_half_chips_input and tcode_(i+1) = tcode_i + code_phase_step_half_chips
    \param d_very_early_code Vector where the num_points resampled code values will be stored
    \param d_very_early_late_spc_chips Spacing term; twice this value is subtracted from the running code phase
    \param code_length_half_chips Modulus applied to the shifted code phase
    \param code_phase_step_half_chips Increment added to the running code phase after every output sample
    \param tcode_half_chips_input Code phase used for the first output sample
    \param d_ca_code Code table that is read (never written) at the computed indices
    \param num_points The number of complex values to be stored into d_very_early_code
*/
static inline void volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_generic(lv_32fc_t* d_very_early_code, const float d_very_early_late_spc_chips, const float code_length_half_chips, const float code_phase_step_half_chips, const float tcode_half_chips_input, const lv_32fc_t* d_ca_code, unsigned int num_points)
{
    const float spacing_times_two = 2*d_very_early_late_spc_chips;
    float code_phase = tcode_half_chips_input;
    unsigned int n = 0;

    while (n < num_points)
        {
            // fmod() keeps the sign of its first argument, so the index can fall below 2
            // NOTE(review): presumably d_ca_code is padded so that such indices are valid -- confirm against the caller
            const int chip_index = (int)(2 + round(fmod(code_phase - spacing_times_two, code_length_half_chips)));
            d_very_early_code[n] = d_ca_code[chip_index];
            code_phase = code_phase + code_phase_step_half_chips;
            ++n;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_a_H */

View File

@ -1,455 +0,0 @@
/*!
* \file volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3
* \brief Volk protokernel: performs the carrier wipe-off mixing and the Early, Prompt and Late correlation with 64 bits vectors
* \authors <ul>
* <li> Javier Arribas, 2011. jarribas(at)cttc.es
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that performs the carrier wipe-off mixing and the
* Early, Prompt and Late correlation with 64 bits vectors (32 bits the
* real part and 32 bits the imaginary part):
* - The carrier wipe-off is done by multiplying the input signal by the
* carrier (multiplication of 64 bits vectors) It returns the input
* signal in base band (BB)
* - Early values are calculated by multiplying the input signal in BB by the
* early code (multiplication of 64 bits vectors), accumulating the results
* - Prompt values are calculated by multiplying the input signal in BB by the
* prompt code (multiplication of 64 bits vectors), accumulating the results
* - Late values are calculated by multiplying the input signal in BB by the
* late code (multiplication of 64 bits vectors), accumulating the results
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H
#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
/*!
* TODO: Code the SSE4 version and benchmark it
*/
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
    \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (SSE3 version; no alignment is required on any input vector)
    \param E_out Early correlation output
    \param P_out Prompt correlation output
    \param L_out Late correlation output
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_sse3(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int halfPoints = num_points / 2; // two complex values are processed per iteration

    // Scalar results, cleared to 0 + 0i
    lv_32fc_t dotProduct_E;
    memset(&dotProduct_E, 0x0, 2*sizeof(float));
    lv_32fc_t dotProduct_P;
    memset(&dotProduct_P, 0x0, 2*sizeof(float));
    lv_32fc_t dotProduct_L;
    memset(&dotProduct_L, 0x0, 2*sizeof(float));

    // Aux vars
    __m128 x, y, yl, yh, z, tmp1, tmp2, z_E, z_P, z_L;

    // Vector accumulators: each one holds two partial complex sums
    z_E = _mm_setzero_ps();
    z_P = _mm_setzero_ps();
    z_L = _mm_setzero_ps();

    // Running pointers over the input vectors
    const lv_32fc_t* _input = input;
    const lv_32fc_t* _carrier = carrier;
    const lv_32fc_t* _E_code = E_code;
    const lv_32fc_t* _P_code = P_code;
    const lv_32fc_t* _L_code = L_code;

    for(;number < halfPoints; number++)
        {
            // carrier wipe-off (vector point-to-point product)
            x = _mm_loadu_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
            y = _mm_loadu_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
            yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
            yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
            tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
            x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
            tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
            z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di

            // correlation E,P,L (3x vector scalar product); from here on x is the baseband signal

            // Early
            x = z;
            // The code replicas carry no alignment guarantee in this kernel, so they must be read with
            // unaligned loads: _mm_load_ps faults on an address that is not 16-byte aligned
            y = _mm_loadu_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
            yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
            yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
            tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
            x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
            tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
            z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
            z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together

            // Prompt
            y = _mm_loadu_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
            yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
            yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
            x = _mm_shuffle_ps(x,x,0xB1); // x was left swapped by the previous section: restore ar,ai,br,bi
            tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
            x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
            tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
            z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
            z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together

            // Late
            y = _mm_loadu_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
            yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
            yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
            x = _mm_shuffle_ps(x,x,0xB1); // x was left swapped by the previous section: restore ar,ai,br,bi
            tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
            x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
            tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
            z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
            z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together

            /*pointer increment*/
            _carrier += 2;
            _input += 2;
            _E_code += 2;
            _P_code += 2;
            _L_code +=2;
        }

    // These temporaries are declared 16-byte aligned, so aligned stores are safe here
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];

    _mm_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
    _mm_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
    _mm_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector

    // Fold the two partial sums of every accumulator
    dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
    dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
    dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );

    // An odd num_points leaves one last complex value for scalar code
    if((num_points % 2) != 0)
        {
            dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
            dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
            dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
        }

    *E_out = dotProduct_E;
    *P_out = dotProduct_P;
    *L_out = dotProduct_L;
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (generic version)
    \param E_out Early correlation output
    \param P_out Prompt correlation output
    \param L_out Late correlation output
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
{
    // The outputs double as accumulators, so they are cleared first
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;

    unsigned int n = 0;
    while (n < num_points)
        {
            // Carrier wipe-off: bring the input sample to baseband
            const lv_32fc_t baseband = input[n] * carrier[n];

            // Correlate the baseband sample against each code replica
            *E_out += baseband * E_code[n];
            *P_out += baseband * P_code[n];
            *L_out += baseband * L_code[n];
            ++n;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H */
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H
#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
    \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (SSE3 version; every input vector is read with aligned 16-byte loads)
    \param E_out Early correlation output
    \param P_out Prompt correlation output
    \param L_out Late correlation output
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_sse3(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
{
    const unsigned int pair_count = num_points / 2; // two complex values per iteration

    // Vector accumulators: each one holds two partial complex sums
    __m128 acc_early = _mm_setzero_ps();
    __m128 acc_prompt = _mm_setzero_ps();
    __m128 acc_late = _mm_setzero_ps();

    // Running pointers over the input vectors
    const lv_32fc_t* in_ptr = input;
    const lv_32fc_t* carrier_ptr = carrier;
    const lv_32fc_t* early_ptr = E_code;
    const lv_32fc_t* prompt_ptr = P_code;
    const lv_32fc_t* late_ptr = L_code;

    for (unsigned int pair = 0; pair < pair_count; pair++)
        {
            // Carrier wipe-off: complex product of two input samples (ar,ai,br,bi) with two carrier samples (cr,ci,dr,di)
            const __m128 samples = _mm_load_ps((float*)in_ptr);
            const __m128 local_osc = _mm_load_ps((float*)carrier_ptr);
            const __m128 baseband = _mm_addsub_ps(
                _mm_mul_ps(samples, _mm_moveldup_ps(local_osc)),                                  // ar*cr,ai*cr,br*dr,bi*dr
                _mm_mul_ps(_mm_shuffle_ps(samples, samples, 0xB1), _mm_movehdup_ps(local_osc)));  // ai*ci,ar*ci,bi*di,br*di

            // Real/imaginary swapped copy of the baseband signal, shared by the three correlators
            const __m128 baseband_swapped = _mm_shuffle_ps(baseband, baseband, 0xB1);

            // Early: accumulate baseband * E_code
            __m128 code = _mm_load_ps((float*)early_ptr);
            acc_early = _mm_add_ps(acc_early, _mm_addsub_ps(
                _mm_mul_ps(baseband, _mm_moveldup_ps(code)),
                _mm_mul_ps(baseband_swapped, _mm_movehdup_ps(code))));

            // Prompt: accumulate baseband * P_code
            code = _mm_load_ps((float*)prompt_ptr);
            acc_prompt = _mm_add_ps(acc_prompt, _mm_addsub_ps(
                _mm_mul_ps(baseband, _mm_moveldup_ps(code)),
                _mm_mul_ps(baseband_swapped, _mm_movehdup_ps(code))));

            // Late: accumulate baseband * L_code
            code = _mm_load_ps((float*)late_ptr);
            acc_late = _mm_add_ps(acc_late, _mm_addsub_ps(
                _mm_mul_ps(baseband, _mm_moveldup_ps(code)),
                _mm_mul_ps(baseband_swapped, _mm_movehdup_ps(code))));

            carrier_ptr += 2;
            in_ptr += 2;
            early_ptr += 2;
            prompt_ptr += 2;
            late_ptr += 2;
        }

    // Spill the accumulators so that their two partial sums can be folded with scalar code
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t partial_early[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t partial_prompt[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t partial_late[2];
    _mm_store_ps((float*)partial_early, acc_early);
    _mm_store_ps((float*)partial_prompt, acc_prompt);
    _mm_store_ps((float*)partial_late, acc_late);

    // Scalar results start at 0 + 0i
    lv_32fc_t sum_early;
    lv_32fc_t sum_prompt;
    lv_32fc_t sum_late;
    memset(&sum_early, 0x0, 2*sizeof(float));
    memset(&sum_prompt, 0x0, 2*sizeof(float));
    memset(&sum_late, 0x0, 2*sizeof(float));

    sum_early += ( partial_early[0] + partial_early[1] );
    sum_prompt += ( partial_prompt[0] + partial_prompt[1] );
    sum_late += ( partial_late[0] + partial_late[1] );

    // An odd num_points leaves one last complex value for scalar code
    if ((num_points % 2) != 0)
        {
            sum_early += (*in_ptr) * (*early_ptr)*(*carrier_ptr);
            sum_prompt += (*in_ptr) * (*prompt_ptr)*(*carrier_ptr);
            sum_late += (*in_ptr) * (*late_ptr)*(*carrier_ptr);
        }

    *E_out = sum_early;
    *P_out = sum_prompt;
    *L_out = sum_late;
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (generic version)
    \param E_out Early correlation output
    \param P_out Prompt correlation output
    \param L_out Late correlation output
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
{
    // The outputs double as accumulators, so they are cleared first
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;

    unsigned int n = 0;
    while (n < num_points)
        {
            // Carrier wipe-off: bring the input sample to baseband
            const lv_32fc_t baseband = input[n] * carrier[n];

            // Correlate the baseband sample against each code replica
            *E_out += baseband * E_code[n];
            *P_out += baseband * P_code[n];
            *L_out += baseband * L_code[n];
            ++n;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H */

View File

@ -1,848 +0,0 @@
/*!
* \file volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5
* \brief Volk protokernel: performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation with 64 bits vectors
* \authors <ul>
* <li> Javier Arribas, 2011. jarribas(at)cttc.es
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that performs the carrier wipe-off mixing and the
* VE, Early, Prompt, Late and VL correlation with 64 bits vectors (32 bits the
* real part and 32 bits the imaginary part):
* - The carrier wipe-off is done by multiplying the input signal by the
* carrier (multiplication of 64 bits vectors) It returns the input
* signal in base band (BB)
* - VE values are calculated by multiplying the input signal in BB by the
* VE code (multiplication of 64 bits vectors), accumulating the results
* - Early values are calculated by multiplying the input signal in BB by the
* early code (multiplication of 64 bits vectors), accumulating the results
* - Prompt values are calculated by multiplying the input signal in BB by the
* prompt code (multiplication of 64 bits vectors), accumulating the results
* - Late values are calculated by multiplying the input signal in BB by the
* late code (multiplication of 64 bits vectors), accumulating the results
* - VL values are calculated by multiplying the input signal in BB by the
* VL code (multiplication of 64 bits vectors), accumulating the results
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H
#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
\brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation (AVX version; all vectors are read with unaligned loads)
\param VE_out VE correlation output
\param E_out Early correlation output
\param P_out Prompt correlation output
\param L_out Late correlation output
\param VL_out VL correlation output
\param input The input signal input
\param carrier The carrier signal input
\param VE_code VE PRN code replica input
\param E_code Early PRN code replica input
\param P_code Prompt PRN code replica input
\param L_code Late PRN code replica input
\param VL_code VL PRN code replica input
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_avx(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
{
unsigned int number = 0;
// Despite its name, this is the number of groups of FOUR complex values (one 256-bit register each)
const unsigned int halfPoints = num_points / 4;
// Scalar results; they are assigned (not accumulated) after the vector loop
lv_32fc_t dotProduct_VE;
lv_32fc_t dotProduct_E;
lv_32fc_t dotProduct_P;
lv_32fc_t dotProduct_L;
lv_32fc_t dotProduct_VL;
// Aux vars
__m256 x, y, yl, yh, z, tmp1, tmp2, z_VE, z_E, z_P, z_L, z_VL;
__m256 bb_signal_sample, bb_signal_sample_shuffled;
// Vector accumulators: each one holds four partial complex sums
z_VE = _mm256_setzero_ps();
z_E = _mm256_setzero_ps();
z_P = _mm256_setzero_ps();
z_L = _mm256_setzero_ps();
z_VL = _mm256_setzero_ps();
// Running pointers over the input vectors
const lv_32fc_t* _input = input;
const lv_32fc_t* _carrier = carrier;
const lv_32fc_t* _VE_code = VE_code;
const lv_32fc_t* _E_code = E_code;
const lv_32fc_t* _P_code = P_code;
const lv_32fc_t* _L_code = L_code;
const lv_32fc_t* _VL_code = VL_code;
// Each register carries four complex values; the lane comments below spell out only the first two of them
for(;number < halfPoints; number++)
{
// carrier wipe-off (vector point-to-point product)
x = _mm256_loadu_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm256_loadu_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm256_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm256_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm256_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
bb_signal_sample = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
// The swapped copy is computed once and reused by the five correlators below
bb_signal_sample_shuffled = _mm256_shuffle_ps(bb_signal_sample,bb_signal_sample,0xB1); // Re-arrange bb_signal_sample to be ai,ar,bi,br
// correlation VE,E,P,L,VL (5x vector scalar product)
// VE
y = _mm256_loadu_ps((float*)_VE_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_VE = _mm256_add_ps(z_VE, z); // Add the complex multiplication results together
// Early
y = _mm256_loadu_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_E = _mm256_add_ps(z_E, z); // Add the complex multiplication results together
// Prompt
y = _mm256_loadu_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_P = _mm256_add_ps(z_P, z); // Add the complex multiplication results together
// Late
y = _mm256_loadu_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_L = _mm256_add_ps(z_L, z); // Add the complex multiplication results together
// VL
y = _mm256_loadu_ps((float*)_VL_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm256_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm256_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_VL = _mm256_add_ps(z_VL, z); // Add the complex multiplication results together
/*pointer increment*/
_carrier += 4;
_input += 4;
_VE_code += 4;
_E_code += 4;
_P_code += 4;
_L_code += 4;
_VL_code += 4;
}
// Spill every accumulator so that its four partial sums can be folded with scalar code
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_VE[4];
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_E[4];
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_P[4];
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_L[4];
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector_VL[4];
_mm256_storeu_ps((float*)dotProductVector_VE,z_VE); // Store the results back into the dot product vector
_mm256_storeu_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
_mm256_storeu_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
_mm256_storeu_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
_mm256_storeu_ps((float*)dotProductVector_VL,z_VL); // Store the results back into the dot product vector
dotProduct_VE = ( dotProductVector_VE[0] + dotProductVector_VE[1] + dotProductVector_VE[2] + dotProductVector_VE[3] );
dotProduct_E = ( dotProductVector_E[0] + dotProductVector_E[1] + dotProductVector_E[2] + dotProductVector_E[3] );
dotProduct_P = ( dotProductVector_P[0] + dotProductVector_P[1] + dotProductVector_P[2] + dotProductVector_P[3] );
dotProduct_L = ( dotProductVector_L[0] + dotProductVector_L[1] + dotProductVector_L[2] + dotProductVector_L[3] );
dotProduct_VL = ( dotProductVector_VL[0] + dotProductVector_VL[1] + dotProductVector_VL[2] + dotProductVector_VL[3] );
// Scalar tail for the (num_points % 4) values left over by the vector loop.
// _input and _carrier are shared by the five lines, so they only advance on the last one.
for (unsigned int i = 0; i<(num_points % 4); ++i)
{
dotProduct_VE += (*_input) * (*_VE_code++) * (*_carrier);
dotProduct_E += (*_input) * (*_E_code++) * (*_carrier);
dotProduct_P += (*_input) * (*_P_code++) * (*_carrier);
dotProduct_L += (*_input) * (*_L_code++) * (*_carrier);
dotProduct_VL += (*_input++) * (*_VL_code++) * (*_carrier++);
}
*VE_out = dotProduct_VE;
*E_out = dotProduct_E;
*P_out = dotProduct_P;
*L_out = dotProduct_L;
*VL_out = dotProduct_VL;
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
    \brief Carrier wipe-off followed by the VE, Early, Prompt, Late and VL correlations (SSE3, unaligned loads)
    \param VE_out VE correlation output (a single accumulated complex value)
    \param E_out Early correlation output (a single accumulated complex value)
    \param P_out Prompt correlation output (a single accumulated complex value)
    \param L_out Late correlation output (a single accumulated complex value)
    \param VL_out VL correlation output (a single accumulated complex value)
    \param input The input signal
    \param carrier The carrier signal, multiplied sample by sample with the input
    \param VE_code VE PRN code replica input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param VL_code VL PRN code replica input
    \param num_points The number of complex values in each input vector
*/
static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_sse3(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
{
    /* A __m128 register carries two complex floats, so the vector loop walks the data in pairs. */
    const unsigned int pair_count = num_points / 2;
    unsigned int pair;

    /* Read cursors over the seven input vectors. */
    const lv_32fc_t* in_ptr = input;
    const lv_32fc_t* carr_ptr = carrier;
    const lv_32fc_t* ve_ptr = VE_code;
    const lv_32fc_t* e_ptr = E_code;
    const lv_32fc_t* p_ptr = P_code;
    const lv_32fc_t* l_ptr = L_code;
    const lv_32fc_t* vl_ptr = VL_code;

    /* One two-lane complex accumulator per correlator. */
    __m128 acc_VE = _mm_setzero_ps();
    __m128 acc_E = _mm_setzero_ps();
    __m128 acc_P = _mm_setzero_ps();
    __m128 acc_L = _mm_setzero_ps();
    __m128 acc_VL = _mm_setzero_ps();

    /* Scratch registers reused by every complex product below. */
    __m128 rep, rep_re, rep_im, direct, cross;

    for (pair = 0; pair < pair_count; ++pair)
        {
            /* Carrier wipe-off: bb = input * carrier for two samples at once. */
            const __m128 sig = _mm_loadu_ps((const float*)in_ptr);      /* ar,ai,br,bi */
            const __m128 sig_swapped = _mm_shuffle_ps(sig, sig, 0xB1);  /* ai,ar,bi,br */
            rep = _mm_loadu_ps((const float*)carr_ptr);                 /* cr,ci,dr,di */
            rep_re = _mm_moveldup_ps(rep);                              /* cr,cr,dr,dr */
            rep_im = _mm_movehdup_ps(rep);                              /* ci,ci,di,di */
            direct = _mm_mul_ps(sig, rep_re);                           /* ar*cr, ai*cr, br*dr, bi*dr */
            cross = _mm_mul_ps(sig_swapped, rep_im);                    /* ai*ci, ar*ci, bi*di, br*di */
            const __m128 bb = _mm_addsub_ps(direct, cross);             /* ar*cr-ai*ci, ai*cr+ar*ci, ... */
            const __m128 bb_swapped = _mm_shuffle_ps(bb, bb, 0xB1);     /* imag/real swapped copy of bb */

            /* VE correlator: acc_VE += bb * VE_code */
            rep = _mm_loadu_ps((const float*)ve_ptr);
            rep_re = _mm_moveldup_ps(rep);
            rep_im = _mm_movehdup_ps(rep);
            direct = _mm_mul_ps(bb, rep_re);
            cross = _mm_mul_ps(bb_swapped, rep_im);
            acc_VE = _mm_add_ps(acc_VE, _mm_addsub_ps(direct, cross));

            /* Early correlator: acc_E += bb * E_code */
            rep = _mm_loadu_ps((const float*)e_ptr);
            rep_re = _mm_moveldup_ps(rep);
            rep_im = _mm_movehdup_ps(rep);
            direct = _mm_mul_ps(bb, rep_re);
            cross = _mm_mul_ps(bb_swapped, rep_im);
            acc_E = _mm_add_ps(acc_E, _mm_addsub_ps(direct, cross));

            /* Prompt correlator: acc_P += bb * P_code */
            rep = _mm_loadu_ps((const float*)p_ptr);
            rep_re = _mm_moveldup_ps(rep);
            rep_im = _mm_movehdup_ps(rep);
            direct = _mm_mul_ps(bb, rep_re);
            cross = _mm_mul_ps(bb_swapped, rep_im);
            acc_P = _mm_add_ps(acc_P, _mm_addsub_ps(direct, cross));

            /* Late correlator: acc_L += bb * L_code */
            rep = _mm_loadu_ps((const float*)l_ptr);
            rep_re = _mm_moveldup_ps(rep);
            rep_im = _mm_movehdup_ps(rep);
            direct = _mm_mul_ps(bb, rep_re);
            cross = _mm_mul_ps(bb_swapped, rep_im);
            acc_L = _mm_add_ps(acc_L, _mm_addsub_ps(direct, cross));

            /* VL correlator: acc_VL += bb * VL_code */
            rep = _mm_loadu_ps((const float*)vl_ptr);
            rep_re = _mm_moveldup_ps(rep);
            rep_im = _mm_movehdup_ps(rep);
            direct = _mm_mul_ps(bb, rep_re);
            cross = _mm_mul_ps(bb_swapped, rep_im);
            acc_VL = _mm_add_ps(acc_VL, _mm_addsub_ps(direct, cross));

            /* Step every cursor past the two samples just consumed. */
            in_ptr += 2;
            carr_ptr += 2;
            ve_ptr += 2;
            e_ptr += 2;
            p_ptr += 2;
            l_ptr += 2;
            vl_ptr += 2;
        }

    /* Spill each accumulator to memory so its two complex lanes can be added together. */
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t lanes_VE[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t lanes_E[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t lanes_P[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t lanes_L[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t lanes_VL[2];

    _mm_storeu_ps((float*)lanes_VE, acc_VE);
    _mm_storeu_ps((float*)lanes_E, acc_E);
    _mm_storeu_ps((float*)lanes_P, acc_P);
    _mm_storeu_ps((float*)lanes_L, acc_L);
    _mm_storeu_ps((float*)lanes_VL, acc_VL);

    lv_32fc_t sum_VE = lanes_VE[0] + lanes_VE[1];
    lv_32fc_t sum_E = lanes_E[0] + lanes_E[1];
    lv_32fc_t sum_P = lanes_P[0] + lanes_P[1];
    lv_32fc_t sum_L = lanes_L[0] + lanes_L[1];
    lv_32fc_t sum_VL = lanes_VL[0] + lanes_VL[1];

    /* An odd num_points leaves exactly one sample for the scalar path. */
    if ((num_points & 1u) != 0)
        {
            sum_VE += (*in_ptr) * (*ve_ptr) * (*carr_ptr);
            sum_E += (*in_ptr) * (*e_ptr) * (*carr_ptr);
            sum_P += (*in_ptr) * (*p_ptr) * (*carr_ptr);
            sum_L += (*in_ptr) * (*l_ptr) * (*carr_ptr);
            sum_VL += (*in_ptr) * (*vl_ptr) * (*carr_ptr);
        }

    *VE_out = sum_VE;
    *E_out = sum_E;
    *P_out = sum_P;
    *L_out = sum_L;
    *VL_out = sum_VL;
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Carrier wipe-off followed by the VE, Early, Prompt, Late and VL correlations (portable C version)
    \param VE_out VE correlation output (a single accumulated complex value)
    \param E_out Early correlation output (a single accumulated complex value)
    \param P_out Prompt correlation output (a single accumulated complex value)
    \param L_out Late correlation output (a single accumulated complex value)
    \param VL_out VL correlation output (a single accumulated complex value)
    \param input The input signal
    \param carrier The carrier signal, multiplied sample by sample with the input
    \param VE_code VE PRN code replica input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param VL_code VL PRN code replica input
    \param num_points The number of complex values in each input vector
*/
static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
{
    /* Read cursors over the seven input vectors. */
    const lv_32fc_t* in_ptr = input;
    const lv_32fc_t* carr_ptr = carrier;
    const lv_32fc_t* ve_ptr = VE_code;
    const lv_32fc_t* e_ptr = E_code;
    const lv_32fc_t* p_ptr = P_code;
    const lv_32fc_t* l_ptr = L_code;
    const lv_32fc_t* vl_ptr = VL_code;
    unsigned int remaining;

    /* The outputs double as the accumulators, so clear them before summing. */
    *VE_out = 0;
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;
    *VL_out = 0;

    for (remaining = num_points; remaining != 0; --remaining)
        {
            /* Carrier wipe-off for the current sample. */
            const lv_32fc_t baseband = (*in_ptr) * (*carr_ptr);

            /* Accumulate the product of the base-band sample with each code replica. */
            *VE_out += baseband * (*ve_ptr);
            *E_out += baseband * (*e_ptr);
            *P_out += baseband * (*p_ptr);
            *L_out += baseband * (*l_ptr);
            *VL_out += baseband * (*vl_ptr);

            ++in_ptr;
            ++carr_ptr;
            ++ve_ptr;
            ++e_ptr;
            ++p_ptr;
            ++l_ptr;
            ++vl_ptr;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H */
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H
#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
    \brief Carrier wipe-off followed by the VE, Early, Prompt, Late and VL correlations (AVX, aligned loads)

    All vector loads use _mm256_load_ps, so every input pointer must be 32-byte aligned.

    \param VE_out VE correlation output (a single accumulated complex value)
    \param E_out Early correlation output (a single accumulated complex value)
    \param P_out Prompt correlation output (a single accumulated complex value)
    \param L_out Late correlation output (a single accumulated complex value)
    \param VL_out VL correlation output (a single accumulated complex value)
    \param input The input signal
    \param carrier The carrier signal, multiplied sample by sample with the input
    \param VE_code VE PRN code replica input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param VL_code VL PRN code replica input
    \param num_points The number of complex values in each input vector
*/
static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_avx(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
{
    /* A __m256 register carries four complex floats, so the vector loop walks the data in groups of four. */
    const unsigned int quad_count = num_points / 4;
    unsigned int quad;
    unsigned int leftover;

    /* Read cursors over the seven input vectors. */
    const lv_32fc_t* in_ptr = input;
    const lv_32fc_t* carr_ptr = carrier;
    const lv_32fc_t* ve_ptr = VE_code;
    const lv_32fc_t* e_ptr = E_code;
    const lv_32fc_t* p_ptr = P_code;
    const lv_32fc_t* l_ptr = L_code;
    const lv_32fc_t* vl_ptr = VL_code;

    /* One four-lane complex accumulator per correlator. */
    __m256 acc_VE = _mm256_setzero_ps();
    __m256 acc_E = _mm256_setzero_ps();
    __m256 acc_P = _mm256_setzero_ps();
    __m256 acc_L = _mm256_setzero_ps();
    __m256 acc_VL = _mm256_setzero_ps();

    /* Scratch registers reused by every complex product below. */
    __m256 rep, rep_re, rep_im, direct, cross;

    for (quad = 0; quad < quad_count; ++quad)
        {
            /* Carrier wipe-off: bb = input * carrier for four samples at once. */
            const __m256 sig = _mm256_load_ps((const float*)in_ptr);       /* ar,ai,br,bi,... */
            const __m256 sig_swapped = _mm256_shuffle_ps(sig, sig, 0xB1);  /* ai,ar,bi,br,... */
            rep = _mm256_load_ps((const float*)carr_ptr);                  /* cr,ci,dr,di,... */
            rep_re = _mm256_moveldup_ps(rep);                              /* cr,cr,dr,dr,... */
            rep_im = _mm256_movehdup_ps(rep);                              /* ci,ci,di,di,... */
            direct = _mm256_mul_ps(sig, rep_re);                           /* ar*cr, ai*cr, br*dr, bi*dr,... */
            cross = _mm256_mul_ps(sig_swapped, rep_im);                    /* ai*ci, ar*ci, bi*di, br*di,... */
            const __m256 bb = _mm256_addsub_ps(direct, cross);             /* ar*cr-ai*ci, ai*cr+ar*ci, ... */
            const __m256 bb_swapped = _mm256_shuffle_ps(bb, bb, 0xB1);     /* imag/real swapped copy of bb */

            /* VE correlator: acc_VE += bb * VE_code */
            rep = _mm256_load_ps((const float*)ve_ptr);
            rep_re = _mm256_moveldup_ps(rep);
            rep_im = _mm256_movehdup_ps(rep);
            direct = _mm256_mul_ps(bb, rep_re);
            cross = _mm256_mul_ps(bb_swapped, rep_im);
            acc_VE = _mm256_add_ps(acc_VE, _mm256_addsub_ps(direct, cross));

            /* Early correlator: acc_E += bb * E_code */
            rep = _mm256_load_ps((const float*)e_ptr);
            rep_re = _mm256_moveldup_ps(rep);
            rep_im = _mm256_movehdup_ps(rep);
            direct = _mm256_mul_ps(bb, rep_re);
            cross = _mm256_mul_ps(bb_swapped, rep_im);
            acc_E = _mm256_add_ps(acc_E, _mm256_addsub_ps(direct, cross));

            /* Prompt correlator: acc_P += bb * P_code */
            rep = _mm256_load_ps((const float*)p_ptr);
            rep_re = _mm256_moveldup_ps(rep);
            rep_im = _mm256_movehdup_ps(rep);
            direct = _mm256_mul_ps(bb, rep_re);
            cross = _mm256_mul_ps(bb_swapped, rep_im);
            acc_P = _mm256_add_ps(acc_P, _mm256_addsub_ps(direct, cross));

            /* Late correlator: acc_L += bb * L_code */
            rep = _mm256_load_ps((const float*)l_ptr);
            rep_re = _mm256_moveldup_ps(rep);
            rep_im = _mm256_movehdup_ps(rep);
            direct = _mm256_mul_ps(bb, rep_re);
            cross = _mm256_mul_ps(bb_swapped, rep_im);
            acc_L = _mm256_add_ps(acc_L, _mm256_addsub_ps(direct, cross));

            /* VL correlator: acc_VL += bb * VL_code */
            rep = _mm256_load_ps((const float*)vl_ptr);
            rep_re = _mm256_moveldup_ps(rep);
            rep_im = _mm256_movehdup_ps(rep);
            direct = _mm256_mul_ps(bb, rep_re);
            cross = _mm256_mul_ps(bb_swapped, rep_im);
            acc_VL = _mm256_add_ps(acc_VL, _mm256_addsub_ps(direct, cross));

            /* Step every cursor past the four samples just consumed. */
            in_ptr += 4;
            carr_ptr += 4;
            ve_ptr += 4;
            e_ptr += 4;
            p_ptr += 4;
            l_ptr += 4;
            vl_ptr += 4;
        }

    /* Spill each accumulator to aligned memory so its four complex lanes can be added together. */
    __VOLK_ATTR_ALIGNED(32) lv_32fc_t lanes_VE[4];
    __VOLK_ATTR_ALIGNED(32) lv_32fc_t lanes_E[4];
    __VOLK_ATTR_ALIGNED(32) lv_32fc_t lanes_P[4];
    __VOLK_ATTR_ALIGNED(32) lv_32fc_t lanes_L[4];
    __VOLK_ATTR_ALIGNED(32) lv_32fc_t lanes_VL[4];

    _mm256_store_ps((float*)lanes_VE, acc_VE);
    _mm256_store_ps((float*)lanes_E, acc_E);
    _mm256_store_ps((float*)lanes_P, acc_P);
    _mm256_store_ps((float*)lanes_L, acc_L);
    _mm256_store_ps((float*)lanes_VL, acc_VL);

    lv_32fc_t sum_VE = lanes_VE[0] + lanes_VE[1] + lanes_VE[2] + lanes_VE[3];
    lv_32fc_t sum_E = lanes_E[0] + lanes_E[1] + lanes_E[2] + lanes_E[3];
    lv_32fc_t sum_P = lanes_P[0] + lanes_P[1] + lanes_P[2] + lanes_P[3];
    lv_32fc_t sum_L = lanes_L[0] + lanes_L[1] + lanes_L[2] + lanes_L[3];
    lv_32fc_t sum_VL = lanes_VL[0] + lanes_VL[1] + lanes_VL[2] + lanes_VL[3];

    /* Up to three samples remain when num_points is not a multiple of four; handle them one by one. */
    for (leftover = num_points % 4; leftover != 0; --leftover)
        {
            sum_VE += (*in_ptr) * (*ve_ptr) * (*carr_ptr);
            sum_E += (*in_ptr) * (*e_ptr) * (*carr_ptr);
            sum_P += (*in_ptr) * (*p_ptr) * (*carr_ptr);
            sum_L += (*in_ptr) * (*l_ptr) * (*carr_ptr);
            sum_VL += (*in_ptr) * (*vl_ptr) * (*carr_ptr);

            ++in_ptr;
            ++carr_ptr;
            ++ve_ptr;
            ++e_ptr;
            ++p_ptr;
            ++l_ptr;
            ++vl_ptr;
        }

    *VE_out = sum_VE;
    *E_out = sum_E;
    *P_out = sum_P;
    *L_out = sum_L;
    *VL_out = sum_VL;
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
    \brief Carrier wipe-off followed by the VE, Early, Prompt, Late and VL correlations (SSE3, aligned loads)

    All vector loads use _mm_load_ps, so every input pointer must be 16-byte aligned.

    \param VE_out VE correlation output (a single accumulated complex value)
    \param E_out Early correlation output (a single accumulated complex value)
    \param P_out Prompt correlation output (a single accumulated complex value)
    \param L_out Late correlation output (a single accumulated complex value)
    \param VL_out VL correlation output (a single accumulated complex value)
    \param input The input signal
    \param carrier The carrier signal, multiplied sample by sample with the input
    \param VE_code VE PRN code replica input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param VL_code VL PRN code replica input
    \param num_points The number of complex values in each input vector
*/
static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_sse3(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
{
    /* A __m128 register carries two complex floats, so the vector loop walks the data in pairs. */
    const unsigned int pair_count = num_points / 2;
    unsigned int pair;

    /* Read cursors over the seven input vectors. */
    const lv_32fc_t* in_ptr = input;
    const lv_32fc_t* carr_ptr = carrier;
    const lv_32fc_t* ve_ptr = VE_code;
    const lv_32fc_t* e_ptr = E_code;
    const lv_32fc_t* p_ptr = P_code;
    const lv_32fc_t* l_ptr = L_code;
    const lv_32fc_t* vl_ptr = VL_code;

    /* One two-lane complex accumulator per correlator. */
    __m128 acc_VE = _mm_setzero_ps();
    __m128 acc_E = _mm_setzero_ps();
    __m128 acc_P = _mm_setzero_ps();
    __m128 acc_L = _mm_setzero_ps();
    __m128 acc_VL = _mm_setzero_ps();

    /* Scratch registers reused by every complex product below. */
    __m128 rep, rep_re, rep_im, direct, cross;

    for (pair = 0; pair < pair_count; ++pair)
        {
            /* Carrier wipe-off: bb = input * carrier for two samples at once. */
            const __m128 sig = _mm_load_ps((const float*)in_ptr);       /* ar,ai,br,bi */
            const __m128 sig_swapped = _mm_shuffle_ps(sig, sig, 0xB1);  /* ai,ar,bi,br */
            rep = _mm_load_ps((const float*)carr_ptr);                  /* cr,ci,dr,di */
            rep_re = _mm_moveldup_ps(rep);                              /* cr,cr,dr,dr */
            rep_im = _mm_movehdup_ps(rep);                              /* ci,ci,di,di */
            direct = _mm_mul_ps(sig, rep_re);                           /* ar*cr, ai*cr, br*dr, bi*dr */
            cross = _mm_mul_ps(sig_swapped, rep_im);                    /* ai*ci, ar*ci, bi*di, br*di */
            const __m128 bb = _mm_addsub_ps(direct, cross);             /* ar*cr-ai*ci, ai*cr+ar*ci, ... */
            const __m128 bb_swapped = _mm_shuffle_ps(bb, bb, 0xB1);     /* imag/real swapped copy of bb */

            /* VE correlator: acc_VE += bb * VE_code */
            rep = _mm_load_ps((const float*)ve_ptr);
            rep_re = _mm_moveldup_ps(rep);
            rep_im = _mm_movehdup_ps(rep);
            direct = _mm_mul_ps(bb, rep_re);
            cross = _mm_mul_ps(bb_swapped, rep_im);
            acc_VE = _mm_add_ps(acc_VE, _mm_addsub_ps(direct, cross));

            /* Early correlator: acc_E += bb * E_code */
            rep = _mm_load_ps((const float*)e_ptr);
            rep_re = _mm_moveldup_ps(rep);
            rep_im = _mm_movehdup_ps(rep);
            direct = _mm_mul_ps(bb, rep_re);
            cross = _mm_mul_ps(bb_swapped, rep_im);
            acc_E = _mm_add_ps(acc_E, _mm_addsub_ps(direct, cross));

            /* Prompt correlator: acc_P += bb * P_code */
            rep = _mm_load_ps((const float*)p_ptr);
            rep_re = _mm_moveldup_ps(rep);
            rep_im = _mm_movehdup_ps(rep);
            direct = _mm_mul_ps(bb, rep_re);
            cross = _mm_mul_ps(bb_swapped, rep_im);
            acc_P = _mm_add_ps(acc_P, _mm_addsub_ps(direct, cross));

            /* Late correlator: acc_L += bb * L_code */
            rep = _mm_load_ps((const float*)l_ptr);
            rep_re = _mm_moveldup_ps(rep);
            rep_im = _mm_movehdup_ps(rep);
            direct = _mm_mul_ps(bb, rep_re);
            cross = _mm_mul_ps(bb_swapped, rep_im);
            acc_L = _mm_add_ps(acc_L, _mm_addsub_ps(direct, cross));

            /* VL correlator: acc_VL += bb * VL_code */
            rep = _mm_load_ps((const float*)vl_ptr);
            rep_re = _mm_moveldup_ps(rep);
            rep_im = _mm_movehdup_ps(rep);
            direct = _mm_mul_ps(bb, rep_re);
            cross = _mm_mul_ps(bb_swapped, rep_im);
            acc_VL = _mm_add_ps(acc_VL, _mm_addsub_ps(direct, cross));

            /* Step every cursor past the two samples just consumed. */
            in_ptr += 2;
            carr_ptr += 2;
            ve_ptr += 2;
            e_ptr += 2;
            p_ptr += 2;
            l_ptr += 2;
            vl_ptr += 2;
        }

    /* Spill each accumulator to aligned memory so its two complex lanes can be added together. */
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t lanes_VE[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t lanes_E[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t lanes_P[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t lanes_L[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t lanes_VL[2];

    _mm_store_ps((float*)lanes_VE, acc_VE);
    _mm_store_ps((float*)lanes_E, acc_E);
    _mm_store_ps((float*)lanes_P, acc_P);
    _mm_store_ps((float*)lanes_L, acc_L);
    _mm_store_ps((float*)lanes_VL, acc_VL);

    lv_32fc_t sum_VE = lanes_VE[0] + lanes_VE[1];
    lv_32fc_t sum_E = lanes_E[0] + lanes_E[1];
    lv_32fc_t sum_P = lanes_P[0] + lanes_P[1];
    lv_32fc_t sum_L = lanes_L[0] + lanes_L[1];
    lv_32fc_t sum_VL = lanes_VL[0] + lanes_VL[1];

    /* An odd num_points leaves exactly one sample for the scalar path. */
    if ((num_points & 1u) != 0)
        {
            sum_VE += (*in_ptr) * (*ve_ptr) * (*carr_ptr);
            sum_E += (*in_ptr) * (*e_ptr) * (*carr_ptr);
            sum_P += (*in_ptr) * (*p_ptr) * (*carr_ptr);
            sum_L += (*in_ptr) * (*l_ptr) * (*carr_ptr);
            sum_VL += (*in_ptr) * (*vl_ptr) * (*carr_ptr);
        }

    *VE_out = sum_VE;
    *E_out = sum_E;
    *P_out = sum_P;
    *L_out = sum_L;
    *VL_out = sum_VL;
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Carrier wipe-off followed by the VE, Early, Prompt, Late and VL correlations (portable C version)
    \param VE_out VE correlation output (a single accumulated complex value)
    \param E_out Early correlation output (a single accumulated complex value)
    \param P_out Prompt correlation output (a single accumulated complex value)
    \param L_out Late correlation output (a single accumulated complex value)
    \param VL_out VL correlation output (a single accumulated complex value)
    \param input The input signal
    \param carrier The carrier signal, multiplied sample by sample with the input
    \param VE_code VE PRN code replica input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param VL_code VL PRN code replica input
    \param num_points The number of complex values in each input vector
*/
static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
{
    /* Read cursors over the seven input vectors. */
    const lv_32fc_t* in_ptr = input;
    const lv_32fc_t* carr_ptr = carrier;
    const lv_32fc_t* ve_ptr = VE_code;
    const lv_32fc_t* e_ptr = E_code;
    const lv_32fc_t* p_ptr = P_code;
    const lv_32fc_t* l_ptr = L_code;
    const lv_32fc_t* vl_ptr = VL_code;
    unsigned int remaining;

    /* The outputs double as the accumulators, so clear them before summing. */
    *VE_out = 0;
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;
    *VL_out = 0;

    for (remaining = num_points; remaining != 0; --remaining)
        {
            /* Carrier wipe-off for the current sample. */
            const lv_32fc_t baseband = (*in_ptr) * (*carr_ptr);

            /* Accumulate the product of the base-band sample with each code replica. */
            *VE_out += baseband * (*ve_ptr);
            *E_out += baseband * (*e_ptr);
            *P_out += baseband * (*p_ptr);
            *L_out += baseband * (*l_ptr);
            *VL_out += baseband * (*vl_ptr);

            ++in_ptr;
            ++carr_ptr;
            ++ve_ptr;
            ++e_ptr;
            ++p_ptr;
            ++l_ptr;
            ++vl_ptr;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H */

View File

@ -1,614 +0,0 @@
/*!
* \file volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3.h
* \brief Volk protokernel: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors, and accumulates the results into float32.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that performs the carrier wipe-off mixing and the
* Early, Prompt, and Late correlation with 16-bit vectors (8 bits for the
* real part and 8 bits for the imaginary part), and accumulates the result
* in 32-bit single-precision values, returning float32 values:
* - The carrier wipe-off is done by multiplying the input signal by the
* carrier (multiplication of 16 bits vectors) It returns the input
* signal in base band (BB)
* - Early values are calculated by multiplying the input signal in BB by the
* early code (multiplication of 16 bits vectors), accumulating the results into float32 values
* - Prompt values are calculated by multiplying the input signal in BB by the
* prompt code (multiplication of 16 bits vectors), accumulating the results into float32 values
* - Late values are calculated by multiplying the input signal in BB by the
* late code (multiplication of 16 bits vectors), accumulating the results into float32 values
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_H
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
    \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (SSE4.1, unaligned loads)
    \param E_out Early correlation output (a single accumulated complex value)
    \param P_out Prompt correlation output (a single accumulated complex value)
    \param L_out Late correlation output (a single accumulated complex value)
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse4_1(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
    /* Each vector iteration loads 16 bytes per input and advances the pointers by 8 elements. */
    const unsigned int sse_iters = num_points / 8;

    __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample;
    /* NOTE(review): `output` is not referenced by any statement visible in this function;
       confirm the CM_* macros do not use it by name before removing it. */
    __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;

    __m128 E_code_acc, P_code_acc, L_code_acc;
    __m128i input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2;
    __m128 output_ps;

    const lv_8sc_t* input_ptr = input;
    const lv_8sc_t* carrier_ptr = carrier;

    const lv_8sc_t* E_code_ptr = E_code;
    lv_32fc_t* E_out_ptr = E_out;
    const lv_8sc_t* L_code_ptr = L_code;
    lv_32fc_t* L_out_ptr = L_out;
    const lv_8sc_t* P_code_ptr = P_code;
    lv_32fc_t* P_out_ptr = P_out;

    /* The outputs are accumulated into, so they start from zero. */
    *E_out_ptr = 0;
    *P_out_ptr = 0;
    *L_out_ptr = 0;

    /* Byte mask: 0xFF in the low byte and 0x00 in the high byte of every 16-bit lane. */
    mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);

    E_code_acc = _mm_setzero_ps();
    L_code_acc = _mm_setzero_ps();
    P_code_acc = _mm_setzero_ps();

    if (sse_iters > 0)
        {
            /* Unsigned counter: sse_iters is unsigned, so this avoids a signed/unsigned comparison. */
            for (unsigned int number = 0; number < sse_iters; number++)
                {
                    //Perform the carrier wipe-off
                    // (the CM_* macros come from the CommonMacros headers and are not visible here;
                    //  presumably they split real/imag parts and form the complex product -- TODO confirm)
                    x = _mm_lddqu_si128((const __m128i*)input_ptr);
                    y = _mm_lddqu_si128((const __m128i*)carrier_ptr);

                    CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
                    CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)

                    CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)

                    //Get early values
                    y = _mm_lddqu_si128((const __m128i*)E_code_ptr);

                    CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

                    E_code_acc = _mm_add_ps (E_code_acc, output_ps);

                    //Get prompt values
                    y = _mm_lddqu_si128((const __m128i*)P_code_ptr);

                    CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

                    P_code_acc = _mm_add_ps (P_code_acc, output_ps);

                    //Get late values
                    y = _mm_lddqu_si128((const __m128i*)L_code_ptr);

                    CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

                    L_code_acc = _mm_add_ps (L_code_acc, output_ps);

                    input_ptr += 8;
                    carrier_ptr += 8;
                    E_code_ptr += 8;
                    P_code_ptr += 8;
                    L_code_ptr += 8;
                }

            /* Spill each float accumulator as two complex values and fold them into the outputs. */
            __VOLK_ATTR_ALIGNED(16) lv_32fc_t E_dotProductVector[2];
            __VOLK_ATTR_ALIGNED(16) lv_32fc_t P_dotProductVector[2];
            __VOLK_ATTR_ALIGNED(16) lv_32fc_t L_dotProductVector[2];

            _mm_storeu_ps((float*)E_dotProductVector,E_code_acc); // Store the results back into the dot product vector
            _mm_storeu_ps((float*)P_dotProductVector,P_code_acc); // Store the results back into the dot product vector
            _mm_storeu_ps((float*)L_dotProductVector,L_code_acc); // Store the results back into the dot product vector

            for (int i = 0; i<2; ++i)
                {
                    *E_out_ptr += E_dotProductVector[i];
                    *P_out_ptr += P_dotProductVector[i];
                    *L_out_ptr += L_dotProductVector[i];
                }
        }

    /* Scalar tail for the num_points % 8 samples the vector loop did not consume. */
    lv_8sc_t bb_signal_sample;
    for (unsigned int i = 0; i < num_points % 8; ++i)
        {
            //Perform the carrier wipe-off
            bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
            // Now get early, late, and prompt values for each
            *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
            *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
            *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
        }
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (SSE2 version using unaligned loads)
\param E_out Early correlation output (a single value, overwritten by this call)
\param P_out Prompt correlation output (a single value, overwritten by this call)
\param L_out Late correlation output (a single value, overwritten by this call)
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Prompt PRN code replica input
\param L_code Late PRN code replica input
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
// Number of full groups of 8 samples handled by the vector loop; the remainder goes to the scalar tail.
const unsigned int sse_iters = num_points / 8;
__m128i x, y, real_bb_signal_sample, imag_bb_signal_sample;
// NOTE(review): `output` is never referenced by name in this function; it may only be needed by the CM_* macros -- confirm before removing.
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
__m128 E_code_acc, P_code_acc, L_code_acc;
__m128i input_i_1, input_i_2, output_i32;
__m128 output_ps_1, output_ps_2;
// Working cursors; the caller's pointers themselves are left untouched.
const lv_8sc_t* input_ptr = input;
const lv_8sc_t* carrier_ptr = carrier;
const lv_8sc_t* E_code_ptr = E_code;
lv_32fc_t* E_out_ptr = E_out;
const lv_8sc_t* L_code_ptr = L_code;
lv_32fc_t* L_out_ptr = L_out;
const lv_8sc_t* P_code_ptr = P_code;
lv_32fc_t* P_out_ptr = P_out;
// The three outputs are reset before any sample is processed, so previous contents are discarded.
*E_out_ptr = 0;
*P_out_ptr = 0;
*L_out_ptr = 0;
// Byte mask: 0xFF in every even byte, 0x00 in every odd byte. It is handed to the CM_* macros below.
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
E_code_acc = _mm_setzero_ps();
L_code_acc = _mm_setzero_ps();
P_code_acc = _mm_setzero_ps();
if (sse_iters>0)
{
// Each iteration loads one 128-bit vector from every input and then advances every cursor by 8 elements.
for(unsigned int number = 0;number < sse_iters; number++)
{
//Perform the carrier wipe-off
x = _mm_loadu_si128((__m128i*)input_ptr);
y = _mm_loadu_si128((__m128i*)carrier_ptr);
// NOTE(review): the CM_* macros are defined in CommonMacros/*.h (not visible here). Judging by their names and
// arguments they split x/y into real and imaginary parts and form the complex product input*carrier, leaving it
// in real_bb_signal_sample / imag_bb_signal_sample -- confirm against the macro definitions.
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
//Get early values
y = _mm_loadu_si128((__m128i*)E_code_ptr);
// The macro's results arrive in output_ps_1 and output_ps_2; both are added to the early accumulator.
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
E_code_acc = _mm_add_ps (E_code_acc, output_ps_1);
E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
//Get prompt values
y = _mm_loadu_si128((__m128i*)P_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
P_code_acc = _mm_add_ps (P_code_acc, output_ps_1);
P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
//Get late values
y = _mm_loadu_si128((__m128i*)L_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
L_code_acc = _mm_add_ps (L_code_acc, output_ps_1);
L_code_acc = _mm_add_ps (L_code_acc, output_ps_2);
input_ptr += 8;
carrier_ptr += 8;
E_code_ptr += 8;
P_code_ptr += 8;
L_code_ptr += 8;
}
// Horizontal reduction: each __m128 accumulator (four floats) is stored as two lv_32fc_t partial sums,
// which are then added into the corresponding output.
__VOLK_ATTR_ALIGNED(16) lv_32fc_t E_dotProductVector[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t P_dotProductVector[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t L_dotProductVector[2];
_mm_storeu_ps((float*)E_dotProductVector,E_code_acc); // Store the results back into the dot product vector
_mm_storeu_ps((float*)P_dotProductVector,P_code_acc); // Store the results back into the dot product vector
_mm_storeu_ps((float*)L_dotProductVector,L_code_acc); // Store the results back into the dot product vector
for (unsigned int i = 0; i<2; ++i)
{
*E_out_ptr += E_dotProductVector[i];
*P_out_ptr += P_dotProductVector[i];
*L_out_ptr += L_dotProductVector[i];
}
}
// Scalar tail: the num_points % 8 samples that did not fill a whole vector.
// The wipe-off product is stored in an lv_8sc_t, i.e. at the element type of the inputs.
lv_8sc_t bb_signal_sample;
for(unsigned int i=0; i < num_points%8; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
// Now get early, late, and prompt values for each
*E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
*P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
*L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Portable C version of the carrier wipe-off mixing followed by the Early, Prompt, and Late correlation
    \param E_out Early correlation output (a single value, overwritten by this call)
    \param P_out Prompt correlation output (a single value, overwritten by this call)
    \param L_out Late correlation output (a single value, overwritten by this call)
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
    unsigned int sample_idx = 0;

    /* Every call starts the three correlators from zero. */
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;

    while (sample_idx < num_points)
        {
            /* Carrier wipe-off: mix the incoming sample with the local carrier.
               The product is held at the input element type (lv_8sc_t). */
            const lv_8sc_t baseband = input[sample_idx] * carrier[sample_idx];

            /* Correlate the base-band sample with each code replica and integrate. */
            *E_out += (lv_32fc_t) (baseband * E_code[sample_idx]);
            *P_out += (lv_32fc_t) (baseband * P_code[sample_idx]);
            *L_out += (lv_32fc_t) (baseband * L_code[sample_idx]);

            ++sample_idx;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_H */
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_H
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
    \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (SSE4.1 version using aligned loads)

    The vector loop uses _mm_load_si128, so input, carrier and the three code
    replicas must be 16-byte aligned.

    \param E_out Early correlation output (a single value, overwritten by this call)
    \param P_out Prompt correlation output (a single value, overwritten by this call)
    \param L_out Late correlation output (a single value, overwritten by this call)
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_sse4_1(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
    /* Number of full groups of 8 samples handled by the vector loop. */
    const unsigned int sse_iters = num_points / 8;

    __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample;
    /* NOTE(review): `output` is not referenced by name in this function; it is
       kept in case the CM_* macros rely on it -- confirm before removing. */
    __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;

    __m128 E_code_acc, P_code_acc, L_code_acc;
    __m128i input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2;
    __m128 output_ps;

    /* Working cursors; the caller's pointers themselves are left untouched. */
    const lv_8sc_t* input_ptr = input;
    const lv_8sc_t* carrier_ptr = carrier;

    const lv_8sc_t* E_code_ptr = E_code;
    lv_32fc_t* E_out_ptr = E_out;
    const lv_8sc_t* L_code_ptr = L_code;
    lv_32fc_t* L_out_ptr = L_out;
    const lv_8sc_t* P_code_ptr = P_code;
    lv_32fc_t* P_out_ptr = P_out;

    /* Outputs are reset first, so previous contents are discarded. */
    *E_out_ptr = 0;
    *P_out_ptr = 0;
    *L_out_ptr = 0;

    /* Byte mask: 0xFF in every even byte, 0x00 in every odd byte. */
    mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);

    E_code_acc = _mm_setzero_ps();
    L_code_acc = _mm_setzero_ps();
    P_code_acc = _mm_setzero_ps();

    if (sse_iters>0)
        {
            /* Counter is unsigned to match sse_iters (no signed/unsigned comparison). */
            for(unsigned int number = 0; number < sse_iters; number++)
                {
                    //Perform the carrier wipe-off
                    x = _mm_load_si128((__m128i*)input_ptr);
                    y = _mm_load_si128((__m128i*)carrier_ptr);

                    /* NOTE(review): the CM_* macros live in CommonMacros/*.h (not visible
                       here). By name and arguments they split x/y into real and imaginary
                       parts and form input*carrier -- confirm against their definitions. */
                    CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
                    CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)

                    CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)

                    //Get early values
                    y = _mm_load_si128((__m128i*)E_code_ptr);

                    /* The macro's result arrives in output_ps and is added to the accumulator. */
                    CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

                    E_code_acc = _mm_add_ps (E_code_acc, output_ps);

                    //Get prompt values
                    y = _mm_load_si128((__m128i*)P_code_ptr);

                    CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

                    P_code_acc = _mm_add_ps (P_code_acc, output_ps);

                    //Get late values
                    y = _mm_load_si128((__m128i*)L_code_ptr);

                    CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

                    L_code_acc = _mm_add_ps (L_code_acc, output_ps);

                    input_ptr += 8;
                    carrier_ptr += 8;
                    E_code_ptr += 8;
                    P_code_ptr += 8;
                    L_code_ptr += 8;
                }

            /* Horizontal reduction: each __m128 accumulator (four floats) is stored as
               two lv_32fc_t partial sums, which are then added into the output. */
            __VOLK_ATTR_ALIGNED(16) lv_32fc_t E_dotProductVector[2];
            __VOLK_ATTR_ALIGNED(16) lv_32fc_t P_dotProductVector[2];
            __VOLK_ATTR_ALIGNED(16) lv_32fc_t L_dotProductVector[2];

            _mm_store_ps((float*)E_dotProductVector,E_code_acc); // Store the results back into the dot product vector
            _mm_store_ps((float*)P_dotProductVector,P_code_acc); // Store the results back into the dot product vector
            _mm_store_ps((float*)L_dotProductVector,L_code_acc); // Store the results back into the dot product vector

            for (unsigned int i = 0; i<2; ++i)
                {
                    *E_out_ptr += E_dotProductVector[i];
                    *P_out_ptr += P_dotProductVector[i];
                    *L_out_ptr += L_dotProductVector[i];
                }
        }

    /* Scalar tail: the num_points % 8 samples that did not fill a whole vector.
       The wipe-off product is held at the input element type (lv_8sc_t). */
    lv_8sc_t bb_signal_sample;
    for(unsigned int i=0; i < num_points%8; ++i)
        {
            //Perform the carrier wipe-off
            bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
            // Now get early, late, and prompt values for each
            *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
            *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
            *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
        }
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (SSE2 version using aligned loads)
\param E_out Early correlation output (a single value, overwritten by this call)
\param P_out Prompt correlation output (a single value, overwritten by this call)
\param L_out Late correlation output (a single value, overwritten by this call)
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Prompt PRN code replica input
\param L_code Late PRN code replica input
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_sse2(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
// Number of full groups of 8 samples handled by the vector loop; the remainder goes to the scalar tail.
const unsigned int sse_iters = num_points / 8;
__m128i x, y, real_bb_signal_sample, imag_bb_signal_sample;
// NOTE(review): `output` is never referenced by name in this function; it may only be needed by the CM_* macros -- confirm before removing.
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
__m128 E_code_acc, P_code_acc, L_code_acc;
__m128i input_i_1, input_i_2, output_i32;
__m128 output_ps_1, output_ps_2;
// Working cursors; the caller's pointers themselves are left untouched.
const lv_8sc_t* input_ptr = input;
const lv_8sc_t* carrier_ptr = carrier;
const lv_8sc_t* E_code_ptr = E_code;
lv_32fc_t* E_out_ptr = E_out;
const lv_8sc_t* L_code_ptr = L_code;
lv_32fc_t* L_out_ptr = L_out;
const lv_8sc_t* P_code_ptr = P_code;
lv_32fc_t* P_out_ptr = P_out;
// The three outputs are reset before any sample is processed, so previous contents are discarded.
*E_out_ptr = 0;
*P_out_ptr = 0;
*L_out_ptr = 0;
// Byte mask: 0xFF in every even byte, 0x00 in every odd byte. It is handed to the CM_* macros below.
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
E_code_acc = _mm_setzero_ps();
L_code_acc = _mm_setzero_ps();
P_code_acc = _mm_setzero_ps();
if (sse_iters>0)
{
// Each iteration loads one 128-bit vector from every input with _mm_load_si128, which requires
// 16-byte aligned addresses, and then advances every cursor by 8 elements.
for(unsigned int number = 0;number < sse_iters; number++)
{
//Perform the carrier wipe-off
x = _mm_load_si128((__m128i*)input_ptr);
y = _mm_load_si128((__m128i*)carrier_ptr);
// NOTE(review): the CM_* macros are defined in CommonMacros/*.h (not visible here). Judging by their names and
// arguments they split x/y into real and imaginary parts and form the complex product input*carrier, leaving it
// in real_bb_signal_sample / imag_bb_signal_sample -- confirm against the macro definitions.
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
//Get early values
y = _mm_load_si128((__m128i*)E_code_ptr);
// The macro's results arrive in output_ps_1 and output_ps_2; both are added to the early accumulator.
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
E_code_acc = _mm_add_ps (E_code_acc, output_ps_1);
E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
//Get prompt values
y = _mm_load_si128((__m128i*)P_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
P_code_acc = _mm_add_ps (P_code_acc, output_ps_1);
P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
//Get late values
y = _mm_load_si128((__m128i*)L_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
L_code_acc = _mm_add_ps (L_code_acc, output_ps_1);
L_code_acc = _mm_add_ps (L_code_acc, output_ps_2);
input_ptr += 8;
carrier_ptr += 8;
E_code_ptr += 8;
P_code_ptr += 8;
L_code_ptr += 8;
}
// Horizontal reduction: each __m128 accumulator (four floats) is stored as two lv_32fc_t partial sums,
// which are then added into the corresponding output. The aligned store relies on __VOLK_ATTR_ALIGNED(16).
__VOLK_ATTR_ALIGNED(16) lv_32fc_t E_dotProductVector[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t P_dotProductVector[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t L_dotProductVector[2];
_mm_store_ps((float*)E_dotProductVector,E_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)P_dotProductVector,P_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)L_dotProductVector,L_code_acc); // Store the results back into the dot product vector
for (unsigned int i = 0; i<2; ++i)
{
*E_out_ptr += E_dotProductVector[i];
*P_out_ptr += P_dotProductVector[i];
*L_out_ptr += L_dotProductVector[i];
}
}
// Scalar tail: the num_points % 8 samples that did not fill a whole vector.
// The wipe-off product is stored in an lv_8sc_t, i.e. at the element type of the inputs.
lv_8sc_t bb_signal_sample;
for(unsigned int i=0; i < num_points%8; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
// Now get early, late, and prompt values for each
*E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
*P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
*L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Portable C version of the carrier wipe-off mixing followed by the Early, Prompt, and Late correlation
    \param E_out Early correlation output (a single value, overwritten by this call)
    \param P_out Prompt correlation output (a single value, overwritten by this call)
    \param L_out Late correlation output (a single value, overwritten by this call)
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
    unsigned int sample_idx = 0;

    /* Every call starts the three correlators from zero. */
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;

    while (sample_idx < num_points)
        {
            /* Carrier wipe-off: mix the incoming sample with the local carrier.
               The product is held at the input element type (lv_8sc_t). */
            const lv_8sc_t baseband = input[sample_idx] * carrier[sample_idx];

            /* Correlate the base-band sample with each code replica and integrate. */
            *E_out += (lv_32fc_t) (baseband * E_code[sample_idx]);
            *P_out += (lv_32fc_t) (baseband * P_code[sample_idx]);
            *L_out += (lv_32fc_t) (baseband * L_code[sample_idx]);

            ++sample_idx;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_H */

View File

@ -1,874 +0,0 @@
/*!
* \file volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.h
* \brief Volk protokernel: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that performs the carrier wipe-off mixing and the
* Early, Prompt, and Late correlation with 16 bits vectors (8 bits the
* real part and 8 bits the imaginary part):
* - The carrier wipe-off is done by multiplying the input signal by the
* carrier (multiplication of 16 bits vectors) It returns the input
* signal in base band (BB)
* - Early values are calculated by multiplying the input signal in BB by the
* early code (multiplication of 16 bits vectors), accumulating the results
* - Prompt values are calculated by multiplying the input signal in BB by the
* prompt code (multiplication of 16 bits vectors), accumulating the results
* - Late values are calculated by multiplying the input signal in BB by the
* late code (multiplication of 16 bits vectors), accumulating the results
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_H
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
    \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (SSE4.1 version using unaligned loads, 8-bit complex outputs)

    Products and sums are carried in 16-bit lanes inside the vector loop; only
    the low byte of each lane is kept when the result is packed back into
    lv_8sc_t values.

    \param E_out Early correlation output (a single value, overwritten by this call)
    \param P_out Prompt correlation output (a single value, overwritten by this call)
    \param L_out Late correlation output (a single value, overwritten by this call)
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse4_1(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
    /* Number of full groups of 8 samples handled by the vector loop. */
    const unsigned int sse_iters = num_points / 8;

    __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
    __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;

    /* Working cursors; the caller's pointers themselves are left untouched. */
    const lv_8sc_t* input_ptr = input;
    const lv_8sc_t* carrier_ptr = carrier;

    const lv_8sc_t* E_code_ptr = E_code;
    lv_8sc_t* E_out_ptr = E_out;
    const lv_8sc_t* L_code_ptr = L_code;
    lv_8sc_t* L_out_ptr = L_out;
    const lv_8sc_t* P_code_ptr = P_code;
    lv_8sc_t* P_out_ptr = P_out;

    /* Outputs are reset first, so previous contents are discarded. */
    *E_out_ptr = 0;
    *P_out_ptr = 0;
    *L_out_ptr = 0;

    /* Byte mask: 0xFF in every even byte, 0x00 in every odd byte. ANDing with it
       keeps the low byte of each 16-bit lane; as a blend mask it selects even bytes. */
    mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);

    real_E_code_acc = _mm_setzero_si128();
    imag_E_code_acc = _mm_setzero_si128();
    real_L_code_acc = _mm_setzero_si128();
    imag_L_code_acc = _mm_setzero_si128();
    real_P_code_acc = _mm_setzero_si128();
    imag_P_code_acc = _mm_setzero_si128();

    if (sse_iters>0)
        {
            /* Counter is unsigned to match sse_iters (no signed/unsigned comparison). */
            for(unsigned int number = 0; number < sse_iters; number++)
                {
                    //Perform the carrier wipe-off
                    x = _mm_lddqu_si128((__m128i*)input_ptr);
                    y = _mm_lddqu_si128((__m128i*)carrier_ptr);

                    /* De-interleave: even bytes go to realx, odd bytes (shifted down one
                       byte) to imagx, each zero-extended into a 16-bit lane.
                       Assumes samples are stored as interleaved (real, imag) bytes. */
                    imagx = _mm_srli_si128 (x, 1);
                    imagx = _mm_and_si128 (imagx, mult1);
                    realx = _mm_and_si128 (x, mult1);

                    imagy = _mm_srli_si128 (y, 1);
                    imagy = _mm_and_si128 (imagy, mult1);
                    realy = _mm_and_si128 (y, mult1);

                    /* Complex product input*carrier: re = rr - ii, im = ri + ir. */
                    realx_mult_realy = _mm_mullo_epi16 (realx, realy);
                    imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
                    realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
                    imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);

                    real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
                    imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);

                    //Get early values
                    y = _mm_lddqu_si128((__m128i*)E_code_ptr);

                    imagy = _mm_srli_si128 (y, 1);
                    imagy = _mm_and_si128 (imagy, mult1);
                    realy = _mm_and_si128 (y, mult1);

                    realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
                    imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
                    realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
                    imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);

                    real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
                    imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);

                    real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
                    imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);

                    //Get late values
                    y = _mm_lddqu_si128((__m128i*)L_code_ptr);

                    imagy = _mm_srli_si128 (y, 1);
                    imagy = _mm_and_si128 (imagy, mult1);
                    realy = _mm_and_si128 (y, mult1);

                    realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
                    imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
                    realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
                    imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);

                    real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
                    imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);

                    real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
                    imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);

                    //Get prompt values
                    y = _mm_lddqu_si128((__m128i*)P_code_ptr);

                    imagy = _mm_srli_si128 (y, 1);
                    imagy = _mm_and_si128 (imagy, mult1);
                    realy = _mm_and_si128 (y, mult1);

                    realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
                    imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
                    realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
                    imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);

                    real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
                    imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);

                    real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
                    imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);

                    input_ptr += 8;
                    carrier_ptr += 8;
                    E_code_ptr += 8;
                    L_code_ptr += 8;
                    P_code_ptr += 8;
                }

            __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
            __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
            __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];

            /* Re-interleave: the imaginary lanes are shifted up one byte so their low
               bytes land on odd positions, then blended with the low bytes of the real
               lanes (even positions). The result is 8 packed lv_8sc_t partial sums. */
            imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
            output = _mm_blendv_epi8 (imag_E_code_acc, real_E_code_acc, mult1);
            _mm_storeu_si128((__m128i*)E_dotProductVector, output);

            imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
            output = _mm_blendv_epi8 (imag_L_code_acc, real_L_code_acc, mult1);
            _mm_storeu_si128((__m128i*)L_dotProductVector, output);

            imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
            output = _mm_blendv_epi8 (imag_P_code_acc, real_P_code_acc, mult1);
            _mm_storeu_si128((__m128i*)P_dotProductVector, output);

            /* Fold the 8 partial sums of each correlator into its output. */
            for (unsigned int i = 0; i<8; ++i)
                {
                    *E_out_ptr += E_dotProductVector[i];
                    *L_out_ptr += L_dotProductVector[i];
                    *P_out_ptr += P_dotProductVector[i];
                }
        }

    /* Scalar tail: the num_points % 8 samples that did not fill a whole vector. */
    lv_8sc_t bb_signal_sample;
    for(unsigned int i=0; i < num_points%8; ++i)
        {
            //Perform the carrier wipe-off
            bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
            // Now get early, late, and prompt values for each
            *E_out_ptr += bb_signal_sample * (*E_code_ptr++);
            *P_out_ptr += bb_signal_sample * (*P_code_ptr++);
            *L_out_ptr += bb_signal_sample * (*L_code_ptr++);
        }
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (SSE2 version using unaligned loads, 8-bit complex outputs)
\param E_out Early correlation output (a single value, overwritten by this call)
\param P_out Prompt correlation output (a single value, overwritten by this call)
\param L_out Late correlation output (a single value, overwritten by this call)
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Prompt PRN code replica input
\param L_code Late PRN code replica input
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
// Number of full groups of 8 samples handled by the vector loop; the remainder goes to the scalar tail.
const unsigned int sse_iters = num_points / 8;
__m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
// Working cursors; the caller's pointers themselves are left untouched.
const lv_8sc_t* input_ptr = input;
const lv_8sc_t* carrier_ptr = carrier;
const lv_8sc_t* E_code_ptr = E_code;
lv_8sc_t* E_out_ptr = E_out;
const lv_8sc_t* L_code_ptr = L_code;
lv_8sc_t* L_out_ptr = L_out;
const lv_8sc_t* P_code_ptr = P_code;
lv_8sc_t* P_out_ptr = P_out;
// The three outputs are reset before any sample is processed, so previous contents are discarded.
*E_out_ptr = 0;
*P_out_ptr = 0;
*L_out_ptr = 0;
// Byte mask: 0xFF in every even byte, 0x00 in every odd byte, i.e. it keeps the low byte of each 16-bit lane.
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
real_E_code_acc = _mm_setzero_si128();
imag_E_code_acc = _mm_setzero_si128();
real_L_code_acc = _mm_setzero_si128();
imag_L_code_acc = _mm_setzero_si128();
real_P_code_acc = _mm_setzero_si128();
imag_P_code_acc = _mm_setzero_si128();
if (sse_iters>0)
{
// Each iteration loads one 128-bit vector from every input and then advances every cursor by 8 elements.
for(unsigned int number = 0;number < sse_iters; number++)
{
//Perform the carrier wipe-off
x = _mm_loadu_si128((__m128i*)input_ptr);
y = _mm_loadu_si128((__m128i*)carrier_ptr);
// De-interleave: even bytes go to realx, odd bytes (shifted down by one byte) to imagx, each
// zero-extended into a 16-bit lane. Assumes samples are stored as interleaved (real, imag) bytes -- TODO confirm.
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
// Complex product input*carrier in 16-bit lanes: re = rr - ii, im = ri + ir.
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//Get early values
// Same de-interleave and complex product, now base-band sample times early code; y/realy/imagy and the
// *_mult_* temporaries are reused, so the three sections below must stay self-contained and in sequence.
y = _mm_loadu_si128((__m128i*)E_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
//Get late values
y = _mm_loadu_si128((__m128i*)L_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
//Get prompt values
y = _mm_loadu_si128((__m128i*)P_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
input_ptr += 8;
carrier_ptr += 8;
E_code_ptr += 8;
L_code_ptr += 8;
P_code_ptr += 8;
}
__VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
__VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
__VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
// Re-interleave without SSE4.1 blend: keep only the low byte of every 16-bit lane, shift the imaginary
// bytes up to the odd positions and OR the two halves together, giving 8 packed lv_8sc_t partial sums.
real_E_code_acc = _mm_and_si128 (real_E_code_acc, mult1);
imag_E_code_acc = _mm_and_si128 (imag_E_code_acc, mult1);
imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
output = _mm_or_si128 (real_E_code_acc, imag_E_code_acc);
_mm_storeu_si128((__m128i*)E_dotProductVector, output);
real_L_code_acc = _mm_and_si128 (real_L_code_acc, mult1);
imag_L_code_acc = _mm_and_si128 (imag_L_code_acc, mult1);
imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
output = _mm_or_si128 (real_L_code_acc, imag_L_code_acc);
_mm_storeu_si128((__m128i*)L_dotProductVector, output);
real_P_code_acc = _mm_and_si128 (real_P_code_acc, mult1);
imag_P_code_acc = _mm_and_si128 (imag_P_code_acc, mult1);
imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
output = _mm_or_si128 (real_P_code_acc, imag_P_code_acc);
_mm_storeu_si128((__m128i*)P_dotProductVector, output);
// Fold the 8 partial sums of each correlator into its output.
for (int i = 0; i<8; ++i)
{
*E_out_ptr += E_dotProductVector[i];
*L_out_ptr += L_dotProductVector[i];
*P_out_ptr += P_dotProductVector[i];
}
}
// Scalar tail: the num_points % 8 samples that did not fill a whole vector.
lv_8sc_t bb_signal_sample;
for(unsigned int i=0; i < num_points%8; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
// Now get early, late, and prompt values for each
*E_out_ptr += bb_signal_sample * (*E_code_ptr++);
*P_out_ptr += bb_signal_sample * (*P_code_ptr++);
*L_out_ptr += bb_signal_sample * (*L_code_ptr++);
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Portable C version of the carrier wipe-off mixing followed by the Early, Prompt, and Late correlation (8-bit complex outputs)
    \param E_out Early correlation output (a single value, overwritten by this call)
    \param P_out Prompt correlation output (a single value, overwritten by this call)
    \param L_out Late correlation output (a single value, overwritten by this call)
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_generic(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
    unsigned int sample_idx = 0;

    /* Every call starts the three correlators from zero. */
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;

    while (sample_idx < num_points)
        {
            /* Carrier wipe-off: mix the incoming sample with the local carrier. */
            const lv_8sc_t baseband = input[sample_idx] * carrier[sample_idx];

            /* Correlate the base-band sample with each code replica and integrate
               directly into the lv_8sc_t outputs. */
            *E_out += baseband * E_code[sample_idx];
            *P_out += baseband * P_code[sample_idx];
            *L_out += baseband * L_code[sample_idx];

            ++sample_idx;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_H */
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_H
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
    \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (SSE4.1, aligned loads)
    \param E_out Early correlation output
    \param P_out Prompt correlation output
    \param L_out Late correlation output
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors

    The five input vectors are read with _mm_load_si128 (the aligned load
    intrinsic), 8 complex samples per iteration. The num_points % 8 remaining
    samples are processed by a scalar loop.
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_sse4_1(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
    const unsigned int sse_iters = num_points / 8;
    const unsigned int tail_points = num_points % 8;
    // Counters are unsigned so they compare cleanly against the unsigned bounds
    unsigned int number;
    unsigned int i;

    __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
    __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;

    const lv_8sc_t* input_ptr = input;
    const lv_8sc_t* carrier_ptr = carrier;

    const lv_8sc_t* E_code_ptr = E_code;
    lv_8sc_t* E_out_ptr = E_out;
    const lv_8sc_t* L_code_ptr = L_code;
    lv_8sc_t* L_out_ptr = L_out;
    const lv_8sc_t* P_code_ptr = P_code;
    lv_8sc_t* P_out_ptr = P_out;

    *E_out_ptr = 0;
    *P_out_ptr = 0;
    *L_out_ptr = 0;

    // 0x00FF in every 16-bit lane: selects the even bytes of a register.
    // Same bit pattern as _mm_set_epi8(0, 255, ...), without passing 255
    // through a char parameter.
    mult1 = _mm_set1_epi16(0x00FF);

    real_E_code_acc = _mm_setzero_si128();
    imag_E_code_acc = _mm_setzero_si128();
    real_L_code_acc = _mm_setzero_si128();
    imag_L_code_acc = _mm_setzero_si128();
    real_P_code_acc = _mm_setzero_si128();
    imag_P_code_acc = _mm_setzero_si128();

    if (sse_iters > 0)
    {
        for (number = 0; number < sse_iters; number++)
        {
            //Perform the carrier wipe-off
            x = _mm_load_si128((const __m128i*)input_ptr);
            y = _mm_load_si128((const __m128i*)carrier_ptr);

            // Split each operand: even bytes into real*, odd bytes
            // (shifted down by one byte) into imag*, one value per 16-bit lane
            imagx = _mm_srli_si128 (x, 1);
            imagx = _mm_and_si128 (imagx, mult1);
            realx = _mm_and_si128 (x, mult1);

            imagy = _mm_srli_si128 (y, 1);
            imagy = _mm_and_si128 (imagy, mult1);
            realy = _mm_and_si128 (y, mult1);

            realx_mult_realy = _mm_mullo_epi16 (realx, realy);
            imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
            realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
            imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);

            real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
            imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);

            //Get early values
            y = _mm_load_si128((const __m128i*)E_code_ptr);

            imagy = _mm_srli_si128 (y, 1);
            imagy = _mm_and_si128 (imagy, mult1);
            realy = _mm_and_si128 (y, mult1);

            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);

            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);

            real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
            imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);

            //Get late values
            y = _mm_load_si128((const __m128i*)L_code_ptr);

            imagy = _mm_srli_si128 (y, 1);
            imagy = _mm_and_si128 (imagy, mult1);
            realy = _mm_and_si128 (y, mult1);

            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);

            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);

            real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
            imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);

            //Get prompt values
            y = _mm_load_si128((const __m128i*)P_code_ptr);

            imagy = _mm_srli_si128 (y, 1);
            imagy = _mm_and_si128 (imagy, mult1);
            realy = _mm_and_si128 (y, mult1);

            realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
            imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
            realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
            imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);

            real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
            imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);

            real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
            imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);

            input_ptr += 8;
            carrier_ptr += 8;
            E_code_ptr += 8;
            L_code_ptr += 8;
            P_code_ptr += 8;
        }

        __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
        __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
        __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];

        // Re-interleave: the blend takes the even bytes from the real
        // accumulator and the odd bytes from the imaginary accumulator
        // shifted up by one byte.
        imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
        output = _mm_blendv_epi8 (imag_E_code_acc, real_E_code_acc, mult1);
        _mm_store_si128((__m128i*)E_dotProductVector, output);

        imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
        output = _mm_blendv_epi8 (imag_L_code_acc, real_L_code_acc, mult1);
        _mm_store_si128((__m128i*)L_dotProductVector, output);

        imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
        output = _mm_blendv_epi8 (imag_P_code_acc, real_P_code_acc, mult1);
        _mm_store_si128((__m128i*)P_dotProductVector, output);

        // Horizontal reduction of the 8 partial sums of each correlator
        for (i = 0; i < 8; ++i)
        {
            *E_out_ptr += E_dotProductVector[i];
            *L_out_ptr += L_dotProductVector[i];
            *P_out_ptr += P_dotProductVector[i];
        }
    }

    // Scalar tail: the samples that do not fill a whole 8-sample block
    lv_8sc_t bb_signal_sample;
    for (i = 0; i < tail_points; ++i)
    {
        //Perform the carrier wipe-off
        bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
        // Now get early, late, and prompt values for each
        *E_out_ptr += bb_signal_sample * (*E_code_ptr++);
        *P_out_ptr += bb_signal_sample * (*P_code_ptr++);
        *L_out_ptr += bb_signal_sample * (*L_code_ptr++);
    }
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
    \brief SSE2 implementation (aligned loads) of the carrier wipe-off mixing and the Early, Prompt, and Late correlation
    \param E_out Early correlation output
    \param P_out Prompt correlation output
    \param L_out Late correlation output
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_sse2(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
    const unsigned int num_blocks = num_points / 8;  // whole 8-sample blocks
    const unsigned int num_tail = num_points % 8;    // leftover samples
    unsigned int blk;
    unsigned int k;

    const lv_8sc_t* in_it = input;
    const lv_8sc_t* carr_it = carrier;
    const lv_8sc_t* early_it = E_code;
    const lv_8sc_t* late_it = L_code;
    const lv_8sc_t* prompt_it = P_code;

    // 0x00FF in every 16-bit lane: keeps the even byte of each lane
    const __m128i low_byte = _mm_set1_epi16(0x00FF);

    // One real and one imaginary 16-bit-lane accumulator per correlator
    __m128i acc_e_re = _mm_setzero_si128();
    __m128i acc_e_im = _mm_setzero_si128();
    __m128i acc_l_re = _mm_setzero_si128();
    __m128i acc_l_im = _mm_setzero_si128();
    __m128i acc_p_re = _mm_setzero_si128();
    __m128i acc_p_im = _mm_setzero_si128();

    __m128i sig, rep, sig_re, sig_im, rep_re, rep_im, bb_re, bb_im, packed;
    lv_8sc_t bb_scalar;

    *E_out = 0;
    *P_out = 0;
    *L_out = 0;

    if (num_blocks > 0)
    {
        __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
        __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
        __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];

        for (blk = 0; blk < num_blocks; blk++)
        {
            // Carrier wipe-off: complex product input * carrier
            sig = _mm_load_si128((const __m128i*)in_it);
            rep = _mm_load_si128((const __m128i*)carr_it);

            sig_im = _mm_and_si128(_mm_srli_si128(sig, 1), low_byte);
            sig_re = _mm_and_si128(sig, low_byte);
            rep_im = _mm_and_si128(_mm_srli_si128(rep, 1), low_byte);
            rep_re = _mm_and_si128(rep, low_byte);

            bb_re = _mm_sub_epi16(_mm_mullo_epi16(sig_re, rep_re), _mm_mullo_epi16(sig_im, rep_im));
            bb_im = _mm_add_epi16(_mm_mullo_epi16(sig_re, rep_im), _mm_mullo_epi16(sig_im, rep_re));

            // Early replica
            rep = _mm_load_si128((const __m128i*)early_it);
            rep_im = _mm_and_si128(_mm_srli_si128(rep, 1), low_byte);
            rep_re = _mm_and_si128(rep, low_byte);
            acc_e_re = _mm_add_epi16(acc_e_re, _mm_sub_epi16(_mm_mullo_epi16(bb_re, rep_re), _mm_mullo_epi16(bb_im, rep_im)));
            acc_e_im = _mm_add_epi16(acc_e_im, _mm_add_epi16(_mm_mullo_epi16(bb_re, rep_im), _mm_mullo_epi16(bb_im, rep_re)));

            // Late replica
            rep = _mm_load_si128((const __m128i*)late_it);
            rep_im = _mm_and_si128(_mm_srli_si128(rep, 1), low_byte);
            rep_re = _mm_and_si128(rep, low_byte);
            acc_l_re = _mm_add_epi16(acc_l_re, _mm_sub_epi16(_mm_mullo_epi16(bb_re, rep_re), _mm_mullo_epi16(bb_im, rep_im)));
            acc_l_im = _mm_add_epi16(acc_l_im, _mm_add_epi16(_mm_mullo_epi16(bb_re, rep_im), _mm_mullo_epi16(bb_im, rep_re)));

            // Prompt replica
            rep = _mm_load_si128((const __m128i*)prompt_it);
            rep_im = _mm_and_si128(_mm_srli_si128(rep, 1), low_byte);
            rep_re = _mm_and_si128(rep, low_byte);
            acc_p_re = _mm_add_epi16(acc_p_re, _mm_sub_epi16(_mm_mullo_epi16(bb_re, rep_re), _mm_mullo_epi16(bb_im, rep_im)));
            acc_p_im = _mm_add_epi16(acc_p_im, _mm_add_epi16(_mm_mullo_epi16(bb_re, rep_im), _mm_mullo_epi16(bb_im, rep_re)));

            in_it += 8;
            carr_it += 8;
            early_it += 8;
            late_it += 8;
            prompt_it += 8;
        }

        // Re-interleave: low byte of each real lane into the even bytes,
        // low byte of each imaginary lane into the odd bytes
        packed = _mm_or_si128(_mm_and_si128(acc_e_re, low_byte), _mm_slli_si128(_mm_and_si128(acc_e_im, low_byte), 1));
        _mm_store_si128((__m128i*)E_dotProductVector, packed);

        packed = _mm_or_si128(_mm_and_si128(acc_l_re, low_byte), _mm_slli_si128(_mm_and_si128(acc_l_im, low_byte), 1));
        _mm_store_si128((__m128i*)L_dotProductVector, packed);

        packed = _mm_or_si128(_mm_and_si128(acc_p_re, low_byte), _mm_slli_si128(_mm_and_si128(acc_p_im, low_byte), 1));
        _mm_store_si128((__m128i*)P_dotProductVector, packed);

        // Sum the 8 partial results of each correlator into its output
        for (k = 0; k < 8; ++k)
        {
            *E_out += E_dotProductVector[k];
            *L_out += L_dotProductVector[k];
            *P_out += P_dotProductVector[k];
        }
    }

    // Scalar pass over the samples left after the last whole block
    for (k = 0; k < num_tail; ++k)
    {
        bb_scalar = (*in_it++) * (*carr_it++);
        *E_out += bb_scalar * (*early_it++);
        *P_out += bb_scalar * (*prompt_it++);
        *L_out += bb_scalar * (*late_it++);
    }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Portable C implementation: carrier wipe-off mixing followed by the Early, Prompt, and Late correlation
    \param E_out Early correlation output (one accumulated complex value)
    \param P_out Prompt correlation output (one accumulated complex value)
    \param L_out Late correlation output (one accumulated complex value)
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_generic(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
    unsigned int idx;
    lv_8sc_t mixed = lv_cmake(0, 0);

    // Start every correlator from zero; results are accumulated in place
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;

    for (idx = 0; idx != num_points; idx++)
    {
        // Remove the carrier from the current sample
        mixed = input[idx] * carrier[idx];

        // Accumulate the product with each of the three code replicas
        *E_out += mixed * E_code[idx];
        *P_out += mixed * P_code[idx];
        *L_out += mixed * L_code[idx];
    }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
    \brief ORC-backed carrier wipe-off mixing and Early, Prompt, and Late correlation
    \param E_out Early correlation output
    \param P_out Prompt correlation output
    \param L_out Late correlation output
    \param input The input signal input
    \param carrier The carrier signal input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param num_points The number of complex values in vectors
*/
extern void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_first_a_orc_impl(short* E_out_real, short* E_out_imag, short* P_out_real, short* P_out_imag, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, unsigned int num_points);
extern void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_second_a_orc_impl(short* L_out_real, short* L_out_imag, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* L_code, unsigned int num_points);
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_orc(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points){
    // 16-bit accumulators filled in by the two ORC routines
    short acc_e_re = 0;
    short acc_e_im = 0;
    short acc_p_re = 0;
    short acc_p_im = 0;
    short acc_l_re = 0;
    short acc_l_im = 0;

    // Byte views of the accumulators. Only the byte at offset 1 of each one is
    // copied to the outputs.
    // NOTE(review): on a little-endian target offset 1 is the high byte of the
    // short - confirm this matches what the ORC routines produce.
    const char* e_re_bytes = (const char*)&acc_e_re;
    const char* e_im_bytes = (const char*)&acc_e_im;
    const char* p_re_bytes = (const char*)&acc_p_re;
    const char* p_im_bytes = (const char*)&acc_p_im;
    const char* l_re_bytes = (const char*)&acc_l_re;
    const char* l_im_bytes = (const char*)&acc_l_im;

    // The ORC implementation of 8ic_x5_cw_epl_corr_8ic_x3 is split into two
    // routines because a single routine of that length seemed to give memory
    // problems (bad access, segmentation fault), and because at most 4
    // accumulators can be used while 6 are needed. As a consequence the
    // carrier wipe-off step is performed twice, once per routine; a single
    // routine would be quicker because that step would run only once.
    volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_first_a_orc_impl(&acc_e_re, &acc_e_im, &acc_p_re, &acc_p_im, input, carrier, E_code, P_code, num_points);
    volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_second_a_orc_impl(&acc_l_re, &acc_l_im, input, carrier, L_code, num_points);

    *E_out = lv_cmake(e_re_bytes[1], e_im_bytes[1]);
    *P_out = lv_cmake(p_re_bytes[1], p_im_bytes[1]);
    *L_out = lv_cmake(l_re_bytes[1], l_im_bytes[1]);
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_H */

View File

@ -1,797 +0,0 @@
/*!
* \file volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5.h
* \brief Volk protokernel: performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Late correlation with 16-bit vectors, and accumulates the results into float32. In order to avoid overflow, if input, carrier and XX_code have the same number of bits, they must be values between -3 and 3 (2 bits).
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that performs the carrier wipe-off mixing and the
* Very early, Early, Prompt, Late and very late correlation with 16 bits vectors (8 bits the
* real part and 8 bits the imaginary part), and accumulates the result
* in 32 bits single point values, returning float32 values:
* - The carrier wipe-off is done by multiplying the input signal by the
* carrier (multiplication of 16 bits vectors) It returns the input
* signal in base band (BB)
* - Very Early values are calculated by multiplying the input signal in BB by the
* very early code (multiplication of 16 bits vectors), accumulating the results into float32 values
* - Early values are calculated by multiplying the input signal in BB by the
* early code (multiplication of 16 bits vectors), accumulating the results into float32 values
* - Prompt values are calculated by multiplying the input signal in BB by the
* prompt code (multiplication of 16 bits vectors), accumulating the results into float32 values
* - Late values are calculated by multiplying the input signal in BB by the
* late code (multiplication of 16 bits vectors), accumulating the results into float32 values
* - Very Late values are calculated by multiplying the input signal in BB by the
* very late code (multiplication of 16 bits vectors), accumulating the results into float32 values
*
* -------------------------------------------------------------------------
* Bits analysis
*
* input = 8 bits
* carrier = 8 bits
* XX_code = 8 bits
* XX_out = 8 bits
* bb_signal_sample = 8 bits
*
* bb_signal_sample = input*carrier -> 17 bits limited to 8 bits = input and carrier must be values between -7 and 7 to avoid overflow (3 bits)
*
* XX_out16 = XX_code*bb_signal_sample -> 17 bits limited to 8 bits = XX_code and bb_signal_sample must be values between -7 and 7 to avoid overflow (3 bits)
*
* conclusion = input and carrier must be values between -1 and 1 (1 bit) and XX_code must be values between -7 and 7 to avoid overflow (3 bits)
* If input, carrier and XX_code have the same number of bits, they must be values between -3 and 3 to avoid overflow (2 bits).
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_H
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
    \brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Late correlation (SSE4.1, unaligned loads)
    \param input The input signal input
    \param carrier The carrier signal input
    \param VE_code Very Early PRN code replica input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param VL_code Very Late PRN code replica input
    \param VE_out Very Early correlation output
    \param E_out Early correlation output
    \param P_out Prompt correlation output
    \param L_out Late correlation output
    \param VL_out Very Late correlation output
    \param num_points The number of complex values in vectors

    Inputs are consumed 8 complex samples per iteration through unaligned
    loads; the num_points % 8 remaining samples are handled by a scalar loop.
    The CM_* macros come from the CommonMacros headers, which are not shown
    here; presumably they split a vector into real/imaginary lanes, perform the
    complex product and convert the result to float - confirm against those
    headers.
*/
static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse4_1(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* VE_code, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, const lv_8sc_t* VL_code, unsigned int num_points)
{
    const unsigned int sse_iters = num_points / 8;
    const unsigned int tail_points = num_points % 8;
    // Counters are unsigned so they compare cleanly against the unsigned bounds
    unsigned int number;
    unsigned int i;

    __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample;
    // NOTE(review): `output` is not referenced by name in this function; it is
    // kept in case one of the CM_* macros expands to it - confirm.
    __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;

    __m128 VE_code_acc, E_code_acc, P_code_acc, L_code_acc, VL_code_acc;
    __m128i input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2;
    __m128 output_ps;

    const lv_8sc_t* input_ptr = input;
    const lv_8sc_t* carrier_ptr = carrier;

    const lv_8sc_t* VE_code_ptr = VE_code;
    lv_32fc_t* VE_out_ptr = VE_out;
    const lv_8sc_t* E_code_ptr = E_code;
    lv_32fc_t* E_out_ptr = E_out;
    const lv_8sc_t* P_code_ptr = P_code;
    lv_32fc_t* P_out_ptr = P_out;
    const lv_8sc_t* L_code_ptr = L_code;
    lv_32fc_t* L_out_ptr = L_out;
    const lv_8sc_t* VL_code_ptr = VL_code;
    lv_32fc_t* VL_out_ptr = VL_out;

    *VE_out_ptr = 0;
    *E_out_ptr = 0;
    *P_out_ptr = 0;
    *L_out_ptr = 0;
    *VL_out_ptr = 0;

    // 0x00FF in every 16-bit lane. Same bit pattern as
    // _mm_set_epi8(0, 255, ...), without passing 255 through a char parameter.
    mult1 = _mm_set1_epi16(0x00FF);

    VE_code_acc = _mm_setzero_ps();
    E_code_acc = _mm_setzero_ps();
    P_code_acc = _mm_setzero_ps();
    L_code_acc = _mm_setzero_ps();
    VL_code_acc = _mm_setzero_ps();

    if (sse_iters > 0)
    {
        for (number = 0; number < sse_iters; number++)
        {
            //Perform the carrier wipe-off
            x = _mm_lddqu_si128((const __m128i*)input_ptr);
            y = _mm_lddqu_si128((const __m128i*)carrier_ptr);

            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)

            CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)

            //Get very early values
            y = _mm_lddqu_si128((const __m128i*)VE_code_ptr);

            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

            VE_code_acc = _mm_add_ps (VE_code_acc, output_ps);

            //Get early values
            y = _mm_lddqu_si128((const __m128i*)E_code_ptr);

            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

            E_code_acc = _mm_add_ps (E_code_acc, output_ps);

            //Get prompt values
            y = _mm_lddqu_si128((const __m128i*)P_code_ptr);

            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

            P_code_acc = _mm_add_ps (P_code_acc, output_ps);

            //Get late values
            y = _mm_lddqu_si128((const __m128i*)L_code_ptr);

            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

            L_code_acc = _mm_add_ps (L_code_acc, output_ps);

            //Get very late values
            y = _mm_lddqu_si128((const __m128i*)VL_code_ptr);

            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

            VL_code_acc = _mm_add_ps (VL_code_acc, output_ps);

            input_ptr += 8;
            carrier_ptr += 8;
            VE_code_ptr += 8;
            E_code_ptr += 8;
            P_code_ptr += 8;
            L_code_ptr += 8;
            VL_code_ptr += 8;
        }

        // Each __m128 accumulator is stored into a two-element complex array
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t VE_dotProductVector[2];
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t E_dotProductVector[2];
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t P_dotProductVector[2];
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t L_dotProductVector[2];
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t VL_dotProductVector[2];

        _mm_storeu_ps((float*)VE_dotProductVector,VE_code_acc); // Store the results back into the dot product vector
        _mm_storeu_ps((float*)E_dotProductVector,E_code_acc); // Store the results back into the dot product vector
        _mm_storeu_ps((float*)P_dotProductVector,P_code_acc); // Store the results back into the dot product vector
        _mm_storeu_ps((float*)L_dotProductVector,L_code_acc); // Store the results back into the dot product vector
        _mm_storeu_ps((float*)VL_dotProductVector,VL_code_acc); // Store the results back into the dot product vector

        // Fold the two partial complex sums of every correlator into its output
        for (i = 0; i < 2; ++i)
        {
            *VE_out_ptr += VE_dotProductVector[i];
            *E_out_ptr += E_dotProductVector[i];
            *P_out_ptr += P_dotProductVector[i];
            *L_out_ptr += L_dotProductVector[i];
            *VL_out_ptr += VL_dotProductVector[i];
        }
    }

    // Scalar tail: the samples that do not fill a whole 8-sample block
    lv_8sc_t bb_signal_sample;
    for (i = 0; i < tail_points; ++i)
    {
        //Perform the carrier wipe-off
        bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
        // Now get very early, early, prompt, late and very late values for each
        *VE_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VE_code_ptr++));
        *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
        *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
        *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
        *VL_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VL_code_ptr++));
    }
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
\brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param VE_code Very Early PRN code replica input
\param E_code Early PRN code replica input
\param P_code Prompt PRN code replica input
\param L_code Late PRN code replica input
\param VL_code Very Late PRN code replica input
\param VE_out Very Early correlation output
\param E_out Early correlation output
\param P_out Prompt correlation output
\param L_out Late correlation output
\param VL_out Very Late correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_sse2(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* VE_code, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, const lv_8sc_t* VL_code, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
__m128i x, y, real_bb_signal_sample, imag_bb_signal_sample;
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
__m128 VE_code_acc, E_code_acc, P_code_acc, L_code_acc, VL_code_acc;
__m128i input_i_1, input_i_2, output_i32;
__m128 output_ps_1, output_ps_2;
const lv_8sc_t* input_ptr = input;
const lv_8sc_t* carrier_ptr = carrier;
const lv_8sc_t* VE_code_ptr = VE_code;
lv_32fc_t* VE_out_ptr = VE_out;
const lv_8sc_t* E_code_ptr = E_code;
lv_32fc_t* E_out_ptr = E_out;
const lv_8sc_t* P_code_ptr = P_code;
lv_32fc_t* P_out_ptr = P_out;
const lv_8sc_t* L_code_ptr = L_code;
lv_32fc_t* L_out_ptr = L_out;
const lv_8sc_t* VL_code_ptr = VL_code;
lv_32fc_t* VL_out_ptr = VL_out;
*VE_out_ptr = 0;
*E_out_ptr = 0;
*P_out_ptr = 0;
*L_out_ptr = 0;
*VL_out_ptr = 0;
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
VE_code_acc = _mm_setzero_ps();
E_code_acc = _mm_setzero_ps();
P_code_acc = _mm_setzero_ps();
L_code_acc = _mm_setzero_ps();
VL_code_acc = _mm_setzero_ps();
if (sse_iters>0)
{
for(unsigned int number = 0;number < sse_iters; number++)
{
//Perform the carrier wipe-off
x = _mm_loadu_si128((__m128i*)input_ptr);
y = _mm_loadu_si128((__m128i*)carrier_ptr);
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)
//Get very early values
y = _mm_loadu_si128((__m128i*)VE_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
VE_code_acc = _mm_add_ps (VE_code_acc, output_ps_1);
VE_code_acc = _mm_add_ps (VE_code_acc, output_ps_2);
//Get early values
y = _mm_loadu_si128((__m128i*)E_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
E_code_acc = _mm_add_ps (E_code_acc, output_ps_1);
E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);
//Get prompt values
y = _mm_loadu_si128((__m128i*)P_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
P_code_acc = _mm_add_ps (P_code_acc, output_ps_1);
P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);
//Get late values
y = _mm_loadu_si128((__m128i*)L_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
L_code_acc = _mm_add_ps (L_code_acc, output_ps_1);
L_code_acc = _mm_add_ps (L_code_acc, output_ps_2);
//Get very late values
y = _mm_loadu_si128((__m128i*)VL_code_ptr);
CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
VL_code_acc = _mm_add_ps (VL_code_acc, output_ps_1);
VL_code_acc = _mm_add_ps (VL_code_acc, output_ps_2);
input_ptr += 8;
carrier_ptr += 8;
VE_code_ptr += 8;
E_code_ptr += 8;
P_code_ptr += 8;
L_code_ptr += 8;
VL_code_ptr += 8;
}
__VOLK_ATTR_ALIGNED(16) lv_32fc_t VE_dotProductVector[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t E_dotProductVector[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t P_dotProductVector[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t L_dotProductVector[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t VL_dotProductVector[2];
_mm_storeu_ps((float*)VE_dotProductVector,VE_code_acc); // Store the results back into the dot product vector
_mm_storeu_ps((float*)E_dotProductVector,E_code_acc); // Store the results back into the dot product vector
_mm_storeu_ps((float*)P_dotProductVector,P_code_acc); // Store the results back into the dot product vector
_mm_storeu_ps((float*)L_dotProductVector,L_code_acc); // Store the results back into the dot product vector
_mm_storeu_ps((float*)VL_dotProductVector,VL_code_acc); // Store the results back into the dot product vector
for (unsigned int i = 0; i<2; ++i)
{
*VE_out_ptr += VE_dotProductVector[i];
*E_out_ptr += E_dotProductVector[i];
*P_out_ptr += P_dotProductVector[i];
*L_out_ptr += L_dotProductVector[i];
*VL_out_ptr += VL_dotProductVector[i];
}
}
lv_8sc_t bb_signal_sample;
for(unsigned int i=0; i < num_points%8; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
// Now get very early, early, prompt, late and very late values for each
*VE_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VE_code_ptr++));
*E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
*P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
*L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
*VL_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VL_code_ptr++));
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
 \brief Portable C kernel: carrier wipe-off mixing followed by the Very Early, Early, Prompt, Late and Very Late correlations.

 For every sample, input is multiplied by carrier and the product is stored in an
 8-bit complex temporary. That temporary is then multiplied by each of the five code
 replicas; each product is converted to lv_32fc_t and added to the matching output.
 All five outputs are reset to zero before the accumulation starts.

 \param VE_out Very Early correlation output
 \param E_out Early correlation output
 \param P_out Prompt correlation output
 \param L_out Late correlation output
 \param VL_out Very Late correlation output
 \param input The input signal input
 \param carrier The carrier signal input
 \param VE_code Very Early PRN code replica input
 \param E_code Early PRN code replica input
 \param P_code Prompt PRN code replica input
 \param L_code Late PRN code replica input
 \param VL_code Very Late PRN code replica input
 \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* VE_code, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, const lv_8sc_t* VL_code, unsigned int num_points)
{
    unsigned int n;

    // The outputs are running sums, so they are cleared before anything is added.
    *VE_out = 0;
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;
    *VL_out = 0;

    for (n = 0; n < num_points; n++)
    {
        // Carrier wipe-off: the mixed sample is held in an 8-bit complex value.
        const lv_8sc_t baseband = input[n] * carrier[n];

        // Correlate the mixed sample against each code replica.
        *VE_out += (lv_32fc_t) (baseband * VE_code[n]);
        *E_out += (lv_32fc_t) (baseband * E_code[n]);
        *P_out += (lv_32fc_t) (baseband * P_code[n]);
        *L_out += (lv_32fc_t) (baseband * L_code[n]);
        *VL_out += (lv_32fc_t) (baseband * VL_code[n]);
    }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_u_H */
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_a_H
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
 \brief SSE4.1 kernel (aligned loads): carrier wipe-off mixing followed by the Very Early, Early, Prompt, Late and Very Late correlations.

 The vector section consumes 8 complex samples per iteration and reads them with
 _mm_load_si128, which requires 16-byte aligned addresses for input, carrier and
 every code replica. The num_points % 8 samples left over are handled by a scalar
 loop. All five outputs are reset to zero before accumulation.

 The CM_* macros come from the CommonMacros headers and are not visible here; the
 variables handed to them are used as scratch/result registers.

 \param VE_out Very Early correlation output
 \param E_out Early correlation output
 \param P_out Prompt correlation output
 \param L_out Late correlation output
 \param VL_out Very Late correlation output
 \param input The input signal input
 \param carrier The carrier signal input
 \param VE_code Very Early PRN code replica input
 \param E_code Early PRN code replica input
 \param P_code Prompt PRN code replica input
 \param L_code Late PRN code replica input
 \param VL_code Very Late PRN code replica input
 \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_a_sse4_1(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* VE_code, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, const lv_8sc_t* VL_code, unsigned int num_points)
{
    const unsigned int sse_iters = num_points / 8;

    __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample;
    // NOTE(review): `output` is not referenced directly in this function; it is kept
    // because the CM_* macros are defined elsewhere and may refer to it by name.
    // Confirm against CommonMacros before removing it.
    __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;

    __m128 VE_code_acc, E_code_acc, P_code_acc, L_code_acc, VL_code_acc;

    __m128i input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2;
    __m128 output_ps;

    const lv_8sc_t* input_ptr = input;
    const lv_8sc_t* carrier_ptr = carrier;

    const lv_8sc_t* VE_code_ptr = VE_code;
    lv_32fc_t* VE_out_ptr = VE_out;
    const lv_8sc_t* E_code_ptr = E_code;
    lv_32fc_t* E_out_ptr = E_out;
    const lv_8sc_t* P_code_ptr = P_code;
    lv_32fc_t* P_out_ptr = P_out;
    const lv_8sc_t* L_code_ptr = L_code;
    lv_32fc_t* L_out_ptr = L_out;
    const lv_8sc_t* VL_code_ptr = VL_code;
    lv_32fc_t* VL_out_ptr = VL_out;

    // Outputs are accumulated with += below, so they start from zero.
    *VE_out_ptr = 0;
    *E_out_ptr = 0;
    *P_out_ptr = 0;
    *L_out_ptr = 0;
    *VL_out_ptr = 0;

    // Byte mask with 0xFF in every even byte position and 0x00 in every odd one.
    mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);

    VE_code_acc = _mm_setzero_ps();
    E_code_acc = _mm_setzero_ps();
    P_code_acc = _mm_setzero_ps();
    L_code_acc = _mm_setzero_ps();
    VL_code_acc = _mm_setzero_ps();

    if (sse_iters>0)
    {
        // The counter is unsigned to match the type of sse_iters.
        for(unsigned int number = 0;number < sse_iters; number++)
        {
            //Perform the carrier wipe-off
            x = _mm_load_si128((__m128i*)input_ptr);
            y = _mm_load_si128((__m128i*)carrier_ptr);

            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)

            CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)

            //Get very early values
            y = _mm_load_si128((__m128i*)VE_code_ptr);

            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

            VE_code_acc = _mm_add_ps (VE_code_acc, output_ps);

            //Get early values
            y = _mm_load_si128((__m128i*)E_code_ptr);

            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

            E_code_acc = _mm_add_ps (E_code_acc, output_ps);

            //Get prompt values
            y = _mm_load_si128((__m128i*)P_code_ptr);

            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

            P_code_acc = _mm_add_ps (P_code_acc, output_ps);

            //Get late values
            y = _mm_load_si128((__m128i*)L_code_ptr);

            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

            L_code_acc = _mm_add_ps (L_code_acc, output_ps);

            //Get very late values
            y = _mm_load_si128((__m128i*)VL_code_ptr);

            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE4_1(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps)

            VL_code_acc = _mm_add_ps (VL_code_acc, output_ps);

            // Advance every stream by the 8 complex samples just consumed.
            input_ptr += 8;
            carrier_ptr += 8;
            VE_code_ptr += 8;
            E_code_ptr += 8;
            P_code_ptr += 8;
            L_code_ptr += 8;
            VL_code_ptr += 8;
        }

        // Each 128-bit accumulator is spilled as two lv_32fc_t partial sums.
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t VE_dotProductVector[2];
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t E_dotProductVector[2];
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t P_dotProductVector[2];
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t L_dotProductVector[2];
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t VL_dotProductVector[2];

        _mm_store_ps((float*)VE_dotProductVector,VE_code_acc); // Store the results back into the dot product vector
        _mm_store_ps((float*)E_dotProductVector,E_code_acc); // Store the results back into the dot product vector
        _mm_store_ps((float*)P_dotProductVector,P_code_acc); // Store the results back into the dot product vector
        _mm_store_ps((float*)L_dotProductVector,L_code_acc); // Store the results back into the dot product vector
        _mm_store_ps((float*)VL_dotProductVector,VL_code_acc); // Store the results back into the dot product vector

        // Fold both partial sums of each correlator into its output.
        for (unsigned int i = 0; i<2; ++i)
        {
            *VE_out_ptr += VE_dotProductVector[i];
            *E_out_ptr += E_dotProductVector[i];
            *P_out_ptr += P_dotProductVector[i];
            *L_out_ptr += L_dotProductVector[i];
            *VL_out_ptr += VL_dotProductVector[i];
        }
    }

    // Scalar tail: the num_points % 8 samples the vector loop did not cover.
    lv_8sc_t bb_signal_sample;
    for(unsigned int i=0; i < num_points%8; ++i)
    {
        //Perform the carrier wipe-off
        bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
        // Now get very early, early, prompt, late and very late values for each
        *VE_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VE_code_ptr++));
        *E_out_ptr += (lv_32fc_t) (bb_signal_sample * (*E_code_ptr++));
        *P_out_ptr += (lv_32fc_t) (bb_signal_sample * (*P_code_ptr++));
        *L_out_ptr += (lv_32fc_t) (bb_signal_sample * (*L_code_ptr++));
        *VL_out_ptr += (lv_32fc_t) (bb_signal_sample * (*VL_code_ptr++));
    }
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
 \brief SSE2 kernel (aligned loads): carrier wipe-off mixing followed by the Very Early, Early, Prompt, Late and Very Late correlations.

 Blocks of 8 complex samples are read with _mm_load_si128, which requires 16-byte
 aligned addresses for input, carrier and all code replicas. Whatever is left after
 the last full block (num_points % 8 samples) goes through a scalar loop. The five
 outputs are cleared first and then accumulated into.

 The CM_* macros live in the CommonMacros headers and are not visible here; the
 variables passed to them act as scratch/result registers.

 \param VE_out Very Early correlation output
 \param E_out Early correlation output
 \param P_out Prompt correlation output
 \param L_out Late correlation output
 \param VL_out Very Late correlation output
 \param input The input signal input
 \param carrier The carrier signal input
 \param VE_code Very Early PRN code replica input
 \param E_code Early PRN code replica input
 \param P_code Prompt PRN code replica input
 \param L_code Late PRN code replica input
 \param VL_code Very Late PRN code replica input
 \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_a_sse2(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* VE_code, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, const lv_8sc_t* VL_code, unsigned int num_points)
{
    const unsigned int sse_iters = num_points / 8;
    const unsigned int leftover = num_points % 8;

    // Scratch registers handed to the CM_* macros.
    __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample;
    __m128i realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
    __m128i input_i_1, input_i_2, output_i32;
    __m128 output_ps_1, output_ps_2;

    // Byte mask with 0xFF in every even byte position and 0x00 in every odd one.
    __m128i mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);

    // One float accumulator register per correlator.
    __m128 VE_code_acc = _mm_setzero_ps();
    __m128 E_code_acc = _mm_setzero_ps();
    __m128 P_code_acc = _mm_setzero_ps();
    __m128 L_code_acc = _mm_setzero_ps();
    __m128 VL_code_acc = _mm_setzero_ps();

    // Read cursors over the seven input streams.
    const lv_8sc_t* input_ptr = input;
    const lv_8sc_t* carrier_ptr = carrier;
    const lv_8sc_t* VE_code_ptr = VE_code;
    const lv_8sc_t* E_code_ptr = E_code;
    const lv_8sc_t* P_code_ptr = P_code;
    const lv_8sc_t* L_code_ptr = L_code;
    const lv_8sc_t* VL_code_ptr = VL_code;

    // The outputs are sums; start them at zero.
    *VE_out = 0;
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;
    *VL_out = 0;

    if (sse_iters > 0)
    {
        unsigned int block;
        for (block = 0; block < sse_iters; ++block)
        {
            // Carrier wipe-off on 8 complex samples.
            x = _mm_load_si128((__m128i*)input_ptr);
            y = _mm_load_si128((__m128i*)carrier_ptr);
            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(x, mult1, realx, imagx)
            CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(y, mult1, realy, imagy)
            CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_bb_signal_sample, imag_bb_signal_sample)

            // Very early correlator.
            y = _mm_load_si128((__m128i*)VE_code_ptr);
            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            VE_code_acc = _mm_add_ps (VE_code_acc, output_ps_1);
            VE_code_acc = _mm_add_ps (VE_code_acc, output_ps_2);

            // Early correlator.
            y = _mm_load_si128((__m128i*)E_code_ptr);
            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            E_code_acc = _mm_add_ps (E_code_acc, output_ps_1);
            E_code_acc = _mm_add_ps (E_code_acc, output_ps_2);

            // Prompt correlator.
            y = _mm_load_si128((__m128i*)P_code_ptr);
            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            P_code_acc = _mm_add_ps (P_code_acc, output_ps_1);
            P_code_acc = _mm_add_ps (P_code_acc, output_ps_2);

            // Late correlator.
            y = _mm_load_si128((__m128i*)L_code_ptr);
            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            L_code_acc = _mm_add_ps (L_code_acc, output_ps_1);
            L_code_acc = _mm_add_ps (L_code_acc, output_ps_2);

            // Very late correlator.
            y = _mm_load_si128((__m128i*)VL_code_ptr);
            CM_8IC_X2_CW_CORR_32FC_X2_U_SSE2(y, mult1, realy, imagy, real_bb_signal_sample, imag_bb_signal_sample,realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2)
            VL_code_acc = _mm_add_ps (VL_code_acc, output_ps_1);
            VL_code_acc = _mm_add_ps (VL_code_acc, output_ps_2);

            // Step all cursors past the block just processed.
            input_ptr += 8;
            carrier_ptr += 8;
            VE_code_ptr += 8;
            E_code_ptr += 8;
            P_code_ptr += 8;
            L_code_ptr += 8;
            VL_code_ptr += 8;
        }

        // Spill each accumulator register as two lv_32fc_t partial sums.
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t ve_partial[2];
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t e_partial[2];
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t p_partial[2];
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t l_partial[2];
        __VOLK_ATTR_ALIGNED(16) lv_32fc_t vl_partial[2];

        _mm_store_ps((float*)ve_partial, VE_code_acc);
        _mm_store_ps((float*)e_partial, E_code_acc);
        _mm_store_ps((float*)p_partial, P_code_acc);
        _mm_store_ps((float*)l_partial, L_code_acc);
        _mm_store_ps((float*)vl_partial, VL_code_acc);

        // Fold the partial sums into the outputs, one lane at a time.
        unsigned int lane;
        for (lane = 0; lane < 2; ++lane)
        {
            *VE_out += ve_partial[lane];
            *E_out += e_partial[lane];
            *P_out += p_partial[lane];
            *L_out += l_partial[lane];
            *VL_out += vl_partial[lane];
        }
    }

    // Scalar tail for the samples that did not fill a whole block of 8.
    unsigned int k;
    for (k = 0; k < leftover; ++k)
    {
        // Carrier wipe-off; the mixed sample is held in an 8-bit complex value.
        const lv_8sc_t baseband = input_ptr[k] * carrier_ptr[k];

        *VE_out += (lv_32fc_t) (baseband * VE_code_ptr[k]);
        *E_out += (lv_32fc_t) (baseband * E_code_ptr[k]);
        *P_out += (lv_32fc_t) (baseband * P_code_ptr[k]);
        *L_out += (lv_32fc_t) (baseband * L_code_ptr[k]);
        *VL_out += (lv_32fc_t) (baseband * VL_code_ptr[k]);
    }
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
 \brief Portable C kernel (aligned variant): carrier wipe-off mixing followed by the Very Early, Early, Prompt, Late and Very Late correlations.

 Each input sample is multiplied by the matching carrier sample and the product is
 kept in an 8-bit complex temporary. The temporary is multiplied by each code
 replica, converted to lv_32fc_t and summed into the corresponding output. The
 outputs are set to zero before the loop.

 \param VE_out Very Early correlation output
 \param E_out Early correlation output
 \param P_out Prompt correlation output
 \param L_out Late correlation output
 \param VL_out Very Late correlation output
 \param input The input signal input
 \param carrier The carrier signal input
 \param VE_code Very Early PRN code replica input
 \param E_code Early PRN code replica input
 \param P_code Prompt PRN code replica input
 \param L_code Late PRN code replica input
 \param VL_code Very Late PRN code replica input
 \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_a_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* VE_code, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, const lv_8sc_t* VL_code, unsigned int num_points)
{
    unsigned int idx = 0;

    // Accumulators must start empty.
    *VE_out = 0;
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;
    *VL_out = 0;

    while (idx < num_points)
    {
        // Carrier wipe-off; the mixed sample is held in an 8-bit complex value.
        const lv_8sc_t mixed = input[idx] * carrier[idx];

        // Very early, early, prompt, late and very late correlation.
        *VE_out += (lv_32fc_t) (mixed * VE_code[idx]);
        *E_out += (lv_32fc_t) (mixed * E_code[idx]);
        *P_out += (lv_32fc_t) (mixed * P_code[idx]);
        *L_out += (lv_32fc_t) (mixed * L_code[idx]);
        *VL_out += (lv_32fc_t) (mixed * VL_code[idx]);

        ++idx;
    }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5_a_H */

View File

@ -1,768 +0,0 @@
/*!
* \file volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5.h
* \brief Volk protokernel: performs the carrier wipe-off mixing and the Very early, Early, Prompt, Late and very late correlation with 16 bits vectors, and accumulates the results into float32. This protokernel is called "safe" because it checks when the inputs have a -128 value, and replaces it with a -127 value. By doing this it avoids malfunctioning, but it takes more time than the "unsafe" implementation. In order to avoid overflow, "input" and "carrier" must be values between -7 and 7 and "XX_code inputs" must be values between -127 and 127.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that performs the carrier wipe-off mixing and the
* Very early, Early, Prompt, Late and very late correlation with 16 bits vectors (8 bits the
* real part and 8 bits the imaginary part), and accumulates the result
* in 32 bits single point values, returning float32 values:
* - The carrier wipe-off is done by multiplying the input signal by the
* carrier (multiplication of 16 bits vectors) It returns the input
* signal in base band (BB)
* - Very Early values are calculated by multiplying the input signal in BB by the
* very early code (multiplication of 16 bits vectors), accumulating the results into float32 values
* - Early values are calculated by multiplying the input signal in BB by the
* early code (multiplication of 16 bits vectors), accumulating the results into float32 values
* - Prompt values are calculated by multiplying the input signal in BB by the
* prompt code (multiplication of 16 bits vectors), accumulating the results into float32 values
* - Late values are calculated by multiplying the input signal in BB by the
* late code (multiplication of 16 bits vectors), accumulating the results into float32 values
* - Very Late values are calculated by multiplying the input signal in BB by the
* very late code (multiplication of 16 bits vectors), accumulating the results into float32 values
*
* -------------------------------------------------------------------------
* Bits analysis
*
* input = 8 bits
* carrier = 8 bits
* XX_code = 8 bits
* XX_out16 = 16 bits
* bb_signal_sample = 8 bits
*
* bb_signal_sample = input*carrier -> 17 bits limited to 8 bits = input and carrier must be values between -7 and 7 to avoid overflow (3 bits)
*
* XX_out16 = XX_code*bb_signal_sample -> 17 bits limited to 16 bits = XX_code must be values between -127 and 127 to avoid overflow (7 bits)
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5_u_H
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
 \brief "Safe" SSE4.1 kernel (unaligned loads): carrier wipe-off mixing followed by the Very Early, Early, Prompt, Late and Very Late correlations.

 The vector section consumes 8 complex samples per iteration and reads them with
 _mm_lddqu_si128, so the input buffers need no particular alignment. Real and
 imaginary parts are accumulated in separate float registers and reduced to one
 complex value per correlator at the end. The num_points % 8 remaining samples are
 handled by a scalar loop that replaces any -128 component of a code sample with
 -127 before multiplying.

 The outputs are always cleared first, so the result is well defined for any
 num_points, including values below 8 where the vector section is skipped.

 The CM_* macros come from the CommonMacros headers and are not visible here; the
 variables handed to them are used as scratch/result registers.

 \param VE_out Very Early correlation output
 \param E_out Early correlation output
 \param P_out Prompt correlation output
 \param L_out Late correlation output
 \param VL_out Very Late correlation output
 \param input The input signal input
 \param carrier The carrier signal input
 \param VE_code Very Early PRN code replica input
 \param E_code Early PRN code replica input
 \param P_code Prompt PRN code replica input
 \param L_code Late PRN code replica input
 \param VL_code Very Late PRN code replica input
 \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5_u_sse4_1(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* VE_code, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, const lv_8sc_t* VL_code, unsigned int num_points)
{
    const unsigned int sse_iters = num_points / 8;

    __m128i x, x_abs, y, y_aux, bb_signal_sample_aux, bb_signal_sample_aux_abs;
    __m128i real_output, imag_output;

    __m128 real_VE_code_acc, imag_VE_code_acc, real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc, real_VL_code_acc, imag_VL_code_acc;

    __m128i input_i_1, input_i_2, output_i32;
    __m128 real_output_ps, imag_output_ps;

    // Constants handed to the CM_* macros.
    __m128i minus128control;
    __m128i minus128 = _mm_set1_epi8 (-128);
    __m128i check_sign_sequence = _mm_set_epi8 (255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1);
    __m128i rearrange_sequence = _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
    __m128i mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);

    const lv_8sc_t* input_ptr = input;
    const lv_8sc_t* carrier_ptr = carrier;

    const lv_8sc_t* VE_code_ptr = VE_code;
    lv_32fc_t* VE_out_ptr = VE_out;
    const lv_8sc_t* E_code_ptr = E_code;
    lv_32fc_t* E_out_ptr = E_out;
    const lv_8sc_t* P_code_ptr = P_code;
    lv_32fc_t* P_out_ptr = P_out;
    const lv_8sc_t* L_code_ptr = L_code;
    lv_32fc_t* L_out_ptr = L_out;
    const lv_8sc_t* VL_code_ptr = VL_code;
    lv_32fc_t* VL_out_ptr = VL_out;

    float VE_out_real = 0;
    float VE_out_imag = 0;
    float E_out_real = 0;
    float E_out_imag = 0;
    float P_out_real = 0;
    float P_out_imag = 0;
    float L_out_real = 0;
    float L_out_imag = 0;
    float VL_out_real = 0;
    float VL_out_imag = 0;

    // Clear the outputs up front: when num_points < 8 the vector section below is
    // skipped and the scalar tail accumulates with +=, so the outputs must not
    // carry whatever the caller left in them.
    *VE_out_ptr = 0;
    *E_out_ptr = 0;
    *P_out_ptr = 0;
    *L_out_ptr = 0;
    *VL_out_ptr = 0;

    real_VE_code_acc = _mm_setzero_ps();
    imag_VE_code_acc = _mm_setzero_ps();
    real_E_code_acc = _mm_setzero_ps();
    imag_E_code_acc = _mm_setzero_ps();
    real_P_code_acc = _mm_setzero_ps();
    imag_P_code_acc = _mm_setzero_ps();
    real_L_code_acc = _mm_setzero_ps();
    imag_L_code_acc = _mm_setzero_ps();
    real_VL_code_acc = _mm_setzero_ps();
    imag_VL_code_acc = _mm_setzero_ps();

    if (sse_iters>0)
    {
        // The counter is unsigned to match the type of sse_iters.
        for(unsigned int number = 0;number < sse_iters; number++)
        {
            //Perform the carrier wipe-off
            x = _mm_lddqu_si128((__m128i*)input_ptr);
            y = _mm_lddqu_si128((__m128i*)carrier_ptr);

            x_abs = _mm_abs_epi8 (x);

            CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, x, check_sign_sequence, rearrange_sequence, y_aux, x_abs, real_output, imag_output)

            // Merge the two results into one register, selecting bytes with mult1.
            imag_output = _mm_slli_si128 (imag_output, 1);
            bb_signal_sample_aux = _mm_blendv_epi8 (imag_output, real_output, mult1);
            bb_signal_sample_aux_abs = _mm_abs_epi8 (bb_signal_sample_aux);

            //Get very early values
            y = _mm_lddqu_si128((__m128i*)VE_code_ptr);

            CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, minus128, minus128control, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_VE_code_acc = _mm_add_ps (real_VE_code_acc, real_output_ps);
            imag_VE_code_acc = _mm_add_ps (imag_VE_code_acc, imag_output_ps);

            //Get early values
            y = _mm_lddqu_si128((__m128i*)E_code_ptr);

            CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, minus128, minus128control, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
            imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);

            //Get prompt values
            y = _mm_lddqu_si128((__m128i*)P_code_ptr);

            CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, minus128, minus128control, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
            imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);

            //Get late values
            y = _mm_lddqu_si128((__m128i*)L_code_ptr);

            CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, minus128, minus128control, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
            imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);

            //Get very late values
            y = _mm_lddqu_si128((__m128i*)VL_code_ptr);

            CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, minus128, minus128control, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_VL_code_acc = _mm_add_ps (real_VL_code_acc, real_output_ps);
            imag_VL_code_acc = _mm_add_ps (imag_VL_code_acc, imag_output_ps);

            // Advance every stream by the 8 complex samples just consumed.
            input_ptr += 8;
            carrier_ptr += 8;
            VE_code_ptr += 8;
            E_code_ptr += 8;
            P_code_ptr += 8;
            L_code_ptr += 8;
            VL_code_ptr += 8;
        }

        __VOLK_ATTR_ALIGNED(16) float real_VE_dotProductVector[4];
        __VOLK_ATTR_ALIGNED(16) float imag_VE_dotProductVector[4];
        __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
        __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
        __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
        __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
        __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
        __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
        __VOLK_ATTR_ALIGNED(16) float real_VL_dotProductVector[4];
        __VOLK_ATTR_ALIGNED(16) float imag_VL_dotProductVector[4];

        _mm_storeu_ps((float*)real_VE_dotProductVector,real_VE_code_acc); // Store the results back into the dot product vector
        _mm_storeu_ps((float*)imag_VE_dotProductVector,imag_VE_code_acc); // Store the results back into the dot product vector
        _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
        _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
        _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
        _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
        _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
        _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
        _mm_storeu_ps((float*)real_VL_dotProductVector,real_VL_code_acc); // Store the results back into the dot product vector
        _mm_storeu_ps((float*)imag_VL_dotProductVector,imag_VL_code_acc); // Store the results back into the dot product vector

        // Horizontal reduction: sum the four lanes of every accumulator.
        for (unsigned int i = 0; i<4; ++i)
        {
            VE_out_real += real_VE_dotProductVector[i];
            VE_out_imag += imag_VE_dotProductVector[i];
            E_out_real += real_E_dotProductVector[i];
            E_out_imag += imag_E_dotProductVector[i];
            P_out_real += real_P_dotProductVector[i];
            P_out_imag += imag_P_dotProductVector[i];
            L_out_real += real_L_dotProductVector[i];
            L_out_imag += imag_L_dotProductVector[i];
            VL_out_real += real_VL_dotProductVector[i];
            VL_out_imag += imag_VL_dotProductVector[i];
        }
        *VE_out_ptr = lv_cmake(VE_out_real, VE_out_imag);
        *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
        *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
        *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
        *VL_out_ptr = lv_cmake(VL_out_real, VL_out_imag);
    }

    // Scalar tail: the num_points % 8 samples the vector loop did not cover.
    if(num_points%8!=0)
    {
        lv_16sc_t bb_signal_sample;
        lv_16sc_t VE_code_value;
        lv_16sc_t E_code_value;
        lv_16sc_t P_code_value;
        lv_16sc_t L_code_value;
        lv_16sc_t VL_code_value;

        for(unsigned int i=0; i < num_points%8; ++i)
        {
            VE_code_value = *VE_code_ptr++;
            E_code_value = *E_code_ptr++;
            P_code_value = *P_code_ptr++;
            L_code_value = *L_code_ptr++;
            VL_code_value = *VL_code_ptr++;

            // Replace any -128 component of a code sample with -127.
            if(lv_creal(VE_code_value) == -128)
            {
                VE_code_value = lv_cmake(-127, lv_cimag(VE_code_value));
            }
            if(lv_cimag(VE_code_value) == -128)
            {
                VE_code_value = lv_cmake(lv_creal(VE_code_value), -127);
            }

            if(lv_creal(E_code_value) == -128)
            {
                E_code_value = lv_cmake(-127, lv_cimag(E_code_value));
            }
            if(lv_cimag(E_code_value) == -128)
            {
                E_code_value = lv_cmake(lv_creal(E_code_value), -127);
            }

            if(lv_creal(P_code_value) == -128)
            {
                P_code_value = lv_cmake(-127, lv_cimag(P_code_value));
            }
            if(lv_cimag(P_code_value) == -128)
            {
                P_code_value = lv_cmake(lv_creal(P_code_value), -127);
            }

            if(lv_creal(L_code_value) == -128)
            {
                L_code_value = lv_cmake(-127, lv_cimag(L_code_value));
            }
            if(lv_cimag(L_code_value) == -128)
            {
                L_code_value = lv_cmake(lv_creal(L_code_value), -127);
            }

            // The very late replica gets the same treatment as the other four.
            if(lv_creal(VL_code_value) == -128)
            {
                VL_code_value = lv_cmake(-127, lv_cimag(VL_code_value));
            }
            if(lv_cimag(VL_code_value) == -128)
            {
                VL_code_value = lv_cmake(lv_creal(VL_code_value), -127);
            }

            //Perform the carrier wipe-off
            bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
            // Now get very early, early, prompt, late and very late values for each
            *VE_out_ptr += (lv_32fc_t) (bb_signal_sample * VE_code_value);
            *E_out_ptr += (lv_32fc_t) (bb_signal_sample * E_code_value);
            *P_out_ptr += (lv_32fc_t) (bb_signal_sample * P_code_value);
            *L_out_ptr += (lv_32fc_t) (bb_signal_sample * L_code_value);
            *VL_out_ptr += (lv_32fc_t) (bb_signal_sample * VL_code_value);
        }
    }
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Generic carrier wipe-off plus Very Early, Early, Prompt, Late and Very Late correlation.

    For every sample the input is multiplied by the carrier, and the product is
    then multiplied by each of the five code replicas and accumulated into the
    matching output. Any real or imaginary code component equal to -128 is
    replaced by -127 before the multiplication.

    \param input The input signal input
    \param carrier The carrier signal input
    \param VE_code Very Early PRN code replica input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param VL_code Very Late PRN code replica input
    \param VE_out Very Early correlation output
    \param E_out Early correlation output
    \param P_out Prompt correlation output
    \param L_out Late correlation output
    \param VL_out Very Late correlation output
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* VE_code, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, const lv_8sc_t* VL_code, unsigned int num_points)
{
    /* The five correlator taps, always visited in the order VE, E, P, L, VL. */
    const lv_8sc_t* const replicas[5] = {VE_code, E_code, P_code, L_code, VL_code};
    lv_32fc_t* const outputs[5] = {VE_out, E_out, P_out, L_out, VL_out};
    lv_16sc_t code_now[5];
    lv_16sc_t baseband;
    unsigned int n;
    unsigned int tap;

    /* Every accumulator starts from zero. */
    for(tap = 0; tap < 5; ++tap)
        {
            *outputs[tap] = 0;
        }

    for(n = 0; n < num_points; ++n)
        {
            /* Fetch the current code samples and move -128 components to -127. */
            for(tap = 0; tap < 5; ++tap)
                {
                    code_now[tap] = replicas[tap][n];
                    if(lv_creal(code_now[tap]) == -128)
                        {
                            code_now[tap] = lv_cmake(-127, lv_cimag(code_now[tap]));
                        }
                    if(lv_cimag(code_now[tap]) == -128)
                        {
                            code_now[tap] = lv_cmake(lv_creal(code_now[tap]), -127);
                        }
                }

            /* Carrier wipe-off */
            baseband = input[n] * carrier[n];

            /* Accumulate the correlation of the baseband sample with each replica. */
            for(tap = 0; tap < 5; ++tap)
                {
                    *outputs[tap] += (lv_32fc_t) (baseband * code_now[tap]);
                }
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5_u_H */
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5_a_H
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
    \brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Late correlation (SSE4.1, aligned loads)

    Eight complex samples are processed per SIMD iteration; the remaining
    num_points % 8 samples are handled by a scalar loop. In the scalar loop any
    real or imaginary code component equal to -128 is replaced by -127 before the
    multiplication; in the SIMD loop the -128 handling is delegated to the
    CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1 macro.

    The outputs are always overwritten, also when num_points is smaller than 8
    (or zero), so the result never depends on the previous content of the
    output locations.

    The vector data is read with _mm_load_si128, so input, carrier and the five
    code pointers must be 16-byte aligned.

    \param input The input signal input
    \param carrier The carrier signal input
    \param VE_code Very Early PRN code replica input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param VL_code Very Late PRN code replica input
    \param VE_out Very Early correlation output
    \param E_out Early correlation output
    \param P_out Prompt correlation output
    \param L_out Late correlation output
    \param VL_out Very Late correlation output
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5_a_sse4_1(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* VE_code, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, const lv_8sc_t* VL_code, unsigned int num_points)
{
    const unsigned int sse_iters = num_points / 8;

    __m128i x, x_abs, y, y_aux, bb_signal_sample_aux, bb_signal_sample_aux_abs;
    __m128i real_output, imag_output;
    __m128 real_VE_code_acc, imag_VE_code_acc, real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc, real_VL_code_acc, imag_VL_code_acc;
    __m128i input_i_1, input_i_2, output_i32;
    __m128 real_output_ps, imag_output_ps;
    __m128i minus128control;

    __m128i minus128 = _mm_set1_epi8 (-128);
    __m128i check_sign_sequence = _mm_set_epi8 (255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1);
    __m128i rearrange_sequence = _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
    __m128i mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);

    const lv_8sc_t* input_ptr = input;
    const lv_8sc_t* carrier_ptr = carrier;

    const lv_8sc_t* VE_code_ptr = VE_code;
    lv_32fc_t* VE_out_ptr = VE_out;
    const lv_8sc_t* E_code_ptr = E_code;
    lv_32fc_t* E_out_ptr = E_out;
    const lv_8sc_t* P_code_ptr = P_code;
    lv_32fc_t* P_out_ptr = P_out;
    const lv_8sc_t* L_code_ptr = L_code;
    lv_32fc_t* L_out_ptr = L_out;
    const lv_8sc_t* VL_code_ptr = VL_code;
    lv_32fc_t* VL_out_ptr = VL_out;

    float VE_out_real = 0;
    float VE_out_imag = 0;
    float E_out_real = 0;
    float E_out_imag = 0;
    float P_out_real = 0;
    float P_out_imag = 0;
    float L_out_real = 0;
    float L_out_imag = 0;
    float VL_out_real = 0;
    float VL_out_imag = 0;

    real_VE_code_acc = _mm_setzero_ps();
    imag_VE_code_acc = _mm_setzero_ps();
    real_E_code_acc = _mm_setzero_ps();
    imag_E_code_acc = _mm_setzero_ps();
    real_P_code_acc = _mm_setzero_ps();
    imag_P_code_acc = _mm_setzero_ps();
    real_L_code_acc = _mm_setzero_ps();
    imag_L_code_acc = _mm_setzero_ps();
    real_VL_code_acc = _mm_setzero_ps();
    imag_VL_code_acc = _mm_setzero_ps();

    for(unsigned int number = 0; number < sse_iters; number++)
        {
            //Perform the carrier wipe-off
            x = _mm_load_si128((__m128i*)input_ptr);
            y = _mm_load_si128((__m128i*)carrier_ptr);

            x_abs = _mm_abs_epi8 (x);

            CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, x, check_sign_sequence, rearrange_sequence, y_aux, x_abs, real_output, imag_output)

            // Merge the real and imaginary products into one register of baseband samples
            imag_output = _mm_slli_si128 (imag_output, 1);
            bb_signal_sample_aux = _mm_blendv_epi8 (imag_output, real_output, mult1);
            bb_signal_sample_aux_abs = _mm_abs_epi8 (bb_signal_sample_aux);

            //Get very early values
            y = _mm_load_si128((__m128i*)VE_code_ptr);

            CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, minus128, minus128control, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_VE_code_acc = _mm_add_ps (real_VE_code_acc, real_output_ps);
            imag_VE_code_acc = _mm_add_ps (imag_VE_code_acc, imag_output_ps);

            //Get early values
            y = _mm_load_si128((__m128i*)E_code_ptr);

            CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, minus128, minus128control, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
            imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);

            //Get prompt values
            y = _mm_load_si128((__m128i*)P_code_ptr);

            CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, minus128, minus128control, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
            imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);

            //Get late values
            y = _mm_load_si128((__m128i*)L_code_ptr);

            CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, minus128, minus128control, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
            imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);

            //Get very late values
            y = _mm_load_si128((__m128i*)VL_code_ptr);

            CM_8IC_X2_CW_CORR_SAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, minus128, minus128control, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_VL_code_acc = _mm_add_ps (real_VL_code_acc, real_output_ps);
            imag_VL_code_acc = _mm_add_ps (imag_VL_code_acc, imag_output_ps);

            input_ptr += 8;
            carrier_ptr += 8;
            VE_code_ptr += 8;
            E_code_ptr += 8;
            P_code_ptr += 8;
            L_code_ptr += 8;
            VL_code_ptr += 8;
        }

    // The reduction and the output stores are done unconditionally: when no SIMD
    // iteration ran the accumulators are still zero, so the outputs are set to
    // zero before the scalar tail below accumulates into them.
    __VOLK_ATTR_ALIGNED(16) float real_VE_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float imag_VE_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float real_VL_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float imag_VL_dotProductVector[4];

    _mm_store_ps((float*)real_VE_dotProductVector,real_VE_code_acc); // Store the results back into the dot product vector
    _mm_store_ps((float*)imag_VE_dotProductVector,imag_VE_code_acc); // Store the results back into the dot product vector
    _mm_store_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
    _mm_store_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
    _mm_store_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
    _mm_store_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
    _mm_store_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
    _mm_store_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
    _mm_store_ps((float*)real_VL_dotProductVector,real_VL_code_acc); // Store the results back into the dot product vector
    _mm_store_ps((float*)imag_VL_dotProductVector,imag_VL_code_acc); // Store the results back into the dot product vector

    // Horizontal sum of the four lanes of every accumulator
    for (int i = 0; i<4; ++i)
        {
            VE_out_real += real_VE_dotProductVector[i];
            VE_out_imag += imag_VE_dotProductVector[i];
            E_out_real += real_E_dotProductVector[i];
            E_out_imag += imag_E_dotProductVector[i];
            P_out_real += real_P_dotProductVector[i];
            P_out_imag += imag_P_dotProductVector[i];
            L_out_real += real_L_dotProductVector[i];
            L_out_imag += imag_L_dotProductVector[i];
            VL_out_real += real_VL_dotProductVector[i];
            VL_out_imag += imag_VL_dotProductVector[i];
        }
    *VE_out_ptr = lv_cmake(VE_out_real, VE_out_imag);
    *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
    *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
    *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
    *VL_out_ptr = lv_cmake(VL_out_real, VL_out_imag);

    // Scalar tail for the samples that do not fill a whole SIMD iteration
    if(num_points%8!=0)
        {
            lv_16sc_t bb_signal_sample;
            lv_16sc_t VE_code_value;
            lv_16sc_t E_code_value;
            lv_16sc_t P_code_value;
            lv_16sc_t L_code_value;
            lv_16sc_t VL_code_value;

            for(unsigned int i=0; i < num_points%8; ++i)
                {
                    VE_code_value = *VE_code_ptr++;
                    E_code_value = *E_code_ptr++;
                    P_code_value = *P_code_ptr++;
                    L_code_value = *L_code_ptr++;
                    VL_code_value = *VL_code_ptr++;

                    if(lv_creal(VE_code_value) == -128)
                        {
                            VE_code_value = lv_cmake(-127, lv_cimag(VE_code_value));
                        }
                    if(lv_cimag(VE_code_value) == -128)
                        {
                            VE_code_value = lv_cmake(lv_creal(VE_code_value), -127);
                        }

                    if(lv_creal(E_code_value) == -128)
                        {
                            E_code_value = lv_cmake(-127, lv_cimag(E_code_value));
                        }
                    if(lv_cimag(E_code_value) == -128)
                        {
                            E_code_value = lv_cmake(lv_creal(E_code_value), -127);
                        }

                    if(lv_creal(P_code_value) == -128)
                        {
                            P_code_value = lv_cmake(-127, lv_cimag(P_code_value));
                        }
                    if(lv_cimag(P_code_value) == -128)
                        {
                            P_code_value = lv_cmake(lv_creal(P_code_value), -127);
                        }

                    if(lv_creal(L_code_value) == -128)
                        {
                            L_code_value = lv_cmake(-127, lv_cimag(L_code_value));
                        }
                    if(lv_cimag(L_code_value) == -128)
                        {
                            L_code_value = lv_cmake(lv_creal(L_code_value), -127);
                        }

                    // The very late replica gets the same -128 handling as the
                    // other four, matching the generic implementation.
                    if(lv_creal(VL_code_value) == -128)
                        {
                            VL_code_value = lv_cmake(-127, lv_cimag(VL_code_value));
                        }
                    if(lv_cimag(VL_code_value) == -128)
                        {
                            VL_code_value = lv_cmake(lv_creal(VL_code_value), -127);
                        }

                    //Perform the carrier wipe-off
                    bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
                    // Now get very early, early, prompt, late and very late values for each
                    *VE_out_ptr += (lv_32fc_t) (bb_signal_sample * VE_code_value);
                    *E_out_ptr += (lv_32fc_t) (bb_signal_sample * E_code_value);
                    *P_out_ptr += (lv_32fc_t) (bb_signal_sample * P_code_value);
                    *L_out_ptr += (lv_32fc_t) (bb_signal_sample * L_code_value);
                    *VL_out_ptr += (lv_32fc_t) (bb_signal_sample * VL_code_value);
                }
        }
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Generic carrier wipe-off plus Very Early, Early, Prompt, Late and Very Late correlation.

    For every sample the input is multiplied by the carrier, and the product is
    then multiplied by each of the five code replicas and accumulated into the
    matching output. Any real or imaginary code component equal to -128 is
    replaced by -127 before the multiplication.

    \param input The input signal input
    \param carrier The carrier signal input
    \param VE_code Very Early PRN code replica input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param VL_code Very Late PRN code replica input
    \param VE_out Very Early correlation output
    \param E_out Early correlation output
    \param P_out Prompt correlation output
    \param L_out Late correlation output
    \param VL_out Very Late correlation output
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5_a_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* VE_code, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, const lv_8sc_t* VL_code, unsigned int num_points)
{
    /* Correlator taps in processing order: VE, E, P, L, VL. */
    const lv_8sc_t* const tap_codes[5] = {VE_code, E_code, P_code, L_code, VL_code};
    lv_32fc_t* const tap_sums[5] = {VE_out, E_out, P_out, L_out, VL_out};
    lv_16sc_t tap_value[5];
    lv_16sc_t mixed_sample;
    unsigned int idx;
    unsigned int t;

    /* Reset all five accumulators. */
    for(t = 0; t < 5; ++t)
        {
            *tap_sums[t] = 0;
        }

    for(idx = 0; idx < num_points; ++idx)
        {
            /* Load the code samples, replacing any -128 component by -127. */
            for(t = 0; t < 5; ++t)
                {
                    tap_value[t] = tap_codes[t][idx];
                    if(lv_creal(tap_value[t]) == -128)
                        {
                            tap_value[t] = lv_cmake(-127, lv_cimag(tap_value[t]));
                        }
                    if(lv_cimag(tap_value[t]) == -128)
                        {
                            tap_value[t] = lv_cmake(lv_creal(tap_value[t]), -127);
                        }
                }

            /* Carrier wipe-off */
            mixed_sample = input[idx] * carrier[idx];

            /* Very early, early, prompt, late and very late accumulation. */
            for(t = 0; t < 5; ++t)
                {
                    *tap_sums[t] += (lv_32fc_t) (mixed_sample * tap_value[t]);
                }
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5_a_H */

View File

@ -1,550 +0,0 @@
/*!
* \file volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5.h
* \brief Volk protokernel: performs the carrier wipe-off mixing and the Very early, Early, Prompt, Late and very late correlation with 16 bits vectors, and accumulates the results into float32. This protokernel is called "unsafe" because it does NOT check when the inputs have a -128 value. If you introduce a -128 value the protokernel will NOT operate properly (generic implementation will have different results than volk implementation). In order to avoid overflow, "input" and "carrier" must be values between -7 and 7 and "XX_code inputs" must be values between -127 and 127.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that performs the carrier wipe-off mixing and the
* Very early, Early, Prompt, Late and very late correlation with 16 bits vectors (8 bits the
* real part and 8 bits the imaginary part), and accumulates the result
* in 32 bits single point values, returning float32 values:
* - The carrier wipe-off is done by multiplying the input signal by the
* carrier (multiplication of 16 bits vectors) It returns the input
* signal in base band (BB)
* - Very Early values are calculated by multiplying the input signal in BB by the
* very early code (multiplication of 16 bits vectors), accumulating the results into float32 values
* - Early values are calculated by multiplying the input signal in BB by the
* early code (multiplication of 16 bits vectors), accumulating the results into float32 values
* - Prompt values are calculated by multiplying the input signal in BB by the
* prompt code (multiplication of 16 bits vectors), accumulating the results into float32 values
* - Late values are calculated by multiplying the input signal in BB by the
* late code (multiplication of 16 bits vectors), accumulating the results into float32 values
* - Very Late values are calculated by multiplying the input signal in BB by the
* very late code (multiplication of 16 bits vectors), accumulating the results into float32 values
*
* -------------------------------------------------------------------------
* Bits analysis
*
* input = 8 bits
* carrier = 8 bits
* XX_code = 8 bits
* XX_out16 = 16 bits
* bb_signal_sample = 8 bits
*
* bb_signal_sample = input*carrier -> 17 bits limited to 8 bits = input and carrier must be values between -7 and 7 to avoid overflow (3 bits)
*
* XX_out16 = XX_code*bb_signal_sample -> 17 bits limited to 16 bits = XX_code must be values between -127 and 127 to avoid overflow (7 bits)
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5_u_H
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
    \brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Late correlation (SSE4.1, unaligned loads)

    Eight complex samples are processed per SIMD iteration; the remaining
    num_points % 8 samples are handled by a scalar loop. No check for -128
    values is performed on the code inputs in this function.

    The outputs are always overwritten, also when num_points is smaller than 8
    (or zero), so the result never depends on the previous content of the
    output locations.

    \param input The input signal input
    \param carrier The carrier signal input
    \param VE_code Very Early PRN code replica input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param VL_code Very Late PRN code replica input
    \param VE_out Very Early correlation output
    \param E_out Early correlation output
    \param P_out Prompt correlation output
    \param L_out Late correlation output
    \param VL_out Very Late correlation output
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5_u_sse4_1(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* VE_code, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, const lv_8sc_t* VL_code, unsigned int num_points)
{
    const unsigned int sse_iters = num_points / 8;

    __m128i x, x_abs, y, y_aux, bb_signal_sample_aux, bb_signal_sample_aux_abs;
    __m128i real_output, imag_output;
    __m128 real_VE_code_acc, imag_VE_code_acc, real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc, real_VL_code_acc, imag_VL_code_acc;
    __m128i input_i_1, input_i_2, output_i32;
    __m128 real_output_ps, imag_output_ps;

    __m128i check_sign_sequence = _mm_set_epi8 (255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1);
    __m128i rearrange_sequence = _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
    __m128i mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);

    const lv_8sc_t* input_ptr = input;
    const lv_8sc_t* carrier_ptr = carrier;

    const lv_8sc_t* VE_code_ptr = VE_code;
    lv_32fc_t* VE_out_ptr = VE_out;
    const lv_8sc_t* E_code_ptr = E_code;
    lv_32fc_t* E_out_ptr = E_out;
    const lv_8sc_t* P_code_ptr = P_code;
    lv_32fc_t* P_out_ptr = P_out;
    const lv_8sc_t* L_code_ptr = L_code;
    lv_32fc_t* L_out_ptr = L_out;
    const lv_8sc_t* VL_code_ptr = VL_code;
    lv_32fc_t* VL_out_ptr = VL_out;

    float VE_out_real = 0;
    float VE_out_imag = 0;
    float E_out_real = 0;
    float E_out_imag = 0;
    float P_out_real = 0;
    float P_out_imag = 0;
    float L_out_real = 0;
    float L_out_imag = 0;
    float VL_out_real = 0;
    float VL_out_imag = 0;

    real_VE_code_acc = _mm_setzero_ps();
    imag_VE_code_acc = _mm_setzero_ps();
    real_E_code_acc = _mm_setzero_ps();
    imag_E_code_acc = _mm_setzero_ps();
    real_P_code_acc = _mm_setzero_ps();
    imag_P_code_acc = _mm_setzero_ps();
    real_L_code_acc = _mm_setzero_ps();
    imag_L_code_acc = _mm_setzero_ps();
    real_VL_code_acc = _mm_setzero_ps();
    imag_VL_code_acc = _mm_setzero_ps();

    for(unsigned int number = 0; number < sse_iters; number++)
        {
            //Perform the carrier wipe-off
            x = _mm_lddqu_si128((__m128i*)input_ptr);
            y = _mm_lddqu_si128((__m128i*)carrier_ptr);

            x_abs = _mm_abs_epi8 (x);

            CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, x, check_sign_sequence, rearrange_sequence, y_aux, x_abs, real_output, imag_output)

            // Merge the real and imaginary products into one register of baseband samples
            imag_output = _mm_slli_si128 (imag_output, 1);
            bb_signal_sample_aux = _mm_blendv_epi8 (imag_output, real_output, mult1);
            bb_signal_sample_aux_abs = _mm_abs_epi8 (bb_signal_sample_aux);

            //Get very early values
            y = _mm_lddqu_si128((__m128i*)VE_code_ptr);

            CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_VE_code_acc = _mm_add_ps (real_VE_code_acc, real_output_ps);
            imag_VE_code_acc = _mm_add_ps (imag_VE_code_acc, imag_output_ps);

            //Get early values
            y = _mm_lddqu_si128((__m128i*)E_code_ptr);

            CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
            imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);

            //Get prompt values
            y = _mm_lddqu_si128((__m128i*)P_code_ptr);

            CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
            imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);

            //Get late values
            y = _mm_lddqu_si128((__m128i*)L_code_ptr);

            CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
            imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);

            //Get very late values
            y = _mm_lddqu_si128((__m128i*)VL_code_ptr);

            CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)

            real_VL_code_acc = _mm_add_ps (real_VL_code_acc, real_output_ps);
            imag_VL_code_acc = _mm_add_ps (imag_VL_code_acc, imag_output_ps);

            input_ptr += 8;
            carrier_ptr += 8;
            VE_code_ptr += 8;
            E_code_ptr += 8;
            P_code_ptr += 8;
            L_code_ptr += 8;
            VL_code_ptr += 8;
        }

    // The reduction and the output stores are done unconditionally: when no SIMD
    // iteration ran the accumulators are still zero, so the outputs are set to
    // zero before the scalar tail below accumulates into them.
    __VOLK_ATTR_ALIGNED(16) float real_VE_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float imag_VE_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float real_VL_dotProductVector[4];
    __VOLK_ATTR_ALIGNED(16) float imag_VL_dotProductVector[4];

    _mm_storeu_ps((float*)real_VE_dotProductVector,real_VE_code_acc); // Store the results back into the dot product vector
    _mm_storeu_ps((float*)imag_VE_dotProductVector,imag_VE_code_acc); // Store the results back into the dot product vector
    _mm_storeu_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
    _mm_storeu_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
    _mm_storeu_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
    _mm_storeu_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
    _mm_storeu_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
    _mm_storeu_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
    _mm_storeu_ps((float*)real_VL_dotProductVector,real_VL_code_acc); // Store the results back into the dot product vector
    _mm_storeu_ps((float*)imag_VL_dotProductVector,imag_VL_code_acc); // Store the results back into the dot product vector

    // Horizontal sum of the four lanes of every accumulator
    for (int i = 0; i<4; ++i)
        {
            VE_out_real += real_VE_dotProductVector[i];
            VE_out_imag += imag_VE_dotProductVector[i];
            E_out_real += real_E_dotProductVector[i];
            E_out_imag += imag_E_dotProductVector[i];
            P_out_real += real_P_dotProductVector[i];
            P_out_imag += imag_P_dotProductVector[i];
            L_out_real += real_L_dotProductVector[i];
            L_out_imag += imag_L_dotProductVector[i];
            VL_out_real += real_VL_dotProductVector[i];
            VL_out_imag += imag_VL_dotProductVector[i];
        }
    *VE_out_ptr = lv_cmake(VE_out_real, VE_out_imag);
    *E_out_ptr = lv_cmake(E_out_real, E_out_imag);
    *P_out_ptr = lv_cmake(P_out_real, P_out_imag);
    *L_out_ptr = lv_cmake(L_out_real, L_out_imag);
    *VL_out_ptr = lv_cmake(VL_out_real, VL_out_imag);

    // Scalar tail for the samples that do not fill a whole SIMD iteration
    lv_16sc_t bb_signal_sample;
    for(unsigned int i=0; i < num_points%8; ++i)
        {
            //Perform the carrier wipe-off
            bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
            // Now get very early, early, prompt, late and very late values for each
            *VE_out_ptr += (lv_32fc_t) (bb_signal_sample * ((lv_16sc_t)*VE_code_ptr++));
            *E_out_ptr += (lv_32fc_t) (bb_signal_sample * ((lv_16sc_t)*E_code_ptr++));
            *P_out_ptr += (lv_32fc_t) (bb_signal_sample * ((lv_16sc_t)*P_code_ptr++));
            *L_out_ptr += (lv_32fc_t) (bb_signal_sample * ((lv_16sc_t)*L_code_ptr++));
            *VL_out_ptr += (lv_32fc_t) (bb_signal_sample * ((lv_16sc_t)*VL_code_ptr++));
        }
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Generic carrier wipe-off plus Very Early, Early, Prompt, Late and Very Late correlation.

    For every sample the input is multiplied by the carrier, and the product is
    then multiplied by each of the five code replicas and accumulated into the
    matching output. The code samples are used as they are; no -128 check is
    performed here.

    \param input The input signal input
    \param carrier The carrier signal input
    \param VE_code Very Early PRN code replica input
    \param E_code Early PRN code replica input
    \param P_code Prompt PRN code replica input
    \param L_code Late PRN code replica input
    \param VL_code Very Late PRN code replica input
    \param VE_out Very Early correlation output
    \param E_out Early correlation output
    \param P_out Prompt correlation output
    \param L_out Late correlation output
    \param VL_out Very Late correlation output
    \param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* VE_code, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, const lv_8sc_t* VL_code, unsigned int num_points)
{
    /* Correlator taps in processing order: VE, E, P, L, VL. */
    const lv_8sc_t* const replicas[5] = {VE_code, E_code, P_code, L_code, VL_code};
    lv_32fc_t* const outputs[5] = {VE_out, E_out, P_out, L_out, VL_out};
    lv_16sc_t baseband;
    unsigned int n;
    unsigned int tap;

    /* Every accumulator starts from zero. */
    for(tap = 0; tap < 5; ++tap)
        {
            *outputs[tap] = 0;
        }

    for(n = 0; n < num_points; ++n)
        {
            /* Carrier wipe-off */
            baseband = input[n] * carrier[n];

            /* Very early, early, prompt, late and very late accumulation. */
            for(tap = 0; tap < 5; ++tap)
                {
                    *outputs[tap] += (lv_32fc_t) (baseband * replicas[tap][n]);
                }
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5_u_H */
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5_a_H
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
#include "CommonMacros/CommonMacros_8ic_cw_epl_corr_32fc.h"
#include "CommonMacros/CommonMacros.h"
/*!
\brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param VE_code Very Early PRN code replica input
\param E_code Early PRN code replica input
\param P_code Prompt PRN code replica input
\param L_code Late PRN code replica input
\param VL_code Very Late PRN code replica input
\param VE_out Very Early correlation output
\param E_out Early correlation output
\param P_out Prompt correlation output
\param L_out Late correlation output
\param VL_out Very Late correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5_a_sse4_1(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* VE_code, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, const lv_8sc_t* VL_code, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
__m128i x, x_abs, y, y_aux, bb_signal_sample_aux, bb_signal_sample_aux_abs;;
__m128i real_output, imag_output;
__m128 real_VE_code_acc, imag_VE_code_acc, real_E_code_acc, imag_E_code_acc, real_P_code_acc, imag_P_code_acc, real_L_code_acc, imag_L_code_acc, real_VL_code_acc, imag_VL_code_acc;
__m128i input_i_1, input_i_2, output_i32;
__m128 real_output_ps, imag_output_ps;
__m128i check_sign_sequence = _mm_set_epi8 (255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1, 255, 1);
__m128i rearrange_sequence = _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
__m128i mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
const lv_8sc_t* input_ptr = input;
const lv_8sc_t* carrier_ptr = carrier;
const lv_8sc_t* VE_code_ptr = VE_code;
lv_32fc_t* VE_out_ptr = VE_out;
const lv_8sc_t* E_code_ptr = E_code;
lv_32fc_t* E_out_ptr = E_out;
const lv_8sc_t* P_code_ptr = P_code;
lv_32fc_t* P_out_ptr = P_out;
const lv_8sc_t* L_code_ptr = L_code;
lv_32fc_t* L_out_ptr = L_out;
const lv_8sc_t* VL_code_ptr = VL_code;
lv_32fc_t* VL_out_ptr = VL_out;
float VE_out_real = 0;
float VE_out_imag = 0;
float E_out_real = 0;
float E_out_imag = 0;
float P_out_real = 0;
float P_out_imag = 0;
float L_out_real = 0;
float L_out_imag = 0;
float VL_out_real = 0;
float VL_out_imag = 0;
real_VE_code_acc = _mm_setzero_ps();
imag_VE_code_acc = _mm_setzero_ps();
real_E_code_acc = _mm_setzero_ps();
imag_E_code_acc = _mm_setzero_ps();
real_P_code_acc = _mm_setzero_ps();
imag_P_code_acc = _mm_setzero_ps();
real_L_code_acc = _mm_setzero_ps();
imag_L_code_acc = _mm_setzero_ps();
real_VL_code_acc = _mm_setzero_ps();
imag_VL_code_acc = _mm_setzero_ps();
if (sse_iters>0)
{
for(int number = 0;number < sse_iters; number++)
{
//Perform the carrier wipe-off
x = _mm_load_si128((__m128i*)input_ptr);
y = _mm_load_si128((__m128i*)carrier_ptr);
x_abs = _mm_abs_epi8 (x);
CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, x, check_sign_sequence, rearrange_sequence, y_aux, x_abs, real_output, imag_output)
imag_output = _mm_slli_si128 (imag_output, 1);
bb_signal_sample_aux = _mm_blendv_epi8 (imag_output, real_output, mult1);
bb_signal_sample_aux_abs = _mm_abs_epi8 (bb_signal_sample_aux);
//Get very early values
y = _mm_load_si128((__m128i*)VE_code_ptr);
CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
real_VE_code_acc = _mm_add_ps (real_VE_code_acc, real_output_ps);
imag_VE_code_acc = _mm_add_ps (imag_VE_code_acc, imag_output_ps);
//Get early values
y = _mm_load_si128((__m128i*)E_code_ptr);
CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
real_E_code_acc = _mm_add_ps (real_E_code_acc, real_output_ps);
imag_E_code_acc = _mm_add_ps (imag_E_code_acc, imag_output_ps);
//Get prompt values
y = _mm_load_si128((__m128i*)P_code_ptr);
CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
real_P_code_acc = _mm_add_ps (real_P_code_acc, real_output_ps);
imag_P_code_acc = _mm_add_ps (imag_P_code_acc, imag_output_ps);
//Get late values
y = _mm_load_si128((__m128i*)L_code_ptr);
CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
real_L_code_acc = _mm_add_ps (real_L_code_acc, real_output_ps);
imag_L_code_acc = _mm_add_ps (imag_L_code_acc, imag_output_ps);
//Get very late values
y = _mm_load_si128((__m128i*)VL_code_ptr);
CM_8IC_X2_CW_CORR_UNSAFE_32FC_X2_U_SSE4_1(y, bb_signal_sample_aux, check_sign_sequence, rearrange_sequence, y_aux, bb_signal_sample_aux_abs, real_output, imag_output, input_i_1, input_i_2, output_i32, real_output_ps, imag_output_ps)
real_VL_code_acc = _mm_add_ps (real_VL_code_acc, real_output_ps);
imag_VL_code_acc = _mm_add_ps (imag_VL_code_acc, imag_output_ps);
input_ptr += 8;
carrier_ptr += 8;
VE_code_ptr += 8;
E_code_ptr += 8;
P_code_ptr += 8;
L_code_ptr += 8;
VL_code_ptr += 8;
}
__VOLK_ATTR_ALIGNED(16) float real_VE_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float imag_VE_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float real_E_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float imag_E_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float real_P_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float imag_P_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float real_L_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float imag_L_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float real_VL_dotProductVector[4];
__VOLK_ATTR_ALIGNED(16) float imag_VL_dotProductVector[4];
_mm_store_ps((float*)real_VE_dotProductVector,real_VE_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)imag_VE_dotProductVector,imag_VE_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)real_E_dotProductVector,real_E_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)imag_E_dotProductVector,imag_E_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)real_P_dotProductVector,real_P_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)imag_P_dotProductVector,imag_P_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)real_L_dotProductVector,real_L_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)imag_L_dotProductVector,imag_L_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)real_VL_dotProductVector,real_VL_code_acc); // Store the results back into the dot product vector
_mm_store_ps((float*)imag_VL_dotProductVector,imag_VL_code_acc); // Store the results back into the dot product vector
for (int i = 0; i<4; ++i)
{
VE_out_real += real_VE_dotProductVector[i];
VE_out_imag += imag_VE_dotProductVector[i];
E_out_real += real_E_dotProductVector[i];
E_out_imag += imag_E_dotProductVector[i];
P_out_real += real_P_dotProductVector[i];
P_out_imag += imag_P_dotProductVector[i];
L_out_real += real_L_dotProductVector[i];
L_out_imag += imag_L_dotProductVector[i];
VL_out_real += real_VL_dotProductVector[i];
VL_out_imag += imag_VL_dotProductVector[i];
}
*VE_out_ptr = lv_cmake(VE_out_real, VE_out_imag);
*E_out_ptr = lv_cmake(E_out_real, E_out_imag);
*P_out_ptr = lv_cmake(P_out_real, P_out_imag);
*L_out_ptr = lv_cmake(L_out_real, L_out_imag);
*VL_out_ptr = lv_cmake(VL_out_real, VL_out_imag);
}
lv_16sc_t bb_signal_sample;
for(int i=0; i < num_points%8; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
// Now get very early, early, prompt, late and very late values for each
*VE_out_ptr += (lv_32fc_t) (bb_signal_sample * ((lv_16sc_t)*VE_code_ptr++));
*E_out_ptr += (lv_32fc_t) (bb_signal_sample * ((lv_16sc_t)*E_code_ptr++));
*P_out_ptr += (lv_32fc_t) (bb_signal_sample * ((lv_16sc_t)*P_code_ptr++));
*L_out_ptr += (lv_32fc_t) (bb_signal_sample * ((lv_16sc_t)*L_code_ptr++));
*VL_out_ptr += (lv_32fc_t) (bb_signal_sample * ((lv_16sc_t)*VL_code_ptr++));
}
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
 \brief Performs the carrier wipe-off mixing and the Very Early, Early, Prompt, Late and Very Late correlations
 \param VE_out Very Early correlation output
 \param E_out Early correlation output
 \param P_out Prompt correlation output
 \param L_out Late correlation output
 \param VL_out Very Late correlation output
 \param input The input signal input
 \param carrier The carrier signal input
 \param VE_code Very Early PRN code replica input
 \param E_code Early PRN code replica input
 \param P_code Prompt PRN code replica input
 \param L_code Late PRN code replica input
 \param VL_code Very Late PRN code replica input
 \param num_points The number of complex values in vectors
 */
static inline void volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5_a_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* VE_code, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, const lv_8sc_t* VL_code, unsigned int num_points)
{
    unsigned int k;

    /* Clear the accumulators: every output is built up as a sum over all samples. */
    *VE_out = 0;
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;
    *VL_out = 0;

    for (k = 0; k != num_points; k++)
        {
            /* Baseband sample after multiplying the input by the local carrier. */
            const lv_16sc_t baseband = input[k] * carrier[k];

            /* One multiply-accumulate per correlator arm. */
            *VE_out += (lv_32fc_t) (baseband * VE_code[k]);
            *E_out += (lv_32fc_t) (baseband * E_code[k]);
            *P_out += (lv_32fc_t) (baseband * P_code[k]);
            *L_out += (lv_32fc_t) (baseband * L_code[k]);
            *VL_out += (lv_32fc_t) (baseband * VL_code[k]);
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5_a_H */

View File

@ -1,865 +0,0 @@
/*!
* \file volk_gnsssdr_32fc_s32f_x2_update_local_carrier_32fc
* \brief Volk protokernel: replaces the tracking function for update_local_carrier. Algorithm by Julien Pommier and Giovanni Garberoglio, modified by Andres Cecilia.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that replaces the tracking function for update_local_carrier. Algorithm by Julien Pommier and Giovanni Garberoglio, modified by Andres Cecilia.
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2007 Julien Pommier
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*
* (this is the zlib license)
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2012 Giovanni Garberoglio
* Interdisciplinary Laboratory for Computational Science (LISC)
* Fondazione Bruno Kessler and University of Trento
* via Sommarive, 18
* I-38123 Trento (Italy)
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_x2_update_local_carrier_32fc_u_H
#define INCLUDED_volk_gnsssdr_32fc_s32f_x2_update_local_carrier_32fc_u_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
 \brief Generates the local carrier replica cos(phase) - j*sin(phase) for a linearly increasing phase (AVX version)
 \param d_carr_sign Output buffer that receives num_points complex carrier samples
 \param phase_rad_init Phase of the first output sample, in radians
 \param phase_step_rad Phase increment between consecutive output samples, in radians
 \param num_points The number of complex samples to generate

 Eight samples are produced per vector iteration with a polynomial sine/cosine
 approximation; the remaining num_points % 8 samples are computed with cos()/sin().
 */
static inline void volk_gnsssdr_s32f_x2_update_local_carrier_32fc_u_avx(lv_32fc_t* d_carr_sign, const float phase_rad_init, const float phase_step_rad, unsigned int num_points)
{
    const unsigned int sse_iters = num_points / 8;

    /* -Pi/4 split into three parts of decreasing magnitude (used by the argument reduction). */
    __m256 _ps256_minus_cephes_DP1 = _mm256_set1_ps(-0.78515625f);
    __m256 _ps256_minus_cephes_DP2 = _mm256_set1_ps(-2.4187564849853515625e-4f);
    __m256 _ps256_minus_cephes_DP3 = _mm256_set1_ps(-3.77489497744594108e-8f);
    /* -0.0f has only the sign bit set. */
    __m256 _ps256_sign_mask = _mm256_set1_ps(-0.f);
    __m128i _pi32avx_1 = _mm_set1_epi32(1);
    __m128i _pi32avx_inv1 = _mm_set1_epi32(~1);
    __m128i _pi32avx_2 = _mm_set1_epi32(2);
    __m128i _pi32avx_4 = _mm_set1_epi32(4);
    __m256 _ps256_cephes_FOPI = _mm256_set1_ps(1.27323954473516f); // 4 / PI
    __m256 _ps256_sincof_p0 = _mm256_set1_ps(-1.9515295891E-4f);
    __m256 _ps256_sincof_p1 = _mm256_set1_ps( 8.3321608736E-3f);
    __m256 _ps256_sincof_p2 = _mm256_set1_ps(-1.6666654611E-1f);
    __m256 _ps256_coscof_p0 = _mm256_set1_ps( 2.443315711809948E-005f);
    __m256 _ps256_coscof_p1 = _mm256_set1_ps(-1.388731625493765E-003f);
    __m256 _ps256_coscof_p2 = _mm256_set1_ps( 4.166664568298827E-002f);
    __m256 _ps256_1 = _mm256_set1_ps(1.f);
    __m256 _ps256_0p5 = _mm256_set1_ps(0.5f);

    /* All eight lanes advance by eight steps per iteration. */
    __m256 phase_step_rad_array = _mm256_set1_ps(8*phase_step_rad);

    __m256 phase_rad_array, x, s, c, swap_sign_bit_sin, sign_bit_cos, poly_mask, z, tmp, y, y2, ysin1, ysin2;
    __m256 xmm1, xmm2, xmm3, sign_bit_sin;
    __m256i imm0, imm2, imm4, tmp256i;
    __m128i imm0_1, imm0_2, imm2_1, imm2_2, imm4_1, imm4_2;

    __VOLK_ATTR_ALIGNED(32) float sin_value[8];
    __VOLK_ATTR_ALIGNED(32) float cos_value[8];

    /* Lane k holds phase_rad_init + k*phase_step_rad (_mm256_set_ps lists the highest lane first). */
    phase_rad_array = _mm256_set_ps (phase_rad_init+7*phase_step_rad, phase_rad_init+6*phase_step_rad, phase_rad_init+5*phase_step_rad, phase_rad_init+4*phase_step_rad, phase_rad_init+3*phase_step_rad, phase_rad_init+2*phase_step_rad, phase_rad_init+phase_step_rad, phase_rad_init);

    for(unsigned int i = 0; i < sse_iters; i++)
        {
            x = phase_rad_array;

            /* extract the sign bit (upper one) */
            sign_bit_sin = _mm256_and_ps(x, _ps256_sign_mask);

            /* take the absolute value */
            x = _mm256_xor_ps(x, sign_bit_sin);

            /* scale by 4/Pi */
            y = _mm256_mul_ps(x, _ps256_cephes_FOPI);

            /* The integer operations are done on the two 128-bit halves with SSE2 intrinsics. */
            tmp256i = _mm256_cvttps_epi32(y);
            imm2_1 = _mm256_extractf128_si256 (tmp256i, 0);
            imm2_2 = _mm256_extractf128_si256 (tmp256i, 1);

            /* j = (j+1) & (~1) */
            imm2_1 = _mm_add_epi32(imm2_1, _pi32avx_1);
            imm2_2 = _mm_add_epi32(imm2_2, _pi32avx_1);

            imm2_1 = _mm_and_si128(imm2_1, _pi32avx_inv1);
            imm2_2 = _mm_and_si128(imm2_2, _pi32avx_inv1);

            /* _mm256_set_m128i is not defined in some versions of immintrin.h,
               so the halves are recombined with insertf128 instead. */
            imm2 = _mm256_insertf128_si256(_mm256_castsi128_si256(imm2_1),(imm2_2),1);

            y = _mm256_cvtepi32_ps(imm2);

            imm4_1 = imm2_1;
            imm4_2 = imm2_2;

            /* swap-sign flag for the sine: bit 2 of j moved into the float sign bit */
            imm0_1 = _mm_and_si128(imm2_1, _pi32avx_4);
            imm0_2 = _mm_and_si128(imm2_2, _pi32avx_4);

            imm0_1 = _mm_slli_epi32(imm0_1, 29);
            imm0_2 = _mm_slli_epi32(imm0_2, 29);

            imm0 = _mm256_insertf128_si256(_mm256_castsi128_si256(imm0_1),(imm0_2),1);

            /* polynomial selection mask: all ones in lanes where (j & 2) == 0 */
            imm2_1 = _mm_and_si128(imm2_1, _pi32avx_2);
            imm2_2 = _mm_and_si128(imm2_2, _pi32avx_2);

            imm2_1 = _mm_cmpeq_epi32(imm2_1, _mm_setzero_si128());
            imm2_2 = _mm_cmpeq_epi32(imm2_2, _mm_setzero_si128());

            imm2 = _mm256_insertf128_si256(_mm256_castsi128_si256(imm2_1),(imm2_2),1);

            swap_sign_bit_sin = _mm256_castsi256_ps(imm0);
            poly_mask = _mm256_castsi256_ps(imm2);

            /* The magic pass: "Extended precision modular arithmetic"
               x = ((x - y * DP1) - y * DP2) - y * DP3; */
            xmm1 = _ps256_minus_cephes_DP1;
            xmm2 = _ps256_minus_cephes_DP2;
            xmm3 = _ps256_minus_cephes_DP3;
            xmm1 = _mm256_mul_ps(y, xmm1);
            xmm2 = _mm256_mul_ps(y, xmm2);
            xmm3 = _mm256_mul_ps(y, xmm3);
            x = _mm256_add_ps(x, xmm1);
            x = _mm256_add_ps(x, xmm2);
            x = _mm256_add_ps(x, xmm3);

            /* sign flag for the cosine: (~(j - 2) & 4) moved into the float sign bit */
            imm4_1 = _mm_sub_epi32(imm4_1, _pi32avx_2);
            imm4_2 = _mm_sub_epi32(imm4_2, _pi32avx_2);

            imm4_1 = _mm_andnot_si128(imm4_1, _pi32avx_4);
            imm4_2 = _mm_andnot_si128(imm4_2, _pi32avx_4);

            imm4_1 = _mm_slli_epi32(imm4_1, 29);
            imm4_2 = _mm_slli_epi32(imm4_2, 29);

            imm4 = _mm256_insertf128_si256(_mm256_castsi128_si256(imm4_1),(imm4_2),1);

            sign_bit_cos = _mm256_castsi256_ps(imm4);

            sign_bit_sin = _mm256_xor_ps(sign_bit_sin, swap_sign_bit_sin);

            /* Evaluate the cosine polynomial on the reduced argument */
            z = _mm256_mul_ps(x,x);
            y = _ps256_coscof_p0;

            y = _mm256_mul_ps(y, z);
            y = _mm256_add_ps(y, _ps256_coscof_p1);
            y = _mm256_mul_ps(y, z);
            y = _mm256_add_ps(y, _ps256_coscof_p2);
            y = _mm256_mul_ps(y, z);
            y = _mm256_mul_ps(y, z);
            tmp = _mm256_mul_ps(z, _ps256_0p5);
            y = _mm256_sub_ps(y, tmp);
            y = _mm256_add_ps(y, _ps256_1);

            /* Evaluate the sine polynomial on the reduced argument */
            y2 = _ps256_sincof_p0;
            y2 = _mm256_mul_ps(y2, z);
            y2 = _mm256_add_ps(y2, _ps256_sincof_p1);
            y2 = _mm256_mul_ps(y2, z);
            y2 = _mm256_add_ps(y2, _ps256_sincof_p2);
            y2 = _mm256_mul_ps(y2, z);
            y2 = _mm256_mul_ps(y2, x);
            y2 = _mm256_add_ps(y2, x);

            /* Select per lane: where poly_mask is set the sine comes from y2 and the
               cosine from y; where it is clear the roles are exchanged. */
            xmm3 = poly_mask;
            ysin2 = _mm256_and_ps(xmm3, y2);
            ysin1 = _mm256_andnot_ps(xmm3, y);
            y2 = _mm256_sub_ps(y2,ysin2);
            y = _mm256_sub_ps(y, ysin1);

            xmm1 = _mm256_add_ps(ysin1,ysin2);
            xmm2 = _mm256_add_ps(y,y2);

            /* update the sign */
            s = _mm256_xor_ps(xmm1, sign_bit_sin);
            c = _mm256_xor_ps(xmm2, sign_bit_cos);

            //GNSS-SDR needs to return -sin
            s = _mm256_xor_ps(s, _ps256_sign_mask);

            _mm256_storeu_ps ((float*)sin_value, s);
            _mm256_storeu_ps ((float*)cos_value, c);

            /* Interleave into complex samples: real = cos, imag = -sin. */
            for(unsigned int e = 0; e < 8; e++)
                {
                    d_carr_sign[e] = lv_cmake(cos_value[e], sin_value[e]);
                }
            d_carr_sign += 8;

            phase_rad_array = _mm256_add_ps (phase_rad_array, phase_step_rad_array);
        }

    if (num_points%8!=0)
        {
            /* Lane 0 holds the phase of the first sample not yet generated. */
            __VOLK_ATTR_ALIGNED(32) float phase_rad_store[8];
            _mm256_storeu_ps ((float*)phase_rad_store, phase_rad_array);

            float phase_rad = phase_rad_store[0];

            for(unsigned int i = 0; i < num_points%8; i++)
                {
                    *d_carr_sign = lv_cmake(cos(phase_rad), -sin(phase_rad));
                    d_carr_sign++;
                    phase_rad += phase_step_rad;
                }
        }
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Generates the local carrier replica cos(phase) - j*sin(phase) for a linearly increasing phase (SSE2 version)
\param d_carr_sign Output buffer that receives num_points complex carrier samples
\param phase_rad_init Phase of the first output sample, in radians
\param phase_step_rad Phase increment between consecutive output samples, in radians
\param num_points The number of complex samples to generate

Four samples are produced per vector iteration with a polynomial sine/cosine
approximation; the remaining num_points % 4 samples are computed with cos()/sin().
*/
static inline void volk_gnsssdr_s32f_x2_update_local_carrier_32fc_u_sse2(lv_32fc_t* d_carr_sign, const float phase_rad_init, const float phase_step_rad, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;
/* -Pi/4 split into three parts of decreasing magnitude (used by the argument reduction) */
__m128 _ps_minus_cephes_DP1 = _mm_set1_ps(-0.78515625f);
__m128 _ps_minus_cephes_DP2 = _mm_set1_ps(-2.4187564849853515625e-4f);
__m128 _ps_minus_cephes_DP3 = _mm_set1_ps(-3.77489497744594108e-8f);
/* -0.0f has only the sign bit set */
__m128 _ps_sign_mask = _mm_set1_ps(-0.f);
__m128i _pi32_1 = _mm_set1_epi32(1);
__m128i _pi32_inv1 = _mm_set1_epi32(~1);
__m128i _pi32_2 = _mm_set1_epi32(2);
__m128i _pi32_4 = _mm_set1_epi32(4);
__m128 _ps_cephes_FOPI = _mm_set1_ps(1.27323954473516f); // 4 / PI
__m128 _ps_sincof_p0 = _mm_set1_ps(-1.9515295891E-4f);
__m128 _ps_sincof_p1 = _mm_set1_ps( 8.3321608736E-3f);
__m128 _ps_sincof_p2 = _mm_set1_ps(-1.6666654611E-1f);
__m128 _ps_coscof_p0 = _mm_set1_ps( 2.443315711809948E-005f);
__m128 _ps_coscof_p1 = _mm_set1_ps(-1.388731625493765E-003f);
__m128 _ps_coscof_p2 = _mm_set1_ps( 4.166664568298827E-002f);
__m128 _ps_1 = _mm_set1_ps(1.f);
__m128 _ps_0p5 = _mm_set1_ps(0.5f);
/* all four lanes advance by four steps per iteration */
__m128 phase_step_rad_array = _mm_set1_ps(4*phase_step_rad);
__m128 phase_rad_array, x, s, c, swap_sign_bit_sin, sign_bit_cos, poly_mask, z, tmp, y, y2, ysin1, ysin2;
__m128 xmm1, xmm2, xmm3, sign_bit_sin;
__m128i emm0, emm2, emm4;
__VOLK_ATTR_ALIGNED(16) float sin_value[4];
__VOLK_ATTR_ALIGNED(16) float cos_value[4];
/* lane k holds phase_rad_init + k*phase_step_rad (_mm_set_ps lists the highest lane first) */
phase_rad_array = _mm_set_ps (phase_rad_init+3*phase_step_rad, phase_rad_init+2*phase_step_rad, phase_rad_init+phase_step_rad, phase_rad_init);
for(unsigned int i = 0; i < sse_iters; i++)
{
x = phase_rad_array;
/* extract the sign bit (upper one) */
sign_bit_sin = _mm_and_ps(x, _ps_sign_mask);
/* take the absolute value */
x = _mm_xor_ps(x, sign_bit_sin);
/* scale by 4/Pi */
y = _mm_mul_ps(x, _ps_cephes_FOPI);
/* store the integer part of y in emm2 */
emm2 = _mm_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */
emm2 = _mm_add_epi32(emm2, _pi32_1);
emm2 = _mm_and_si128(emm2, _pi32_inv1);
y = _mm_cvtepi32_ps(emm2);
emm4 = emm2;
/* get the swap sign flag for the sine: bit 2 of j moved into the float sign bit */
emm0 = _mm_and_si128(emm2, _pi32_4);
emm0 = _mm_slli_epi32(emm0, 29);
swap_sign_bit_sin = _mm_castsi128_ps(emm0);
/* get the polynomial selection mask for the sine: all ones in lanes where (j & 2) == 0 */
emm2 = _mm_and_si128(emm2, _pi32_2);
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
poly_mask = _mm_castsi128_ps(emm2);
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = _mm_mul_ps(y, _ps_minus_cephes_DP1);
xmm2 = _mm_mul_ps(y, _ps_minus_cephes_DP2);
xmm3 = _mm_mul_ps(y, _ps_minus_cephes_DP3);
/* NOTE(review): the terms are summed as (x + y*DP1) + (y*DP2 + y*DP3), not strictly
left to right as in the formula above -- confirm the rounding difference is acceptable */
x = _mm_add_ps(_mm_add_ps(x, xmm1), _mm_add_ps(xmm2, xmm3));
/* sign flag for the cosine: (~(j - 2) & 4) moved into the float sign bit */
emm4 = _mm_sub_epi32(emm4, _pi32_2);
emm4 = _mm_andnot_si128(emm4, _pi32_4);
emm4 = _mm_slli_epi32(emm4, 29);
sign_bit_cos = _mm_castsi128_ps(emm4);
sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin);
/* Evaluate the cosine polynomial on the reduced argument */
z = _mm_mul_ps(x,x);
y = _ps_coscof_p0;
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, _ps_coscof_p1);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, _ps_coscof_p2);
y = _mm_mul_ps(y, _mm_mul_ps(z, z));
tmp = _mm_mul_ps(z, _ps_0p5);
y = _mm_sub_ps(y, tmp);
y = _mm_add_ps(y, _ps_1);
/* Evaluate the sine polynomial on the reduced argument */
y2 = _ps_sincof_p0;
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, _ps_sincof_p1);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, _ps_sincof_p2);
y2 = _mm_mul_ps(y2, _mm_mul_ps(z, x));
y2 = _mm_add_ps(y2, x);
/* select per lane: where poly_mask is set the sine comes from y2 and the cosine from y;
where it is clear the roles are exchanged */
xmm3 = poly_mask;
ysin2 = _mm_and_ps(xmm3, y2);
ysin1 = _mm_andnot_ps(xmm3, y);
y2 = _mm_sub_ps(y2,ysin2);
y = _mm_sub_ps(y, ysin1);
xmm1 = _mm_add_ps(ysin1,ysin2);
xmm2 = _mm_add_ps(y,y2);
/* update the sign */
s = _mm_xor_ps(xmm1, sign_bit_sin);
c = _mm_xor_ps(xmm2, sign_bit_cos);
//GNSS-SDR needs to return -sin
s = _mm_xor_ps(s, _ps_sign_mask);
_mm_storeu_ps ((float*)sin_value, s);
_mm_storeu_ps ((float*)cos_value, c);
/* interleave into complex samples: real = cos, imag = -sin */
for(unsigned int e = 0; e < 4; e++)
{
d_carr_sign[e] = lv_cmake(cos_value[e], sin_value[e]);
}
d_carr_sign += 4;
phase_rad_array = _mm_add_ps (phase_rad_array, phase_step_rad_array);
}
if (num_points%4!=0)
{
/* lane 0 holds the phase of the first sample not yet generated */
__VOLK_ATTR_ALIGNED(16) float phase_rad_store[4];
_mm_storeu_ps ((float*)phase_rad_store, phase_rad_array);
float phase_rad = phase_rad_store[0];
for(unsigned int i = 0; i < num_points%4; i++)
{
*d_carr_sign = lv_cmake(cos(phase_rad), -sin(phase_rad));
d_carr_sign++;
phase_rad += phase_step_rad;
}
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
 \brief Generates the local carrier replica cos(phase) - j*sin(phase) for a linearly increasing phase
 \param d_carr_sign Output buffer that receives num_points complex carrier samples
 \param phase_rad_init Phase of the first output sample, in radians
 \param phase_step_rad Phase increment between consecutive output samples, in radians
 \param num_points The number of complex samples to generate
 */
static inline void volk_gnsssdr_s32f_x2_update_local_carrier_32fc_generic(lv_32fc_t* d_carr_sign, const float phase_rad_init, const float phase_step_rad, unsigned int num_points)
{
    lv_32fc_t* out = d_carr_sign;
    float current_phase = phase_rad_init;
    unsigned int remaining = num_points;

    while (remaining > 0)
        {
            /* Real part is the cosine, imaginary part the negated sine. */
            *out = lv_cmake(cos(current_phase), -sin(current_phase));
            out++;

            /* The phase is accumulated in single precision, one step per sample. */
            current_phase += phase_step_rad;
            remaining--;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_x2_update_local_carrier_32fc_u_H */
#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_x2_update_local_carrier_32fc_a_H
#define INCLUDED_volk_gnsssdr_32fc_s32f_x2_update_local_carrier_32fc_a_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
 \brief Generates the local carrier replica cos(phase) - j*sin(phase) for a linearly increasing phase (AVX version)
 \param d_carr_sign Output buffer that receives num_points complex carrier samples
 \param phase_rad_init Phase of the first output sample, in radians
 \param phase_step_rad Phase increment between consecutive output samples, in radians
 \param num_points The number of complex samples to generate

 Eight samples are produced per vector iteration with a polynomial sine/cosine
 approximation; the remaining num_points % 8 samples are computed with cos()/sin().
 Aligned stores are only issued to the local scratch arrays; d_carr_sign itself is
 written element by element.
 */
static inline void volk_gnsssdr_s32f_x2_update_local_carrier_32fc_a_avx(lv_32fc_t* d_carr_sign, const float phase_rad_init, const float phase_step_rad, unsigned int num_points)
{
    const unsigned int sse_iters = num_points / 8;

    /* -Pi/4 split into three parts of decreasing magnitude (used by the argument reduction). */
    __m256 _ps256_minus_cephes_DP1 = _mm256_set1_ps(-0.78515625f);
    __m256 _ps256_minus_cephes_DP2 = _mm256_set1_ps(-2.4187564849853515625e-4f);
    __m256 _ps256_minus_cephes_DP3 = _mm256_set1_ps(-3.77489497744594108e-8f);
    /* -0.0f has only the sign bit set. */
    __m256 _ps256_sign_mask = _mm256_set1_ps(-0.f);
    __m128i _pi32avx_1 = _mm_set1_epi32(1);
    __m128i _pi32avx_inv1 = _mm_set1_epi32(~1);
    __m128i _pi32avx_2 = _mm_set1_epi32(2);
    __m128i _pi32avx_4 = _mm_set1_epi32(4);
    __m256 _ps256_cephes_FOPI = _mm256_set1_ps(1.27323954473516f); // 4 / PI
    __m256 _ps256_sincof_p0 = _mm256_set1_ps(-1.9515295891E-4f);
    __m256 _ps256_sincof_p1 = _mm256_set1_ps( 8.3321608736E-3f);
    __m256 _ps256_sincof_p2 = _mm256_set1_ps(-1.6666654611E-1f);
    __m256 _ps256_coscof_p0 = _mm256_set1_ps( 2.443315711809948E-005f);
    __m256 _ps256_coscof_p1 = _mm256_set1_ps(-1.388731625493765E-003f);
    __m256 _ps256_coscof_p2 = _mm256_set1_ps( 4.166664568298827E-002f);
    __m256 _ps256_1 = _mm256_set1_ps(1.f);
    __m256 _ps256_0p5 = _mm256_set1_ps(0.5f);

    /* All eight lanes advance by eight steps per iteration. */
    __m256 phase_step_rad_array = _mm256_set1_ps(8*phase_step_rad);

    __m256 phase_rad_array, x, s, c, swap_sign_bit_sin, sign_bit_cos, poly_mask, z, tmp, y, y2, ysin1, ysin2;
    __m256 xmm1, xmm2, xmm3, sign_bit_sin;
    __m256i imm0, imm2, imm4, tmp256i;
    __m128i imm0_1, imm0_2, imm2_1, imm2_2, imm4_1, imm4_2;

    __VOLK_ATTR_ALIGNED(32) float sin_value[8];
    __VOLK_ATTR_ALIGNED(32) float cos_value[8];

    /* Lane k holds phase_rad_init + k*phase_step_rad (_mm256_set_ps lists the highest lane first). */
    phase_rad_array = _mm256_set_ps (phase_rad_init + 7*phase_step_rad, phase_rad_init + 6*phase_step_rad, phase_rad_init + 5*phase_step_rad, phase_rad_init + 4*phase_step_rad, phase_rad_init + 3*phase_step_rad, phase_rad_init + 2*phase_step_rad, phase_rad_init + phase_step_rad, phase_rad_init);

    for(unsigned int i = 0; i < sse_iters; i++)
        {
            x = phase_rad_array;

            /* extract the sign bit (upper one) */
            sign_bit_sin = _mm256_and_ps(x, _ps256_sign_mask);

            /* take the absolute value */
            x = _mm256_xor_ps(x, sign_bit_sin);

            /* scale by 4/Pi */
            y = _mm256_mul_ps(x, _ps256_cephes_FOPI);

            /* The integer operations are done on the two 128-bit halves with SSE2 intrinsics. */
            tmp256i = _mm256_cvttps_epi32(y);
            imm2_1 = _mm256_extractf128_si256 (tmp256i, 0);
            imm2_2 = _mm256_extractf128_si256 (tmp256i, 1);

            /* j = (j+1) & (~1) */
            imm2_1 = _mm_add_epi32(imm2_1, _pi32avx_1);
            imm2_2 = _mm_add_epi32(imm2_2, _pi32avx_1);

            imm2_1 = _mm_and_si128(imm2_1, _pi32avx_inv1);
            imm2_2 = _mm_and_si128(imm2_2, _pi32avx_inv1);

            /* _mm256_set_m128i is not defined in some versions of immintrin.h,
               so the halves are recombined with insertf128 instead. */
            imm2 = _mm256_insertf128_si256(_mm256_castsi128_si256(imm2_1),(imm2_2),1);

            y = _mm256_cvtepi32_ps(imm2);

            imm4_1 = imm2_1;
            imm4_2 = imm2_2;

            /* swap-sign flag for the sine: bit 2 of j moved into the float sign bit */
            imm0_1 = _mm_and_si128(imm2_1, _pi32avx_4);
            imm0_2 = _mm_and_si128(imm2_2, _pi32avx_4);

            imm0_1 = _mm_slli_epi32(imm0_1, 29);
            imm0_2 = _mm_slli_epi32(imm0_2, 29);

            imm0 = _mm256_insertf128_si256(_mm256_castsi128_si256(imm0_1),(imm0_2),1);

            /* polynomial selection mask: all ones in lanes where (j & 2) == 0 */
            imm2_1 = _mm_and_si128(imm2_1, _pi32avx_2);
            imm2_2 = _mm_and_si128(imm2_2, _pi32avx_2);

            imm2_1 = _mm_cmpeq_epi32(imm2_1, _mm_setzero_si128());
            imm2_2 = _mm_cmpeq_epi32(imm2_2, _mm_setzero_si128());

            imm2 = _mm256_insertf128_si256(_mm256_castsi128_si256(imm2_1),(imm2_2),1);

            swap_sign_bit_sin = _mm256_castsi256_ps(imm0);
            poly_mask = _mm256_castsi256_ps(imm2);

            /* The magic pass: "Extended precision modular arithmetic"
               x = ((x - y * DP1) - y * DP2) - y * DP3; */
            xmm1 = _ps256_minus_cephes_DP1;
            xmm2 = _ps256_minus_cephes_DP2;
            xmm3 = _ps256_minus_cephes_DP3;
            xmm1 = _mm256_mul_ps(y, xmm1);
            xmm2 = _mm256_mul_ps(y, xmm2);
            xmm3 = _mm256_mul_ps(y, xmm3);
            x = _mm256_add_ps(x, xmm1);
            x = _mm256_add_ps(x, xmm2);
            x = _mm256_add_ps(x, xmm3);

            /* sign flag for the cosine: (~(j - 2) & 4) moved into the float sign bit */
            imm4_1 = _mm_sub_epi32(imm4_1, _pi32avx_2);
            imm4_2 = _mm_sub_epi32(imm4_2, _pi32avx_2);

            imm4_1 = _mm_andnot_si128(imm4_1, _pi32avx_4);
            imm4_2 = _mm_andnot_si128(imm4_2, _pi32avx_4);

            imm4_1 = _mm_slli_epi32(imm4_1, 29);
            imm4_2 = _mm_slli_epi32(imm4_2, 29);

            imm4 = _mm256_insertf128_si256(_mm256_castsi128_si256(imm4_1),(imm4_2),1);

            sign_bit_cos = _mm256_castsi256_ps(imm4);

            sign_bit_sin = _mm256_xor_ps(sign_bit_sin, swap_sign_bit_sin);

            /* Evaluate the cosine polynomial on the reduced argument */
            z = _mm256_mul_ps(x,x);
            y = _ps256_coscof_p0;

            y = _mm256_mul_ps(y, z);
            y = _mm256_add_ps(y, _ps256_coscof_p1);
            y = _mm256_mul_ps(y, z);
            y = _mm256_add_ps(y, _ps256_coscof_p2);
            y = _mm256_mul_ps(y, z);
            y = _mm256_mul_ps(y, z);
            tmp = _mm256_mul_ps(z, _ps256_0p5);
            y = _mm256_sub_ps(y, tmp);
            y = _mm256_add_ps(y, _ps256_1);

            /* Evaluate the sine polynomial on the reduced argument */
            y2 = _ps256_sincof_p0;
            y2 = _mm256_mul_ps(y2, z);
            y2 = _mm256_add_ps(y2, _ps256_sincof_p1);
            y2 = _mm256_mul_ps(y2, z);
            y2 = _mm256_add_ps(y2, _ps256_sincof_p2);
            y2 = _mm256_mul_ps(y2, z);
            y2 = _mm256_mul_ps(y2, x);
            y2 = _mm256_add_ps(y2, x);

            /* Select per lane: where poly_mask is set the sine comes from y2 and the
               cosine from y; where it is clear the roles are exchanged. */
            xmm3 = poly_mask;
            ysin2 = _mm256_and_ps(xmm3, y2);
            ysin1 = _mm256_andnot_ps(xmm3, y);
            y2 = _mm256_sub_ps(y2,ysin2);
            y = _mm256_sub_ps(y, ysin1);

            xmm1 = _mm256_add_ps(ysin1,ysin2);
            xmm2 = _mm256_add_ps(y,y2);

            /* update the sign */
            s = _mm256_xor_ps(xmm1, sign_bit_sin);
            c = _mm256_xor_ps(xmm2, sign_bit_cos);

            //GNSS-SDR needs to return -sin
            s = _mm256_xor_ps(s, _ps256_sign_mask);

            _mm256_store_ps ((float*)sin_value, s);
            _mm256_store_ps ((float*)cos_value, c);

            /* Interleave into complex samples: real = cos, imag = -sin. */
            for(unsigned int e = 0; e < 8; e++)
                {
                    d_carr_sign[e] = lv_cmake(cos_value[e], sin_value[e]);
                }
            d_carr_sign += 8;

            phase_rad_array = _mm256_add_ps (phase_rad_array, phase_step_rad_array);
        }

    if (num_points%8!=0)
        {
            /* Lane 0 holds the phase of the first sample not yet generated. */
            __VOLK_ATTR_ALIGNED(32) float phase_rad_store[8];
            _mm256_store_ps ((float*)phase_rad_store, phase_rad_array);

            float phase_rad = phase_rad_store[0];

            for(unsigned int i = 0; i < num_points%8; i++)
                {
                    *d_carr_sign = lv_cmake(cos(phase_rad), -sin(phase_rad));
                    d_carr_sign++;
                    phase_rad += phase_step_rad;
                }
        }
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Generates a local carrier: output sample k is lv_cmake(cos(phase_k), -sin(phase_k)), with phase_k starting at phase_rad_init and advancing by phase_step_rad per sample (SSE2 version)
\param d_carr_sign Output buffer; num_points complex samples are written to it
\param phase_rad_init Phase of the first output sample, in radians
\param phase_step_rad Phase increment between consecutive output samples, in radians
\param num_points Number of complex samples to write into d_carr_sign
*/
static inline void volk_gnsssdr_s32f_x2_update_local_carrier_32fc_a_sse2(lv_32fc_t* d_carr_sign, const float phase_rad_init, const float phase_step_rad, unsigned int num_points)
{
// float* pointer1 = (float*)&phase_rad_init;
// *pointer1 = 0;
// float* pointer2 = (float*)&phase_step_rad;
// *pointer2 = 0.5;
/* number of full 4-sample vector iterations; the num_points % 4 leftovers are handled by the scalar loop at the end */
const unsigned int sse_iters = num_points / 4;
/* constants of the cephes-style sine/cosine approximation used below (range reduction terms, masks and polynomial coefficients) */
__m128 _ps_minus_cephes_DP1 = _mm_set1_ps(-0.78515625f);
__m128 _ps_minus_cephes_DP2 = _mm_set1_ps(-2.4187564849853515625e-4f);
__m128 _ps_minus_cephes_DP3 = _mm_set1_ps(-3.77489497744594108e-8f);
/* -0.f has only the sign bit set, so it doubles as a sign-bit mask */
__m128 _ps_sign_mask = _mm_set1_ps(-0.f);
__m128i _pi32_1 = _mm_set1_epi32(1);
__m128i _pi32_inv1 = _mm_set1_epi32(~1);
__m128i _pi32_2 = _mm_set1_epi32(2);
__m128i _pi32_4 = _mm_set1_epi32(4);
__m128 _ps_cephes_FOPI = _mm_set1_ps(1.27323954473516f); // 4 / PI
__m128 _ps_sincof_p0 = _mm_set1_ps(-1.9515295891E-4f);
__m128 _ps_sincof_p1 = _mm_set1_ps( 8.3321608736E-3f);
__m128 _ps_sincof_p2 = _mm_set1_ps(-1.6666654611E-1f);
__m128 _ps_coscof_p0 = _mm_set1_ps( 2.443315711809948E-005f);
__m128 _ps_coscof_p1 = _mm_set1_ps(-1.388731625493765E-003f);
__m128 _ps_coscof_p2 = _mm_set1_ps( 4.166664568298827E-002f);
__m128 _ps_1 = _mm_set1_ps(1.f);
__m128 _ps_0p5 = _mm_set1_ps(0.5f);
/* each vector iteration produces 4 samples, so every lane advances by 4 phase steps */
__m128 phase_step_rad_array = _mm_set1_ps(4*phase_step_rad);
__m128 phase_rad_array, x, s, c, swap_sign_bit_sin, sign_bit_cos, poly_mask, z, tmp, y, y2, ysin1, ysin2;
__m128 xmm1, xmm2, xmm3, sign_bit_sin;
__m128i emm0, emm2, emm4;
/* aligned scratch buffers used to move the vector results back to scalar code */
__VOLK_ATTR_ALIGNED(16) float sin_value[4];
__VOLK_ATTR_ALIGNED(16) float cos_value[4];
/* _mm_set_ps takes its arguments from the highest lane down, so lane 0 holds phase_rad_init */
phase_rad_array = _mm_set_ps (phase_rad_init+3*phase_step_rad, phase_rad_init+2*phase_step_rad, phase_rad_init+phase_step_rad, phase_rad_init);
for(unsigned int i = 0; i < sse_iters; i++)
{
x = phase_rad_array;
/* extract the sign bit (upper one) */
sign_bit_sin = _mm_and_ps(x, _ps_sign_mask);
/* take the absolute value */
x = _mm_xor_ps(x, sign_bit_sin);
/* scale by 4/Pi */
y = _mm_mul_ps(x, _ps_cephes_FOPI);
/* store the integer part of y in emm2 */
emm2 = _mm_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */
emm2 = _mm_add_epi32(emm2, _pi32_1);
emm2 = _mm_and_si128(emm2, _pi32_inv1);
y = _mm_cvtepi32_ps(emm2);
/* keep a copy of j: emm2 is consumed by the sine masks, emm4 is used for the cosine sign below */
emm4 = emm2;
/* get the swap sign flag for the sine */
emm0 = _mm_and_si128(emm2, _pi32_4);
/* shifting left by 29 moves bit 2 (value 4) into the float sign bit position (bit 31) */
emm0 = _mm_slli_epi32(emm0, 29);
swap_sign_bit_sin = _mm_castsi128_ps(emm0);
/* get the polynom selection mask for the sine*/
emm2 = _mm_and_si128(emm2, _pi32_2);
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
poly_mask = _mm_castsi128_ps(emm2);
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = _mm_mul_ps(y, _ps_minus_cephes_DP1);
xmm2 = _mm_mul_ps(y, _ps_minus_cephes_DP2);
xmm3 = _mm_mul_ps(y, _ps_minus_cephes_DP3);
/* NOTE(review): the sum is grouped as (x + xmm1) + (xmm2 + xmm3), not strictly left to right as in the formula above */
x = _mm_add_ps(_mm_add_ps(x, xmm1), _mm_add_ps(xmm2, xmm3));
/* build the sign mask for the cosine from bit 2 of ~(j - 2) */
emm4 = _mm_sub_epi32(emm4, _pi32_2);
emm4 = _mm_andnot_si128(emm4, _pi32_4);
emm4 = _mm_slli_epi32(emm4, 29);
sign_bit_cos = _mm_castsi128_ps(emm4);
/* combine the sign of the input with the swap flag to get the final sine sign */
sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin);
/* Evaluate the first polynom (cosine coefficients, 0 <= x <= Pi/4) */
z = _mm_mul_ps(x,x);
y = _ps_coscof_p0;
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, _ps_coscof_p1);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, _ps_coscof_p2);
y = _mm_mul_ps(y, _mm_mul_ps(z, z));
tmp = _mm_mul_ps(z, _ps_0p5);
y = _mm_sub_ps(y, tmp);
y = _mm_add_ps(y, _ps_1);
/* Evaluate the second polynom (sine coefficients) */
y2 = _ps_sincof_p0;
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, _ps_sincof_p1);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, _ps_sincof_p2);
y2 = _mm_mul_ps(y2, _mm_mul_ps(z, x));
y2 = _mm_add_ps(y2, x);
/* select the correct result from the two polynoms */
xmm3 = poly_mask;
/* lanes where poly_mask is set take y2 for the sine, the other lanes take y */
ysin2 = _mm_and_ps(xmm3, y2);
ysin1 = _mm_andnot_ps(xmm3, y);
/* subtracting the parts already used for the sine leaves the complementary parts for the cosine */
y2 = _mm_sub_ps(y2,ysin2);
y = _mm_sub_ps(y, ysin1);
xmm1 = _mm_add_ps(ysin1,ysin2);
xmm2 = _mm_add_ps(y,y2);
/* update the sign */
s = _mm_xor_ps(xmm1, sign_bit_sin);
c = _mm_xor_ps(xmm2, sign_bit_cos);
//GNSS-SDR needs to return -sin
s = _mm_xor_ps(s, _ps_sign_mask);
/* spill both vectors to the aligned scratch arrays and combine them sample by sample */
_mm_store_ps ((float*)sin_value, s);
_mm_store_ps ((float*)cos_value, c);
for(unsigned int e = 0; e < 4; e++)
{
d_carr_sign[e] = lv_cmake(cos_value[e], sin_value[e]);
}
d_carr_sign += 4;
/* advance all four lanes to the phases of the next group of 4 samples */
phase_rad_array = _mm_add_ps (phase_rad_array, phase_step_rad_array);
}
/* scalar tail: produce the remaining num_points % 4 samples with cos()/sin() */
if (num_points % 4 != 0)
{
__VOLK_ATTR_ALIGNED(16) float phase_rad_store[4];
_mm_store_ps ((float*)phase_rad_store, phase_rad_array);
/* lane 0 holds the phase of the first sample not yet written */
float phase_rad = phase_rad_store[0];
for(unsigned int i = 0; i < num_points%4; i++)
{
*d_carr_sign = lv_cmake(cos(phase_rad), -sin(phase_rad));
d_carr_sign++;
phase_rad += phase_step_rad;
}
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Generates a local carrier: output sample k is lv_cmake(cos(phase_k), -sin(phase_k)), where the phase starts at phase_rad_init and is incremented by phase_step_rad after every sample (generic version)
\param d_carr_sign Output buffer; num_points complex samples are written to it
\param phase_rad_init Phase of the first output sample, in radians
\param phase_step_rad Phase increment between consecutive output samples, in radians
\param num_points Number of complex samples to write into d_carr_sign
*/
static inline void volk_gnsssdr_s32f_x2_update_local_carrier_32fc_a_generic(lv_32fc_t* d_carr_sign, const float phase_rad_init, const float phase_step_rad, unsigned int num_points)
{
    /* running phase, accumulated in single precision one step per sample */
    float current_phase = phase_rad_init;
    unsigned int n = 0;
    while (n < num_points)
        {
            d_carr_sign[n] = lv_cmake(cos(current_phase), -sin(current_phase));
            current_phase += phase_step_rad;
            n++;
        }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_x2_update_local_carrier_32fc_a_H */

View File

@ -17,6 +17,23 @@
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
#
# Copyright 2011-2012,2014 Free Software Foundation, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
# header file detection
########################################################################
@ -79,10 +96,6 @@ if(HAVE_POSIX_MEMALIGN)
add_definitions(-DHAVE_POSIX_MEMALIGN)
endif(HAVE_POSIX_MEMALIGN)
if(NOT DEFINED _XOPEN_SOURCE)
add_definitions(-D_XOPEN_SOURCE=700)
endif(NOT DEFINED _XOPEN_SOURCE)
########################################################################
# detect x86 flavor of CPU
########################################################################
@ -244,7 +257,6 @@ endif(NOT CPU_IS_x86)
# implement overruling in the ORC case,
# since ORC always passes flag detection
########################################################################
find_package(ORC)
if(NOT ORC_FOUND)
OVERRULE_ARCH(orc "ORC support not found")
endif()
@ -254,17 +266,20 @@ endif()
# this makes things work when both -m32 and -m64 pass
########################################################################
if(NOT CROSSCOMPILE_MULTILIB AND CPU_IS_x86)
if( CMAKE_SIZEOF_VOID_P EQUAL 8 )
include(CheckTypeSize)
check_type_size("void*[8]" SIZEOF_CPU BUILTIN_TYPES_ONLY)
if (${SIZEOF_CPU} EQUAL 64)
OVERRULE_ARCH(32 "CPU width is 64 bits")
endif()
if( CMAKE_SIZEOF_VOID_P EQUAL 4 )
if (${SIZEOF_CPU} EQUAL 32)
OVERRULE_ARCH(64 "CPU width is 32 bits")
endif()
#MSVC 64 bit does not have MMX, overrule it
if (CMAKE_SIZEOF_VOID_P EQUAL 8 AND MSVC)
if (${SIZEOF_CPU} EQUAL 64 AND MSVC)
OVERRULE_ARCH(mmx "No MMX for Win64")
endif()
endif()
########################################################################
@ -336,24 +351,24 @@ gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_machines.tmpl.h ${CMAKE_B
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_machines.tmpl.c ${CMAKE_BINARY_DIR}/lib/volk_gnsssdr_machines.c)
set(BASE_CFLAGS NONE)
STRING(TOUPPER ${CMAKE_BUILD_TYPE} CBTU)
MESSAGE(STATUS BUILT TYPE ${CBTU})
MESSAGE(STATUS "Base cflags = ${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS}")
string(TOUPPER ${CMAKE_BUILD_TYPE} CBTU)
message(STATUS "BUILD TYPE = ${CBTU}")
message(STATUS "Base cflags = ${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS}")
set(COMPILER_INFO "")
IF(MSVC)
IF(MSVC90) #Visual Studio 9
SET(cmake_c_compiler_version "Microsoft Visual Studio 9.0")
ELSE(MSVC10) #Visual Studio 10
SET(cmake_c_compiler_version "Microsoft Visual Studio 10.0")
ELSE(MSVC11) #Visual Studio 11
SET(cmake_c_compiler_version "Microsoft Visual Studio 11.0")
ELSE(MSVC12) #Visual Studio 12
if(MSVC)
if(MSVC90) #Visual Studio 9
set(cmake_c_compiler_version "Microsoft Visual Studio 9.0")
elseif(MSVC10) #Visual Studio 10
set(cmake_c_compiler_version "Microsoft Visual Studio 10.0")
elseif(MSVC11) #Visual Studio 11
set(cmake_c_compiler_version "Microsoft Visual Studio 11.0")
elseif(MSVC12) #Visual Studio 12
SET(cmake_c_compiler_version "Microsoft Visual Studio 12.0")
ENDIF()
ELSE()
endif()
else()
execute_process(COMMAND ${CMAKE_C_COMPILER} --version
OUTPUT_VARIABLE cmake_c_compiler_version)
ENDIF(MSVC)
endif(MSVC)
# Record each compiler with its build-type-specific flags followed by its common flags.
# The build-type suffix is CBTU (the upper-cased CMAKE_BUILD_TYPE computed above); the
# previously used GRCBTU is never set, which made the per-build-type flags expand to nothing.
set(COMPILER_INFO "${CMAKE_C_COMPILER}:::${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS}\n${CMAKE_CXX_COMPILER}:::${CMAKE_CXX_FLAGS_${CBTU}} ${CMAKE_CXX_FLAGS}\n" )
foreach(machine_name ${available_machines})
@ -402,7 +417,7 @@ include_directories(
# on by default, but let users turn it off
########################################################################
if(${CMAKE_VERSION} VERSION_GREATER "2.8.9")
set(ASM_ARCHS_AVAILABLE "armv7")
set(ASM_ARCHS_AVAILABLE "neon")
set(FULL_C_FLAGS "${CMAKE_C_FLAGS}" "${CMAKE_CXX_COMPILER_ARG1}")
@ -410,10 +425,10 @@ if(${CMAKE_VERSION} VERSION_GREATER "2.8.9")
# if we find one that matches our current system architecture
# set up the assembler flags and include the source files
foreach(ARCH ${ASM_ARCHS_AVAILABLE})
string(REGEX MATCH "${ARCH}" ASM_ARCH "${FULL_C_FLAGS}")
if( ASM_ARCH STREQUAL "armv7" )
string(REGEX MATCH "${ARCH}" ASM_ARCH "${available_archs}")
if( ASM_ARCH STREQUAL "neon" )
message(STATUS "---- Adding ASM files") # we always use ATT syntax
message(STATUS "-- Detected armv7 architecture; enabling ASM")
message(STATUS "-- Detected neon architecture; enabling ASM")
# setup architecture specific assembler flags
set(ARCH_ASM_FLAGS "-mfpu=neon -g")
# then add the files
@ -450,7 +465,7 @@ if(ORC_FOUND)
list(APPEND volk_gnsssdr_libraries ${ORC_LIBRARIES})
#setup orc functions
file(GLOB orc_files ${CMAKE_SOURCE_DIR}/orc/*.orc)
file(GLOB orc_files ${CMAKE_SOURCE_DIR}/kernels/volk_gnsssdr/asm/orc/*.orc)
foreach(orc_file ${orc_files})
#extract the name for the generated c source from the orc file
@ -469,16 +484,10 @@ else()
message(STATUS "Did not find liborc and orcc, disabling orc support...")
endif()
########################################################################
# Handle the generated constants
########################################################################
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c
"import time;print time.strftime('%a, %d %b %Y %H:%M:%S', time.gmtime())"
OUTPUT_VARIABLE BUILD_DATE OUTPUT_STRIP_TRAILING_WHITESPACE
)
message(STATUS "Loading build date ${BUILD_DATE} into constants...")
message(STATUS "Loading version ${VERSION} into constants...")
#double escape for windows backslash path separators
@ -520,71 +529,84 @@ if(MSVC)
set_source_files_properties(${volk_gnsssdr_sources} PROPERTIES LANGUAGE CXX)
endif()
#create the volk_gnsssdr runtime library
#Use object library for faster overall build in newer versions of cmake
if(CMAKE_VERSION VERSION_GREATER "2.8.7")
#Create a volk_gnsssdr object library (requires cmake >= 2.8.8)
add_library(volk_gnsssdr_obj OBJECT ${volk_gnsssdr_sources})
#MODIFICATIONS BY GNSS-SDR
file(GLOB orc ${CMAKE_SOURCE_DIR}/orc/*.orc)
file(GLOB CommonMacros ${CMAKE_SOURCE_DIR}/kernels/CommonMacros/*.h ${CMAKE_SOURCE_DIR}/kernels/CommonMacros/README.txt)
#Add dynamic library
add_library(volk_gnsssdr SHARED $<TARGET_OBJECTS:volk_gnsssdr_obj>)
target_link_libraries(volk_gnsssdr ${volk_gnsssdr_libraries})
#add_library(volk_gnsssdr SHARED ${volk_gnsssdr_sources})
if(ENABLE_STATIC_LIBS)
add_library(volk_gnsssdr STATIC ${volk_gnsssdr_sources} ${h_files} ${CommonMacros} ${orc})
else(ENABLE_STATIC_LIBS)
add_library(volk_gnsssdr SHARED ${volk_gnsssdr_sources} ${h_files} ${CommonMacros} ${orc})
endif(ENABLE_STATIC_LIBS)
#Configure target properties
set_target_properties(volk_gnsssdr_obj PROPERTIES COMPILE_FLAGS "-fPIC")
set_target_properties(volk_gnsssdr PROPERTIES SOVERSION ${LIBVER})
set_target_properties(volk_gnsssdr PROPERTIES DEFINE_SYMBOL "volk_gnsssdr_EXPORTS")
source_group("Kernels" FILES ${h_files})
source_group("Common Macros" FILES ${CommonMacros})
source_group("ORC Files" FILES ${orc})
#END OF MODIFICATIONS
target_link_libraries(volk_gnsssdr ${volk_gnsssdr_libraries})
set_target_properties(volk_gnsssdr PROPERTIES SOVERSION ${LIBVER})
set_target_properties(volk_gnsssdr PROPERTIES DEFINE_SYMBOL "volk_gnsssdr_EXPORTS")
install(TARGETS volk_gnsssdr
LIBRARY DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_runtime" # .so file
ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_devel" # .lib file
RUNTIME DESTINATION bin COMPONENT "volk_gnsssdr_runtime" # .dll file
)
if(ENABLE_STATIC_LIBS)
add_library(volk_gnsssdr_static STATIC ${volk_gnsssdr_sources})
if(NOT WIN32)
set_target_properties(volk_gnsssdr_static
PROPERTIES OUTPUT_NAME volk_gnsssdr)
endif(NOT WIN32)
install(TARGETS volk_gnsssdr_static
ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_devel" # .lib file
#Install locations
install(TARGETS volk_gnsssdr
LIBRARY DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_runtime" # .so file
ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_devel" # .lib file
RUNTIME DESTINATION bin COMPONENT "volk_gnsssdr_runtime" # .dll file
)
endif(ENABLE_STATIC_LIBS)
#Configure static library
if(ENABLE_STATIC_LIBS)
add_library(volk_gnsssdr_static STATIC $<TARGET_OBJECTS:volk_gnsssdr_obj>)
set_target_properties(volk_gnsssdr_static PROPERTIES OUTPUT_NAME volk_gnsssdr)
install(TARGETS volk_gnsssdr_static
ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_devel"
)
endif(ENABLE_STATIC_LIBS)
#Older cmake versions (slower to build when building dynamic/static libs)
else()
#create the volk_gnsssdr runtime library
add_library(volk_gnsssdr SHARED ${volk_gnsssdr_sources})
target_link_libraries(volk_gnsssdr ${volk_gnsssdr_libraries})
set_target_properties(volk_gnsssdr PROPERTIES SOVERSION ${LIBVER})
set_target_properties(volk_gnsssdr PROPERTIES DEFINE_SYMBOL "volk_gnsssdr_EXPORTS")
install(TARGETS volk_gnsssdr
LIBRARY DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_runtime" # .so file
ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_devel" # .lib file
RUNTIME DESTINATION bin COMPONENT "volk_gnsssdr_runtime" # .dll file
)
if(ENABLE_STATIC_LIBS)
add_library(volk_gnsssdr_static STATIC ${volk_gnsssdr_sources})
if(NOT WIN32)
set_target_properties(volk_gnsssdr_static
PROPERTIES OUTPUT_NAME volk_gnsssdr)
endif(NOT WIN32)
install(TARGETS volk_gnsssdr_static
ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_devel" # .lib file
)
endif(ENABLE_STATIC_LIBS)
endif(CMAKE_VERSION VERSION_GREATER "2.8.7")
########################################################################
# Build the QA test application
########################################################################
if(ENABLE_TESTING)
if(Boost_FOUND)
#include Boost headers
include_directories(${Boost_INCLUDE_DIRS})
make_directory(${CMAKE_CURRENT_BINARY_DIR}/.unittest)
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc PROPERTIES
COMPILE_DEFINITIONS "BOOST_TEST_DYN_LINK;BOOST_TEST_MAIN"
)
include_directories(${Boost_INCLUDE_DIRS})
link_directories(${Boost_LIBRARY_DIRS})
add_executable(test_all
${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc
${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc
include(VolkAddTest)
VOLK_ADD_TEST(test_all
SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc
${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc
TARGET_DEPS volk_gnsssdr
)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(Clang_required_link "c++")
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(Clang_required_link "")
endif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
target_link_libraries(test_all volk_gnsssdr ${Boost_LIBRARIES} ${Clang_required_link})
add_test(qa_volk_gnsssdr_test_all test_all)
endif(Boost_FOUND)
endif(ENABLE_TESTING)

View File

@ -0,0 +1,84 @@
/*!
* \file kernel_tests.h
* \author Carles Fernandez-Prades, 2015. cfernandez(at)cttc.es
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#include "qa_utils.h"
#include <vector>
#include <boost/assign/list_of.hpp>
#include <volk_gnsssdr/volk_gnsssdr.h>
// Macros for initializing volk_gnsssdr_test_case_t. Macros are needed to generate
// function names of the pattern kernel_name_* (done here by token pasting with ##).
// For puppets we need to get all the func_variants for the puppet and just
// keep track of the actual function name to write to results.

// VOLK_INIT_PUPP(func, puppet_master_func, test_params): builds a test case from
// func##_get_func_desc() and func##_manual (cast to a generic void(*)() pointer),
// the stringified names of func and puppet_master_func, and test_params.
#define VOLK_INIT_PUPP(func, puppet_master_func, test_params)\
volk_gnsssdr_test_case_t(func##_get_func_desc(), (void(*)())func##_manual, std::string(#func),\
std::string(#puppet_master_func), test_params)
// VOLK_INIT_TEST(func, test_params): same as above for a regular kernel, i.e. without
// the extra puppet-master name argument.
#define VOLK_INIT_TEST(func, test_params)\
volk_gnsssdr_test_case_t(func##_get_func_desc(), (void(*)())func##_manual, std::string(#func),\
test_params)
// Builds the list of kernel test cases to run. Every active entry is created with
// VOLK_INIT_TEST using the caller-supplied test_params; the entries are returned in
// the order in which they are appended below.
std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t test_params)
{
    // Alternative parameter sets that copy everything from test_params except the first
    // constructor argument (1e-3 and 1 respectively). The original note describes them as
    // being for kernels that need a different tolerance; none of the currently active
    // test cases references them.
    volk_gnsssdr_test_params_t test_params_inacc(1e-3, test_params.scalar(),
        test_params.vlen(), test_params.iter(), test_params.benchmark_mode(), test_params.kernel_regex());
    volk_gnsssdr_test_params_t test_params_int1(1, test_params.scalar(),
        test_params.vlen(), test_params.iter(), test_params.benchmark_mode(), test_params.kernel_regex());

    std::vector<volk_gnsssdr_test_case_t> test_cases;

    // Disabled: no one uses these kernels, so they are not tested.
    //VOLK_PROFILE(volk_gnsssdr_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
    //VOLK_PROFILE(volk_gnsssdr_16i_branch_4_state_8, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
    //VOLK_PROFILE(volk_gnsssdr_16i_max_star_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
    //VOLK_PROFILE(volk_gnsssdr_16i_max_star_horizontal_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
    //VOLK_PROFILE(volk_gnsssdr_16i_permute_and_scalar_add, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
    //VOLK_PROFILE(volk_gnsssdr_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);

    // Disabled: a puppet is needed for this one.
    //(VOLK_INIT_TEST(volk_gnsssdr_32fc_s32f_x2_power_spectral_density_32f, test_params))
    //(VOLK_INIT_TEST(volk_gnsssdr_32f_null_32f, test_params))

    // Active test cases.
    test_cases.push_back(VOLK_INIT_TEST(volk_gnsssdr_8i_accumulator_s8i, test_params));
    test_cases.push_back(VOLK_INIT_TEST(volk_gnsssdr_8i_index_max_16u, test_params));
    test_cases.push_back(VOLK_INIT_TEST(volk_gnsssdr_8i_max_s8i, test_params));
    test_cases.push_back(VOLK_INIT_TEST(volk_gnsssdr_8i_x2_add_8i, test_params));
    test_cases.push_back(VOLK_INIT_TEST(volk_gnsssdr_8ic_conjugate_8ic, test_params));
    test_cases.push_back(VOLK_INIT_TEST(volk_gnsssdr_8ic_magnitude_squared_8i, test_params));
    test_cases.push_back(VOLK_INIT_TEST(volk_gnsssdr_8ic_x2_dot_prod_8ic, test_params));
    test_cases.push_back(VOLK_INIT_TEST(volk_gnsssdr_8ic_x2_multiply_8ic, test_params));
    test_cases.push_back(VOLK_INIT_TEST(volk_gnsssdr_8u_x2_multiply_8u, test_params));
    test_cases.push_back(VOLK_INIT_TEST(volk_gnsssdr_64f_accumulator_64f, test_params));
    test_cases.push_back(VOLK_INIT_TEST(volk_gnsssdr_32fc_convert_8ic, test_params));

    return test_cases;
}

View File

@ -1,111 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_16s_add_quad_aligned16.h>
#include <volk_gnsssdr/volk_gnsssdr_16s_add_quad_aligned16.h>
#include <cstdlib>
#include <ctime>
//test for sse2
#ifndef LV_HAVE_SSE2
// Fallback compiled when LV_HAVE_SSE2 is not defined: nothing is compared,
// the test only prints a notice and returns.
void qa_16s_add_quad_aligned16::t1()
{
printf("sse2 not available... no test performed\n");
}
#else
void qa_16s_add_quad_aligned16::t1()
{
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
const int vlen = 3200;
const int ITERS = 100000;
__VOLK_ATTR_ALIGNED(16) short input0[vlen];
__VOLK_ATTR_ALIGNED(16) short input1[vlen];
__VOLK_ATTR_ALIGNED(16) short input2[vlen];
__VOLK_ATTR_ALIGNED(16) short input3[vlen];
__VOLK_ATTR_ALIGNED(16) short input4[vlen];
__VOLK_ATTR_ALIGNED(16) short output0[vlen];
__VOLK_ATTR_ALIGNED(16) short output1[vlen];
__VOLK_ATTR_ALIGNED(16) short output2[vlen];
__VOLK_ATTR_ALIGNED(16) short output3[vlen];
__VOLK_ATTR_ALIGNED(16) short output01[vlen];
__VOLK_ATTR_ALIGNED(16) short output11[vlen];
__VOLK_ATTR_ALIGNED(16) short output21[vlen];
__VOLK_ATTR_ALIGNED(16) short output31[vlen];
for(int i = 0; i < vlen; ++i)
{
short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus1 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus1 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus2 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus2 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
input0[i] = plus0 - minus0;
input1[i] = plus1 - minus1;
input2[i] = plus2 - minus2;
input3[i] = plus3 - minus3;
input4[i] = plus4 - minus4;
}
printf("16s_add_quad_aligned\n");
start = clock();
for(int count = 0; count < ITERS; ++count)
{
volk_gnsssdr_16s_add_quad_aligned16_manual(output0, output1, output2, output3, input0, input1, input2, input3, input4, vlen << 1 , "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count)
{
volk_gnsssdr_16s_add_quad_aligned16_manual(output01, output11, output21, output31, input0, input1, input2, input3, input4, vlen << 1 , "sse2");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse2_time: %f\n", total);
for(int i = 0; i < 1; ++i)
{
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
}
for(int i = 0; i < vlen; ++i)
{
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output1[i], output11[i]);
CPPUNIT_ASSERT_EQUAL(output2[i], output21[i]);
CPPUNIT_ASSERT_EQUAL(output3[i], output31[i]);
}
}
#endif

View File

@ -1,36 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H
#define INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
// CppUnit test case for the 16s_add_quad_aligned16 kernel; it exposes a single
// test method, t1, which is registered in the suite below.
class qa_16s_add_quad_aligned16 : public CppUnit::TestCase
{
// suite declaration: registers t1 as the only test of this class
CPPUNIT_TEST_SUITE (qa_16s_add_quad_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
// the test body (defined in the accompanying source file)
void t1 ();
};
#endif /* INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H */

View File

@ -1,123 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_16s_branch_4_state_8_aligned16.h>
#include <cstdlib>
#include <ctime>
//test for ssse3
#ifndef LV_HAVE_SSSE3
// Fallback compiled when LV_HAVE_SSSE3 is not defined: nothing is compared,
// the test only prints a notice and returns.
void qa_16s_branch_4_state_8_aligned16::t1()
{
printf("ssse3 not available... no test performed\n");
}
#else
// Times three kernels on the same 32-element input and checks that they agree:
//   1. permute_and_scalar_add ("sse2")      -> target
//   2. branch_4_state_8       ("ssse3")     -> target2
//   3. branch_4_state_8       ("generic")   -> target3
// The test passes when target2 and target3 both equal target element-wise.
void qa_16s_branch_4_state_8_aligned16::t1()
{
    const int num_iters = 1000000;
    const int vlen = 32;

    // byte-shuffle tables handed to branch_4_state_8 (one per branch)
    static char permute0[16]__attribute__((aligned(16))) = {0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01, 0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03};
    static char permute1[16]__attribute__((aligned(16))) = {0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03, 0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01};
    static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f};
    static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d};
    static char* permuters[4] = {permute0, permute1, permute2, permute3};

    // size of one vlen-element short buffer in bytes
    unsigned int num_bytes = vlen << 1;

    volk_gnsssdr_environment_init();
    clock_t start, end;
    double total;

    __VOLK_ATTR_ALIGNED(16) short target[vlen];
    __VOLK_ATTR_ALIGNED(16) short target2[vlen];
    __VOLK_ATTR_ALIGNED(16) short target3[vlen];
    __VOLK_ATTR_ALIGNED(16) short src0[vlen];

    // element-index permutation and control masks for permute_and_scalar_add;
    // cntl2 and cntl3 are also passed to branch_4_state_8
    __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = {
        7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 };
    __VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = {
        0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
    __VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = {
        0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
    __VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = {
        0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 };
    __VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = {
        0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff };
    __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};

    // the source is simply 0, 1, ..., vlen-1
    for(int i = 0; i < vlen; ++i)
        {
            src0[i] = i;
        }

    printf("16s_branch_4_state_8_aligned\n");

    // reference result: permute_and_scalar_add, sse2
    start = clock();
    for(int i = 0; i < num_iters; ++i)
        {
            volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("permute_and_scalar_add_time: %f\n", total);

    // branch_4_state_8, ssse3
    start = clock();
    for(int i = 0; i < num_iters; ++i)
        {
            volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target2, src0, permuters, cntl2, cntl3, scalars, "ssse3");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("branch_4_state_8_time, ssse3: %f\n", total);

    // branch_4_state_8, generic
    start = clock();
    for(int i = 0; i < num_iters; ++i)
        {
            volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    // this run times branch_4_state_8 (generic), so label it accordingly
    // (it used to be reported as a permute_and_scalar_add timing)
    printf("branch_4_state_8_time, generic: %f\n", total);

    // dump the reference next to the generic branch_4_state_8 result
    for(int i = 0; i < vlen; ++i)
        {
            printf("psa... %d, b4s8... %d\n", target[i], target3[i]);
        }

    // both branch_4_state_8 implementations must match the reference
    for(int i = 0; i < vlen; ++i)
        {
            CPPUNIT_ASSERT(target[i] == target2[i]);
            CPPUNIT_ASSERT(target[i] == target3[i]);
        }
}
#endif

View File

@ -1,36 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H
#define INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
// CppUnit test case for the 16s_branch_4_state_8_aligned16 kernel; it exposes a
// single test method, t1, which is registered in the suite below.
class qa_16s_branch_4_state_8_aligned16 : public CppUnit::TestCase {
// suite declaration: registers t1 as the only test of this class
CPPUNIT_TEST_SUITE (qa_16s_branch_4_state_8_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
// the test body (defined in the accompanying source file)
void t1 ();
};
#endif /* INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H */

View File

@ -1,101 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_16s_permute_and_scalar_add_aligned16.h>
#include <volk_gnsssdr/volk_gnsssdr_16s_permute_and_scalar_add_aligned16.h>
#include <cstdlib>
#include <ctime>
//test for sse2
#ifdef LV_HAVE_SSE2
/*
 * Runs the permute-and-scalar-add kernel through its "generic" and "sse2"
 * implementations on identical inputs, prints the wall-clock time of each
 * batch of calls, and asserts that both outputs agree element by element.
 */
void qa_16s_permute_and_scalar_add_aligned16::t1()
{
    const int vlen = 64;
    const int kIterations = 100000;
    unsigned int num_bytes = vlen * 2;
    volk_gnsssdr_environment_init();
    clock_t tic, toc;
    double elapsed;
    __VOLK_ATTR_ALIGNED(16) short out_generic[vlen];
    __VOLK_ATTR_ALIGNED(16) short out_sse2[vlen];
    __VOLK_ATTR_ALIGNED(16) short src0[vlen];
    __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen];
    __VOLK_ATTR_ALIGNED(16) short cntl0[vlen];
    __VOLK_ATTR_ALIGNED(16) short cntl1[vlen];
    __VOLK_ATTR_ALIGNED(16) short cntl2[vlen];
    __VOLK_ATTR_ALIGNED(16) short cntl3[vlen];
    __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};

    // Fill the source, the permutation table and the four control masks.
    for(int k = 0; k < vlen; ++k)
        {
            src0[k] = k;
            permute_indexes[k] = (3 * k) % vlen;
            cntl0[k] = 0xff;
            cntl1[k] = (k % 2) * 0xff;
            cntl2[k] = ((k >> 1) % 2) * 0xff;
            cntl3[k] = ((k % 4) == 3) ? 0xff : 0;
        }

    printf("16s_permute_and_scalar_add_aligned\n");

    // Time the generic implementation.
    tic = clock();
    for(int rep = 0; rep < kIterations; ++rep)
        {
            volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(out_generic, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "generic");
        }
    toc = clock();
    elapsed = (double)(toc - tic) / (double)CLOCKS_PER_SEC;
    printf("generic_time: %f\n", elapsed);

    // Time the SSE2 implementation.
    tic = clock();
    for(int rep = 0; rep < kIterations; ++rep)
        {
            volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(out_sse2, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
        }
    toc = clock();
    elapsed = (double)(toc - tic) / (double)CLOCKS_PER_SEC;
    printf("sse2_time: %f\n", elapsed);

    // Both implementations must produce the same samples.
    for(int k = 0; k < vlen; ++k)
        {
            CPPUNIT_ASSERT(out_generic[k] == out_sse2[k]);
        }
}
#else
/* Fallback when LV_HAVE_SSE2 is not defined: only reports that nothing ran. */
void qa_16s_permute_and_scalar_add_aligned16::t1()
{
    printf("sse2 not available... no test performed\n");
}
#endif

View File

@ -1,36 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H
#define INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
/* CppUnit test fixture: derives from CppUnit::TestCase and registers a
 * single test method, t1, through the CPPUNIT_TEST_SUITE macros. */
class qa_16s_permute_and_scalar_add_aligned16 : public CppUnit::TestCase {
CPPUNIT_TEST_SUITE (qa_16s_permute_and_scalar_add_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
// The one registered test; only declared here.
void t1 ();
};
#endif /* INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H */

View File

@ -1,82 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_16s_quad_max_star_aligned16.h>
#include <volk_gnsssdr/volk_gnsssdr_16s_quad_max_star_aligned16.h>
#include <cstdlib>
#include <ctime>
//test for sse2
#ifndef LV_HAVE_SSE2
// Fallback when LV_HAVE_SSE2 is not defined: only reports that nothing ran.
void qa_16s_quad_max_star_aligned16::t1()
{
printf("sse2 not available... no test performed\n");
}
#else
/*
 * Feeds four random 16-bit input vectors to the "generic" and "sse2"
 * implementations of the quad_max_star kernel, prints every output pair
 * together with its inputs, and asserts that the two outputs are equal.
 */
void qa_16s_quad_max_star_aligned16::t1()
{
const int vlen = 34;
__VOLK_ATTR_ALIGNED(16) short input0[vlen];
__VOLK_ATTR_ALIGNED(16) short input1[vlen];
__VOLK_ATTR_ALIGNED(16) short input2[vlen];
__VOLK_ATTR_ALIGNED(16) short input3[vlen];
__VOLK_ATTR_ALIGNED(16) short output0[vlen];
__VOLK_ATTR_ALIGNED(16) short output1[vlen];
// Each input sample is the difference of two random values; rand() is
// never seeded here, so the sequence depends on the caller's seed state.
for(int i = 0; i < vlen; ++i)
{
short plus0 = (short) (rand() - (RAND_MAX/2));
short plus1 = (short) (rand() - (RAND_MAX/2));
short plus2 = (short) (rand() - (RAND_MAX/2));
short plus3 = (short) (rand() - (RAND_MAX/2));
short minus0 = (short) (rand() - (RAND_MAX/2));
short minus1 = (short) (rand() - (RAND_MAX/2));
short minus2 = (short) (rand() - (RAND_MAX/2));
short minus3 = (short) (rand() - (RAND_MAX/2));
input0[i] = plus0 - minus0;
input1[i] = plus1 - minus1;
input2[i] = plus2 - minus2;
input3[i] = plus3 - minus3;
}
// The size argument is 2*vlen -- presumably a byte count for 16-bit samples; TODO confirm.
volk_gnsssdr_16s_quad_max_star_aligned16_manual(output0, input0, input1, input2, input3, 2*vlen, "generic");
volk_gnsssdr_16s_quad_max_star_aligned16_manual(output1, input0, input1, input2, input3, 2*vlen, "sse2");
printf("16s_quad_max_star_aligned\n");
// Dump every result pair with its inputs for manual inspection.
for(int i = 0; i < vlen; ++i)
{
printf("generic... %d, sse2... %d, inputs: %d, %d, %d, %d\n", output0[i], output1[i], input0[i], input1[i], input2[i], input3[i]);
}
// Both implementations must agree exactly.
for(int i = 0; i < vlen; ++i)
{
CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
}
}
#endif

View File

@ -1,37 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H
#define INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
/* CppUnit test fixture: derives from CppUnit::TestCase and registers a
 * single test method, t1, through the CPPUNIT_TEST_SUITE macros. */
class qa_16s_quad_max_star_aligned16 : public CppUnit::TestCase {
CPPUNIT_TEST_SUITE (qa_16s_quad_max_star_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
// The one registered test; only declared here.
void t1 ();
};
#endif /* INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H */

View File

@ -1,86 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_32f_fm_detect_aligned16.h>
#include <volk_gnsssdr/volk_gnsssdr_32f_fm_detect_aligned16.h>
#include <cstdlib>
#include <ctime>
//test for sse
#ifdef LV_HAVE_SSE
/*
 * Runs the fm_detect kernel ITERS times through its "generic" and "sse"
 * implementations on the same random input, prints the elapsed time of
 * each batch, and asserts that the outputs match within a relative 1e-4.
 */
void qa_32f_fm_detect_aligned16::t1()
{
    volk_gnsssdr_environment_init();
    clock_t tic, toc;
    double elapsed;
    const int vlen = 3201;
    const int ITERS = 10000;
    __VOLK_ATTR_ALIGNED(16) float samples[vlen];
    __VOLK_ATTR_ALIGNED(16) float out_generic[vlen];
    __VOLK_ATTR_ALIGNED(16) float out_sse[vlen];

    // Random input scaled by RAND_MAX/2 so values lie roughly in [-1, 1].
    const float half_range = static_cast<float>((RAND_MAX/2));
    for(int k = 0; k < vlen; ++k)
        {
            samples[k] = ((float) (rand() - (RAND_MAX/2))) / half_range;
        }

    printf("32f_fm_detect_aligned\n");

    // Generic implementation; "state" is passed by address to every call.
    tic = clock();
    float state = 0.1;
    for(int rep = 0; rep < ITERS; ++rep)
        {
            volk_gnsssdr_32f_fm_detect_aligned16_manual(out_generic, samples, 1.0, &state, vlen, "generic");
        }
    toc = clock();
    elapsed = (double)(toc - tic) / (double)CLOCKS_PER_SEC;
    printf("generic_time: %f\n", elapsed);

    // SSE implementation, restarted from the same initial state value.
    tic = clock();
    state = 0.1;
    for(int rep = 0; rep < ITERS; ++rep)
        {
            volk_gnsssdr_32f_fm_detect_aligned16_manual(out_sse, samples, 1.0, &state, vlen, "sse");
        }
    toc = clock();
    elapsed = (double)(toc - tic) / (double)CLOCKS_PER_SEC;
    printf("sse_time: %f\n", elapsed);

    // Compare with a tolerance proportional to the generic result.
    for(int k = 0; k < vlen; ++k)
        {
            CPPUNIT_ASSERT_DOUBLES_EQUAL(out_generic[k], out_sse[k], fabs(out_generic[k]) * 1e-4);
        }
}
#else
/* Fallback when LV_HAVE_SSE is not defined: only reports that nothing ran. */
void qa_32f_fm_detect_aligned16::t1()
{
    printf("sse not available... no test performed\n");
}
#endif

View File

@ -1,36 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H
#define INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
/* CppUnit test fixture: derives from CppUnit::TestCase and registers a
 * single test method, t1, through the CPPUNIT_TEST_SUITE macros. */
class qa_32f_fm_detect_aligned16 : public CppUnit::TestCase {
CPPUNIT_TEST_SUITE (qa_32f_fm_detect_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
// The one registered test; only declared here.
void t1 ();
};
#endif /* INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H */

View File

@ -1,122 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#include <volk_gnsssdr/volk_gnsssdr_runtime.h>
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_32f_index_max_aligned16.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define ERR_DELTA (1e-4)
#define NUM_ITERS 1000000
#define VEC_LEN 3097
/* Maps one rand() draw onto the range -1 .. 1 and returns it as a float. */
static float uniform()
{
    const float unit = (float) rand() / RAND_MAX; // rand() scaled to 0 .. 1
    const double centred = unit - 0.5;            // shifted to -0.5 .. 0.5
    return 2.0 * centred;
}
/* Fills buf[0 .. n-1] with uniform() draws scaled by 32767. */
static void
random_floats (float *buf, unsigned n)
{
    for (unsigned int idx = 0; idx < n; ++idx)
        {
            buf[idx] = uniform () * 32767;
        }
}
#ifndef LV_HAVE_SSE
/* Fallback when LV_HAVE_SSE is not defined: only reports that nothing ran. */
void qa_32f_index_max_aligned16::t1()
{
    printf("sse not available... no test performed\n");
}
#else
/*
 * Runs the index_max kernel NUM_ITERS times through three paths -- the
 * "generic" and "sse2" manual implementations and whatever implementation
 * the runtime dispatcher selects -- prints the elapsed time of each, and
 * asserts that all three report the same index.
 */
void qa_32f_index_max_aligned16::t1()
{
    const int vlen = VEC_LEN;
    volk_gnsssdr_runtime_init();
    volk_gnsssdr_environment_init();
    int ret;
    unsigned int* target_sse4_1;
    unsigned int* target_sse;
    unsigned int* target_generic;
    float* src0 = NULL;
    unsigned int i_target_sse4_1;
    target_sse4_1 = &i_target_sse4_1;
    unsigned int i_target_sse;
    target_sse = &i_target_sse;
    unsigned int i_target_generic;
    target_generic = &i_target_generic;

    // posix_memalign reports failure only through its return value; stop
    // here instead of running the kernels on an unusable pointer.
    ret = posix_memalign((void**)&src0, 16, vlen * sizeof(float));
    CPPUNIT_ASSERT_EQUAL(0, ret);
    random_floats((float*)src0, vlen);

    printf("32f_index_max_aligned16\n");
    clock_t start, end;
    double total;

    // Generic implementation.
    start = clock();
    for(int k = 0; k < NUM_ITERS; ++k)
        {
            volk_gnsssdr_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("generic time: %f\n", total);

    // Implementation requested by name as "sse2".
    start = clock();
    for(int k = 0; k < NUM_ITERS; ++k)
        {
            volk_gnsssdr_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("sse time: %f\n", total);

    // Implementation chosen by the runtime dispatcher (labelled sse4.1 in the output).
    start = clock();
    for(int k = 0; k < NUM_ITERS; ++k)
        {
            get_volk_gnsssdr_runtime()->volk_gnsssdr_32f_index_max_aligned16(target_sse4_1, src0, vlen);
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("sse4.1 time: %f\n", total);

    printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], target_sse[0], target_sse4_1[0]);
    CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse[0]);
    CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse4_1[0]);
    free(src0);
}
#endif /*LV_HAVE_SSE*/

View File

@ -1,37 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H
#define INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
/* CppUnit test fixture: derives from CppUnit::TestCase and registers a
 * single test method, t1, through the CPPUNIT_TEST_SUITE macros. */
class qa_32f_index_max_aligned16 : public CppUnit::TestCase {
CPPUNIT_TEST_SUITE (qa_32f_index_max_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
// The one registered test; only declared here.
void t1 ();
};
#endif /* INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H */

View File

@ -1,104 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_32fc_index_max_aligned16.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define ERR_DELTA (1e-4)
#define NUM_ITERS 1000000
#define VEC_LEN 3096
/* Maps one rand() draw onto the range -1 .. 1 and returns it as a float. */
static float uniform()
{
    const float unit = (float) rand() / RAND_MAX; // rand() scaled to 0 .. 1
    const double centred = unit - 0.5;            // shifted to -0.5 .. 0.5
    return 2.0 * centred;
}
/* Fills buf[0 .. n-1] with uniform() draws scaled by 32767. */
static void
random_floats (float *buf, unsigned n)
{
    for (unsigned int idx = 0; idx < n; ++idx)
        {
            buf[idx] = uniform () * 32767;
        }
}
#ifndef LV_HAVE_SSE3
/* Fallback when LV_HAVE_SSE3 is not defined: only reports that nothing ran. */
void qa_32fc_index_max_aligned16::t1()
{
    printf("sse3 not available... no test performed\n");
}
#else
/*
 * Runs the complex index_max kernel NUM_ITERS times through its "generic"
 * and "sse3" implementations on the same random buffer, prints the elapsed
 * time of each, and asserts that the reported indices differ by at most 1.1.
 */
void qa_32fc_index_max_aligned16::t1()
{
    const int vlen = VEC_LEN;
    volk_gnsssdr_environment_init();
    int ret;
    unsigned int* target;
    unsigned int* target_generic;
    std::complex<float>* src0 = NULL;
    unsigned int i_target;
    target = &i_target;
    unsigned int i_target_generic;
    target_generic = &i_target_generic;

    // vlen << 3 is vlen * 8 bytes, i.e. room for vlen pairs of 4-byte floats.
    // posix_memalign reports failure only through its return value; stop
    // here instead of running the kernels on an unusable pointer.
    ret = posix_memalign((void**)&src0, 16, vlen << 3);
    CPPUNIT_ASSERT_EQUAL(0, ret);
    random_floats((float*)src0, vlen * 2);

    printf("32fc_index_max_aligned16\n");
    clock_t start, end;
    double total;

    // Generic implementation.
    start = clock();
    for(int k = 0; k < NUM_ITERS; ++k)
        {
            volk_gnsssdr_32fc_index_max_aligned16_manual(target_generic, src0, vlen << 3, "generic");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("generic time: %f\n", total);

    // SSE3 implementation.
    start = clock();
    for(int k = 0; k < NUM_ITERS; ++k)
        {
            volk_gnsssdr_32fc_index_max_aligned16_manual(target, src0, vlen << 3, "sse3");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("sse3 time: %f\n", total);

    printf("generic: %u, sse3: %u\n", target_generic[0], target[0]);
    // The 1.1 tolerance lets the two indices differ by one position.
    CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], 1.1);
    free(src0);
}
#endif /*LV_HAVE_SSE3*/

View File

@ -1,36 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H
#define INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
/* CppUnit test fixture: derives from CppUnit::TestCase and registers a
 * single test method, t1, through the CPPUNIT_TEST_SUITE macros. */
class qa_32fc_index_max_aligned16 : public CppUnit::TestCase {
CPPUNIT_TEST_SUITE (qa_32fc_index_max_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
// The one registered test; only declared here.
void t1 ();
};
#endif /* INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H */

View File

@ -1,87 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_32fc_power_spectral_density_32f_aligned16.h>
#include <volk_gnsssdr/volk_gnsssdr_32fc_power_spectral_density_32f_aligned16.h>
#include <cstdlib>
#include <ctime>
//test for sse3
#ifdef LV_HAVE_SSE3
/*
 * Runs the power_spectral_density kernel ITERS times through its "generic"
 * and "sse3" implementations on the same random complex input, prints the
 * elapsed time of each batch, and asserts that the outputs match within a
 * relative 1e-4.
 */
void qa_32fc_power_spectral_density_32f_aligned16::t1()
{
    volk_gnsssdr_environment_init();
    clock_t tic, toc;
    double elapsed;
    const int vlen = 3201;
    const int ITERS = 10000;
    __VOLK_ATTR_ALIGNED(16) std::complex<float> samples[vlen];
    __VOLK_ATTR_ALIGNED(16) float psd_generic[vlen];
    __VOLK_ATTR_ALIGNED(16) float psd_sse3[vlen];
    const float scalar = vlen;
    const float rbw = 1.7;

    // Fill the complex buffer through a float view: 2*vlen scalars.
    float* raw = (float*)samples;
    const float half_range = static_cast<float>((RAND_MAX/2));
    for(int k = 0; k < 2*vlen; ++k)
        {
            raw[k] = (((float) (rand() - (RAND_MAX/2))) / half_range);
        }

    printf("32fc_power_spectral_density_32f_aligned\n");

    // Generic implementation.
    tic = clock();
    for(int rep = 0; rep < ITERS; ++rep)
        {
            volk_gnsssdr_32fc_power_spectral_density_32f_aligned16_manual(psd_generic, samples, scalar, rbw, vlen, "generic");
        }
    toc = clock();
    elapsed = (double)(toc - tic) / (double)CLOCKS_PER_SEC;
    printf("generic_time: %f\n", elapsed);

    // SSE3 implementation.
    tic = clock();
    for(int rep = 0; rep < ITERS; ++rep)
        {
            volk_gnsssdr_32fc_power_spectral_density_32f_aligned16_manual(psd_sse3, samples, scalar, rbw, vlen, "sse3");
        }
    toc = clock();
    elapsed = (double)(toc - tic) / (double)CLOCKS_PER_SEC;
    printf("sse3_time: %f\n", elapsed);

    // Compare with a tolerance proportional to the generic result.
    for(int k = 0; k < vlen; ++k)
        {
            CPPUNIT_ASSERT_DOUBLES_EQUAL(psd_generic[k], psd_sse3[k], fabs(psd_generic[k]*1e-4));
        }
}
#else
/* Fallback when LV_HAVE_SSE3 is not defined: only reports that nothing ran. */
void qa_32fc_power_spectral_density_32f_aligned16::t1()
{
    printf("sse3 not available... no test performed\n");
}
#endif

View File

@ -1,36 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H
#define INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
/* CppUnit test fixture: derives from CppUnit::TestCase and registers a
 * single test method, t1, through the CPPUNIT_TEST_SUITE macros. */
class qa_32fc_power_spectral_density_32f_aligned16 : public CppUnit::TestCase {
CPPUNIT_TEST_SUITE (qa_32fc_power_spectral_density_32f_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
// The one registered test; only declared here.
void t1 ();
};
#endif /* INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H */

View File

@ -28,6 +28,9 @@
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <volk_gnsssdr/volk_gnsssdr_common.h>
/************************************************
* VOLK QA type definitions *
************************************************/
struct volk_gnsssdr_type_t {
bool is_float;
bool is_scalar;
@ -37,44 +40,104 @@ struct volk_gnsssdr_type_t {
std::string str;
};
// Result record for one timed implementation of a kernel. All members are
// public and none is initialised by the class itself.
class volk_gnsssdr_test_time_t {
public:
std::string name;   // label of the timed implementation (print_qa_xml reports it as the arch)
double time;        // measured duration, expressed in `units`
std::string units;  // unit label for `time`
bool pass;          // false makes print_qa_xml emit a <failure> element
};
// Aggregated results for one kernel: identification, run sizes and one
// volk_gnsssdr_test_time_t per entry of `results`.
class volk_gnsssdr_test_results_t {
public:
std::string name;         // kernel name (used as the testsuite name in the XML report)
std::string config_name;  // presumably the name used in the profile config -- TODO confirm
unsigned int vlen;        // vector length of the run
unsigned int iter;        // iteration count of the run
std::map<std::string, volk_gnsssdr_test_time_t> results; // per-implementation records, keyed by string
std::string best_arch_a;  // presumably best arch for aligned data -- TODO confirm
std::string best_arch_u;  // presumably best arch for unaligned data -- TODO confirm
};
/*
 * Immutable bundle of the settings handed to run_volk_gnsssdr_tests():
 * tolerance, scalar value, vector length, iteration count, benchmark flag
 * and kernel-name regular expression. Values are fixed at construction and
 * read back through const getters, so the object can also be used through
 * a const reference.
 */
class volk_gnsssdr_test_params_t {
private:
    float _tol;
    lv_32fc_t _scalar;
    unsigned int _vlen;
    unsigned int _iter;
    bool _benchmark_mode;
    std::string _kernel_regex;
public:
    // ctor: stores every argument unchanged
    volk_gnsssdr_test_params_t(float tol, lv_32fc_t scalar, unsigned int vlen, unsigned int iter,
                               bool benchmark_mode, std::string kernel_regex) :
        _tol(tol), _scalar(scalar), _vlen(vlen), _iter(iter),
        _benchmark_mode(benchmark_mode), _kernel_regex(kernel_regex) {}
    // getters (const so they are callable on const objects)
    float tol() const {return _tol;}
    lv_32fc_t scalar() const {return _scalar;}
    unsigned int vlen() const {return _vlen;}
    unsigned int iter() const {return _iter;}
    bool benchmark_mode() const {return _benchmark_mode;}
    std::string kernel_regex() const {return _kernel_regex;}
};
/*
 * Description of one kernel QA case: the kernel's function descriptor, a
 * type-erased pointer to its entry point, its name, the test parameters to
 * use and, for puppet kernels, the name of the puppet master ("NULL"
 * otherwise). Getters are const so a case can be read through a const
 * reference.
 */
class volk_gnsssdr_test_case_t {
private:
    volk_gnsssdr_func_desc_t _desc;
    void(*_kernel_ptr)();
    std::string _name;
    volk_gnsssdr_test_params_t _test_parameters;
    std::string _puppet_master_name;
public:
    volk_gnsssdr_func_desc_t desc() const {return _desc;}
    // returns the stored function pointer, typed as void(*)()
    void (*kernel_ptr() const) () {return _kernel_ptr;}
    std::string name() const {return _name;}
    std::string puppet_master_name() const {return _puppet_master_name;}
    volk_gnsssdr_test_params_t test_parameters() const {return _test_parameters;}
    // normal ctor: no puppet master, recorded as the string "NULL"
    volk_gnsssdr_test_case_t(volk_gnsssdr_func_desc_t desc, void(*kernel_ptr)(), std::string name,
                             volk_gnsssdr_test_params_t test_parameters) :
        _desc(desc), _kernel_ptr(kernel_ptr), _name(name), _test_parameters(test_parameters),
        _puppet_master_name("NULL")
    {}
    // ctor for puppets: additionally records the puppet master's name
    volk_gnsssdr_test_case_t(volk_gnsssdr_func_desc_t desc, void(*kernel_ptr)(), std::string name,
                             std::string puppet_master_name, volk_gnsssdr_test_params_t test_parameters) :
        _desc(desc), _kernel_ptr(kernel_ptr), _name(name), _test_parameters(test_parameters),
        _puppet_master_name(puppet_master_name)
    {}
};
/************************************************
* VOLK QA functions *
************************************************/
volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string);
float uniform(void);
void random_floats(float *buf, unsigned n);
// Result record for one timed implementation: a label, the measured
// duration and the unit that duration is expressed in.
class volk_gnsssdr_test_time_t
{
public:
std::string name;   // label of the timed implementation
double time;        // measured duration, expressed in `units`
std::string units;  // unit label for `time`
};
// Aggregated results for one kernel: identification, run sizes and one
// volk_gnsssdr_test_time_t per entry of `results`.
class volk_gnsssdr_test_results_t
{
public:
std::string name;         // kernel name
std::string config_name;  // presumably the name used in the profile config -- TODO confirm
int vlen;                 // vector length of the run
int iter;                 // iteration count of the run
std::map<std::string, volk_gnsssdr_test_time_t> results; // per-implementation records, keyed by string
std::string best_arch_a;  // presumably best arch for aligned data -- TODO confirm
std::string best_arch_u;  // presumably best arch for unaligned data -- TODO confirm
};
bool run_volk_gnsssdr_tests(
volk_gnsssdr_func_desc_t,
void(*)(),
std::string,
volk_gnsssdr_test_params_t,
std::vector<volk_gnsssdr_test_results_t> *results = NULL,
std::string puppet_master_name = "NULL"
);
bool run_volk_gnsssdr_tests(
volk_gnsssdr_func_desc_t,
void(*)(),
std::string,
float,
lv_32fc_t,
int,
int,
std::vector<volk_gnsssdr_test_results_t> *results = NULL,
std::string puppet_master_name = "NULL",
bool benchmark_mode=false,
std::string kernel_regex=""
);
volk_gnsssdr_func_desc_t,
void(*)(),
std::string,
float,
lv_32fc_t,
unsigned int,
unsigned int,
std::vector<volk_gnsssdr_test_results_t> *results = NULL,
std::string puppet_master_name = "NULL",
bool benchmark_mode = false
);
#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) \
@ -84,8 +147,8 @@ bool run_volk_gnsssdr_tests(
std::string(#func), tol, scalar, len, iter, 0, "NULL"), \
0); \
}
#define VOLK_PROFILE(func, tol, scalar, len, iter, results, bnmode, kernel_regex) run_volk_gnsssdr_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results, "NULL", bnmode, kernel_regex)
#define VOLK_PUPPET_PROFILE(func, puppet_master_func, tol, scalar, len, iter, results, bnmode, kernel_regex) run_volk_gnsssdr_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results, std::string(#puppet_master_func), bnmode, kernel_regex)
#define VOLK_PROFILE(func, test_params, results) run_volk_gnsssdr_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), test_params, results, "NULL")
#define VOLK_PUPPET_PROFILE(func, puppet_master_func, test_params, results) run_volk_gnsssdr_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), test_params, results, std::string(#puppet_master_func))
typedef void (*volk_gnsssdr_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place
typedef void (*volk_gnsssdr_fn_2arg)(void *, void *, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_3arg)(void *, void *, void *, unsigned int, const char*);
@ -97,6 +160,8 @@ typedef void (*volk_gnsssdr_fn_1arg_s32fc)(void *, lv_32fc_t, unsigned int, cons
typedef void (*volk_gnsssdr_fn_2arg_s32fc)(void *, void *, lv_32fc_t, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_3arg_s32fc)(void *, void *, void *, lv_32fc_t, unsigned int, const char*);
//ADDED BY GNSS-SDR. START
typedef void (*volk_gnsssdr_fn_1arg_s8i)(void *, char, unsigned int, const char*); //one input vector, one scalar char input
typedef void (*volk_gnsssdr_fn_2arg_s8i)(void *, void *, char, unsigned int, const char*);

View File

@ -19,42 +19,111 @@
#include "qa_utils.h"
#include "kernel_tests.h"
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <boost/test/unit_test.hpp>
//GNSS-SDR PROTO-KERNELS
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x2_multiply_8ic, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8u_x2_multiply_8u, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x2_dot_prod_8ic, 1e-4, 0, 204603, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_conjugate_8ic, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8i_x2_add_8i, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8i_index_max_16u, 3, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8i_accumulator_s8i, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_magnitude_squared_8i, 1e-4, 0, 20462, 1);
#include <vector>
#include <utility>
#include <iostream>
#include <fstream>
VOLK_RUN_TESTS(volk_gnsssdr_8i_max_s8i, 3, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_64f_accumulator_64f, 3, 0, 20462, 1);
void print_qa_xml(std::vector<volk_gnsssdr_test_results_t> results, unsigned int nfails);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_convert_16ic, 3, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_s32f_convert_8ic, 3, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_convert_8ic, 3, 0, 20462, 1);
/*
 * QA driver: builds the default test parameters, obtains the list of test
 * cases from init_test_list(), runs every case, writes the XML report and
 * returns 1 if at least one kernel failed (0 otherwise).
 */
int main()
{
bool qa_ret_val = 0;
VOLK_RUN_TESTS(volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 20462, 1);
// Default parameters shared by every test case.
float def_tol = 1e-6;
lv_32fc_t def_scalar = 327.0;
int def_iter = 1;
int def_vlen = 131071;
bool def_benchmark_mode = true;
std::string def_kernel_regex = "";
VOLK_RUN_TESTS(volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x7_cw_vepl_corr_TEST_32fc_x5, 1e-4, 0, 20462, 1);
volk_gnsssdr_test_params_t test_params(def_tol, def_scalar, def_vlen, def_iter,
def_benchmark_mode, def_kernel_regex);
std::vector<volk_gnsssdr_test_case_t> test_cases = init_test_list(test_params);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_s32f_x2_update_local_carrier_32fc, 1e-4, 0, 20462, 1);
std::vector<std::string> qa_failures;
std::vector<volk_gnsssdr_test_results_t> results;
// Test every kernel reporting failures when they occur
for(unsigned int ii = 0; ii < test_cases.size(); ++ii) {
// qa_result follows the check below: true means the kernel FAILED.
bool qa_result = false;
volk_gnsssdr_test_case_t test_case = test_cases[ii];
try {
qa_result = run_volk_gnsssdr_tests(test_case.desc(), test_case.kernel_ptr(), test_case.name(),
test_case.test_parameters(), &results, test_case.puppet_master_name());
}
catch(...) {
// TODO: what exceptions might we need to catch and how do we handle them?
std::cerr << "Exception found on kernel: " << test_case.name() << std::endl;
// A kernel that throws did not pass: record it as a failure so it
// shows up in the summary and in the process exit status.
qa_result = true;
}
if(qa_result) {
std::cerr << "Failure on " << test_case.name() << std::endl;
qa_failures.push_back(test_case.name());
}
}
// Generate XML results
print_qa_xml(results, qa_failures.size());
// Summarize QA results
std::cerr << "Kernel QA finished: " << qa_failures.size() << " failures out of "
<< test_cases.size() << " tests." << std::endl;
if(qa_failures.size() > 0) {
std::cerr << "The following kernels failed QA:" << std::endl;
for(unsigned int ii = 0; ii < qa_failures.size(); ++ii) {
std::cerr << " " << qa_failures[ii] << std::endl;
}
// Non-zero exit status signals the failure to the caller.
qa_ret_val = 1;
}
return qa_ret_val;
}
/*
* This function prints qa results as XML output similar to output
* from Junit. For reference output see http://llg.cubic.org/docs/junit/
*/
void print_qa_xml(std::vector<volk_gnsssdr_test_results_t> results, unsigned int nfails)
{
std::ofstream qa_file;
qa_file.open(".unittest/kernels.xml");
qa_file << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << std::endl;
qa_file << "<testsuites name=\"kernels\" " <<
"tests=\"" << results.size() << "\" " <<
"failures=\"" << nfails << "\" id=\"1\">" << std::endl;
// Results are in a vector by kernel. Each element has a result
// map containing time and arch name with test result
for(unsigned int ii=0; ii < results.size(); ++ii) {
volk_gnsssdr_test_results_t result = results[ii];
qa_file << " <testsuite name=\"" << result.name << "\">" << std::endl;
std::map<std::string, volk_gnsssdr_test_time_t>::iterator kernel_time_pair;
for(kernel_time_pair = result.results.begin(); kernel_time_pair != result.results.end(); ++kernel_time_pair) {
volk_gnsssdr_test_time_t test_time = kernel_time_pair->second;
qa_file << " <testcase name=\"" << test_time.name << "\" " <<
"classname=\"" << result.name << "\" " <<
"time=\"" << test_time.time << "\">" << std::endl;
if(!test_time.pass)
qa_file << " <failure " <<
"message=\"fail on arch " << test_time.name << "\">" <<
"</failure>" << std::endl;
qa_file << " </testcase>" << std::endl;
}
qa_file << " </testsuite>" << std::endl;
}
qa_file << "</testsuites>" << std::endl;
qa_file.close();
}

View File

@ -19,7 +19,7 @@
########################################################################
# Install python files and apps
########################################################################
include(GrPython)
include(VolkPython)
VOLK_PYTHON_INSTALL(
FILES
@ -35,4 +35,4 @@ VOLK_PYTHON_INSTALL(
volk_gnsssdr_modtool
DESTINATION ${VOLK_RUNTIME_DIR}
COMPONENT "volk_gnsssdr"
)
)

View File

@ -1,9 +1,9 @@
#
# Copyright 2013 Free Software Foundation, Inc.
# Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
#
# This file is part of GNU Radio
# This file is part of GNSS-SDR.
#
# GNU Radio is free software; you can redistribute it and/or modify
# GNSS-SDR is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.

View File

@ -88,4 +88,5 @@ extern VOLK_API volk_gnsssdr_func_desc_t $(kern.name)_get_func_desc(void);
__VOLK_DECL_END
#endif /*INCLUDED_VOLK_GNSSSDR_RUNTIME*/

View File

@ -19,6 +19,7 @@
#include <volk_gnsssdr/volk_gnsssdr_cpu.h>
#include <volk_gnsssdr/volk_gnsssdr_config_fixed.h>
#include <stdlib.h>
#include <string.h>
struct VOLK_CPU volk_gnsssdr_cpu;
@ -30,11 +31,7 @@ struct VOLK_CPU volk_gnsssdr_cpu;
//implement get cpuid for gcc compilers using a system or local copy of cpuid.h
#if defined(__GNUC__)
#if defined(HAVE_CPUID_H)
#include <cpuid.h>
#else
#include "gcc_x86_cpuid.h"
#endif
#include <cpuid.h>
#define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3)
/* Return Intel AVX extended CPU capabilities register.
@ -69,9 +66,20 @@ struct VOLK_CPU volk_gnsssdr_cpu;
#endif //defined(VOLK_CPU_x86)
/* Runs __cpuid_count with the given level and count, then returns a single
 * bit (0 or 1) of one of the four output values: `reg` indexes the outputs in
 * the order they are passed to __cpuid_count, `bit` selects the bit position.
 * Always returns 0 when VOLK_CPU_x86 is not defined. */
static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int count, unsigned int reg, unsigned int bit) {
#if !defined(VOLK_CPU_x86)
    return 0;
#else
    unsigned int cpu_out[4];
    __cpuid_count(level, count, cpu_out[0], cpu_out[1], cpu_out[2], cpu_out[3]);
    /* Shift the requested bit down to position 0 and mask everything else off. */
    return (cpu_out[reg] >> bit) & 0x01;
#endif
}
static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit) {
#if defined(VOLK_CPU_x86)
unsigned int regs[4];
memset(regs, 0, sizeof(unsigned int)*4);
cpuid_x86(op, regs);
return regs[reg] >> bit & 0x01;
#else
@ -82,6 +90,7 @@ static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsi
static inline unsigned int check_extended_cpuid(unsigned int val) {
#if defined(VOLK_CPU_x86)
unsigned int regs[4];
memset(regs, 0, sizeof(unsigned int)*4);
cpuid_x86(0x80000000, regs);
return regs[0] >= val;
#else
@ -97,6 +106,14 @@ static inline unsigned int get_avx_enabled(void) {
#endif
}
/* Returns the value of __xgetbv() masked with 0x6 (bits 1 and 2) on builds
 * where VOLK_CPU_x86 is defined, and 0 otherwise.
 * NOTE(review): the result is nonzero when either masked bit is set; confirm
 * whether callers need both bits set before treating the feature as enabled. */
static inline unsigned int get_avx2_enabled(void) {
#if !defined(VOLK_CPU_x86)
    return 0;
#else
    return (unsigned int)(__xgetbv() & 0x6);
#endif
}
//neon detection is linux specific
#if defined(__arm__) && defined(__linux__)
#include <asm/hwcap.h>
@ -129,14 +146,6 @@ static int has_neon(void){
#endif
}
/* Compile-time check: returns 1 when the compiler defines __PPC__, 0 otherwise. */
static int has_ppc(void){
#if defined(__PPC__)
    return 1;
#else
    return 0;
#endif
}
#for $arch in $archs
static int i_can_has_$arch.name (void) {
#for $check, $params in $arch.checks

View File

@ -23,7 +23,7 @@ endif(ENABLE_CUDA)
set(TRACKING_ADAPTER_SOURCES
galileo_e1_dll_pll_veml_tracking.cc
galileo_volk_e1_dll_pll_veml_tracking.cc
#galileo_volk_e1_dll_pll_veml_tracking.cc
galileo_e1_tcp_connector_tracking.cc
gps_l1_ca_dll_fll_pll_tracking.cc
gps_l1_ca_dll_pll_optim_tracking.cc

View File

@ -25,7 +25,7 @@ endif(ENABLE_CUDA)
set(TRACKING_GR_BLOCKS_SOURCES
galileo_e1_dll_pll_veml_tracking_cc.cc
galileo_volk_e1_dll_pll_veml_tracking_cc.cc
#galileo_volk_e1_dll_pll_veml_tracking_cc.cc
galileo_e1_tcp_connector_tracking_cc.cc
gps_l1_ca_dll_fll_pll_tracking_cc.cc
gps_l1_ca_dll_pll_optim_tracking_cc.cc

View File

@ -83,7 +83,7 @@
#include "gps_l1_ca_dll_fll_pll_tracking.h"
#include "gps_l1_ca_tcp_connector_tracking.h"
#include "galileo_e1_dll_pll_veml_tracking.h"
#include "galileo_volk_e1_dll_pll_veml_tracking.h"
//#include "galileo_volk_e1_dll_pll_veml_tracking.h"
#include "galileo_e1_tcp_connector_tracking.h"
#include "galileo_e5a_dll_pll_tracking.h"
#include "gps_l2_m_dll_pll_tracking.h"
@ -1342,12 +1342,12 @@ std::unique_ptr<GNSSBlockInterface> GNSSBlockFactory::GetBlock(
out_streams, queue));
block = std::move(block_);
}
else if (implementation.compare("Galileo_volk_E1_DLL_PLL_VEML_Tracking") == 0)
{
std::unique_ptr<GNSSBlockInterface> block_(new GalileoVolkE1DllPllVemlTracking(configuration.get(), role, in_streams,
out_streams, queue));
block = std::move(block_);
}
// else if (implementation.compare("Galileo_volk_E1_DLL_PLL_VEML_Tracking") == 0)
// {
// std::unique_ptr<GNSSBlockInterface> block_(new GalileoVolkE1DllPllVemlTracking(configuration.get(), role, in_streams,
// out_streams, queue));
// block = std::move(block_);
// }
else if (implementation.compare("Galileo_E1_TCP_CONNECTOR_Tracking") == 0)
{
std::unique_ptr<GNSSBlockInterface> block_(new GalileoE1TcpConnectorTracking(configuration.get(), role, in_streams,
@ -1613,12 +1613,12 @@ std::unique_ptr<TrackingInterface> GNSSBlockFactory::GetTrkBlock(
out_streams, queue));
block = std::move(block_);
}
else if (implementation.compare("Galileo_Volk_E1_DLL_PLL_VEML_Tracking") == 0)
{
std::unique_ptr<TrackingInterface> block_(new GalileoVolkE1DllPllVemlTracking(configuration.get(), role, in_streams,
out_streams, queue));
block = std::move(block_);
}
// else if (implementation.compare("Galileo_Volk_E1_DLL_PLL_VEML_Tracking") == 0)
// {
// std::unique_ptr<TrackingInterface> block_(new GalileoVolkE1DllPllVemlTracking(configuration.get(), role, in_streams,
// out_streams, queue));
// block = std::move(block_);
// }
else if (implementation.compare("Galileo_E1_TCP_CONNECTOR_Tracking") == 0)
{
std::unique_ptr<TrackingInterface> block_(new GalileoE1TcpConnectorTracking(configuration.get(), role, in_streams,
@ -1631,12 +1631,12 @@ std::unique_ptr<TrackingInterface> GNSSBlockFactory::GetTrkBlock(
out_streams, queue));
block = std::move(block_);
}
else if (implementation.compare("Galileo_volk_E1_DLL_PLL_VEML_Tracking") == 0)
{
std::unique_ptr<TrackingInterface> block_(new GalileoVolkE1DllPllVemlTracking(configuration.get(), role, in_streams,
out_streams, queue));
block = std::move(block_);
}
// else if (implementation.compare("Galileo_volk_E1_DLL_PLL_VEML_Tracking") == 0)
// {
// std::unique_ptr<TrackingInterface> block_(new GalileoVolkE1DllPllVemlTracking(configuration.get(), role, in_streams,
// out_streams, queue));
// block = std::move(block_);
// }
else if (implementation.compare("GPS_L2_M_DLL_PLL_Tracking") == 0)
{
std::unique_ptr<TrackingInterface> block_(new GpsL2MDllPllTracking(configuration.get(), role, in_streams,

View File

@ -696,7 +696,7 @@ private:
{
std::string message;
Rtcm_Message msg;
queue_->wait_and_pop(message);
queue_->wait_and_pop(message); //message += '\n';
if(message.compare("Goodbye") == 0) break;
const char *char_msg = message.c_str();
msg.body_length(message.length());