mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2025-01-27 17:34:53 +00:00
Master branch + volk_gnsssdr module
This commit is contained in:
parent
490879c33a
commit
60cc3777b6
462
CMakeLists.txt
462
CMakeLists.txt
@ -16,26 +16,58 @@
|
||||
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
########################################################################
|
||||
if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
|
||||
message(FATAL_ERROR "Prevented in-tree build. This is bad practice. Try 'cd build && cmake ../' ")
|
||||
endif(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
|
||||
|
||||
########################################################################
|
||||
# Project setup
|
||||
########################################################################
|
||||
if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
|
||||
message(FATAL_ERROR "Prevented in-tree build. This is bad practice. Try 'cd build && cmake ../' ")
|
||||
endif(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
|
||||
cmake_minimum_required(VERSION 2.8)
|
||||
project(gnss-sdr CXX C)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
|
||||
file(RELATIVE_PATH RELATIVE_CMAKE_CALL ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
|
||||
########################################################################
|
||||
# Determine optional blocks/libraries to be built (default: not built)
|
||||
# Enable them here or at the command line by doing 'cmake -DENABLE_XXX=ON ../'
|
||||
########################################################################
|
||||
|
||||
option(ENABLE_GN3S "Enable the use of the GN3S dongle as signal source (experimental)" OFF)
|
||||
option(ENABLE_ARRAY "Enable the use of CTTC's antenna array front-end as signal source (experimental)" OFF)
|
||||
option(ENABLE_RTLSDR "Enable the use of RTL dongles as signal source (experimental)" OFF)
|
||||
option(ENABLE_OPENCL "Enable building of processing blocks implemented with OpenCL (experimental)" OFF)
|
||||
option(ENABLE_GPERFTOOLS "Enable linking to Gperftools libraries (tcmalloc and profiler)" OFF)
|
||||
option(ENABLE_GENERIC_ARCH "Builds a portable binary" OFF)
|
||||
option(ENABLE_VOLK_GNSSSDR "Enable building of volk_gnsssdr module: some volk protokernels coded by gnss-sdr" OFF)
|
||||
|
||||
|
||||
###############################
|
||||
# GNSS-SDR version information
|
||||
###############################
|
||||
# Get the current working branch
|
||||
execute_process(
|
||||
COMMAND git rev-parse --abbrev-ref HEAD
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GIT_BRANCH
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
|
||||
# Get the latest abbreviated commit hash of the working branch
|
||||
execute_process(
|
||||
COMMAND git log -1 --format=%h
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GIT_COMMIT_HASH
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
|
||||
# Set the version information here
|
||||
set(VERSION_INFO_MAJOR_VERSION 0)
|
||||
set(VERSION_INFO_API_COMPAT 0)
|
||||
set(VERSION_INFO_MINOR_VERSION 3)
|
||||
set(VERSION_INFO_MINOR_VERSION 3.git-${GIT_BRANCH}-${GIT_COMMIT_HASH})
|
||||
set(VERSION ${VERSION_INFO_MAJOR_VERSION}.${VERSION_INFO_API_COMPAT}.${VERSION_INFO_MINOR_VERSION})
|
||||
|
||||
file(RELATIVE_PATH RELATIVE_CMAKE_CALL ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
|
||||
########################################################################
|
||||
# Environment setup
|
||||
@ -156,10 +188,16 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
endif(${DARWIN_VERSION} MATCHES "10")
|
||||
endif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
|
||||
|
||||
#select the release build type by default to get optimization flags
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE "Release")
|
||||
message(STATUS "Build type not specified: defaulting to Release.")
|
||||
if(ENABLE_GPERFTOOLS)
|
||||
set(CMAKE_BUILD_TYPE "RelWithDebInfo")
|
||||
message(STATUS "Build type not specified: defaulting to RelWithDebInfo.")
|
||||
else(ENABLE_GPERFTOOLS)
|
||||
set(CMAKE_BUILD_TYPE "Release")
|
||||
message(STATUS "Build type not specified: defaulting to Release.")
|
||||
endif(ENABLE_GPERFTOOLS)
|
||||
endif(NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "")
|
||||
|
||||
@ -182,6 +220,7 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
endif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.7)
|
||||
endif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
|
||||
|
||||
################################################################################
|
||||
# Googletest - http://code.google.com/p/googletest/
|
||||
################################################################################
|
||||
@ -203,7 +242,6 @@ endif(GTEST_DIR)
|
||||
################################################################################
|
||||
# Boost - http://www.boost.org
|
||||
################################################################################
|
||||
|
||||
if(UNIX AND EXISTS "/usr/lib64")
|
||||
list(APPEND BOOST_LIBRARYDIR "/usr/lib64") # Fedora 64-bit fix
|
||||
endif(UNIX AND EXISTS "/usr/lib64")
|
||||
@ -231,9 +269,7 @@ endif(NOT Boost_FOUND)
|
||||
################################################################################
|
||||
# GNU Radio - http://gnuradio.org/redmine/projects/gnuradio/wiki
|
||||
################################################################################
|
||||
|
||||
find_package(Gnuradio)
|
||||
|
||||
if(NOT GNURADIO_RUNTIME_FOUND)
|
||||
message(STATUS "CMake cannot find GNU Radio >= 3.7")
|
||||
if(OS_IS_LINUX)
|
||||
@ -281,6 +317,40 @@ if(NOT GNURADIO_TRELLIS_FOUND)
|
||||
endif()
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Volk_gnsssdr module
|
||||
# In order to use the volk_gnsssdr module it is necessary to add:
|
||||
# 1) include_directories(..${VOLK_GNSSSDR_INCLUDE_DIRS}..)
|
||||
# 2) target_link_libraries(..${VOLK_GNSSSDR_LIBRARIES}..)
|
||||
###############################################################################
|
||||
|
||||
# Report whether the bundled volk_gnsssdr module is enabled. When it is, add
# it to the build as a sub-project and publish the include directories and
# library name that dependent targets must use.
if(NOT ENABLE_VOLK_GNSSSDR)
    message(STATUS "The volk_gnsssdr module with custom protokernels coded by gnss-sdr is not enabled. Some configurations that use custom protokernels will not work." )
    message(STATUS "Enable it with 'cmake -D ENABLE_VOLK_GNSSSDR=ON ../'." )
else(NOT ENABLE_VOLK_GNSSSDR)
    message(STATUS "The volk_gnsssdr module with custom protokernels coded by gnss-sdr will be compiled.")
    message(STATUS "You can disable it with 'cmake -DENABLE_VOLK_GNSSSDR=OFF ../'" )

    # Location of the module sources inside the gnss-sdr tree.
    set(VOLK_GNSSSDR_BASE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/src/algorithms/libs/volk_gnsssdr)
    add_subdirectory(${VOLK_GNSSSDR_BASE_PATH})

    # Headers come from two places: the module's source tree and the matching
    # directory in the build tree.
    set(VOLK_GNSSSDR_INCLUDE_DIRS
        ${VOLK_GNSSSDR_BASE_PATH}/include
        ${CMAKE_CURRENT_BINARY_DIR}/src/algorithms/libs/volk_gnsssdr/include
    )

    # Link against the volk_gnsssdr target by name rather than by a path such as
    # ${VOLK_GNSSSDR_BASE_PATH}/lib/Debug/libvolk_gnsssdr.dylib
    set(VOLK_GNSSSDR_LIBRARIES volk_gnsssdr)

    message(" * INCLUDES: ${VOLK_GNSSSDR_INCLUDE_DIRS} ")
    message(" * LIBS: ${VOLK_GNSSSDR_LIBRARIES} ")
    message("-- END OF: Setup volk_gnsssdr as a subproject.")
endif(NOT ENABLE_VOLK_GNSSSDR)
|
||||
|
||||
|
||||
################################################################################
|
||||
# gflags - http://code.google.com/p/gflags/
|
||||
@ -356,7 +426,6 @@ endif(NOT GFlags_FOUND OR LOCAL_GLOG)
|
||||
################################################################################
|
||||
# glog - http://code.google.com/p/google-glog/
|
||||
################################################################################
|
||||
|
||||
find_package(GLOG)
|
||||
set(glog_RELEASE 0.3.3)
|
||||
if (NOT GLOG_FOUND OR LOCAL_GFLAGS)
|
||||
@ -458,97 +527,14 @@ endif(NOT GLOG_FOUND OR LOCAL_GFLAGS)
|
||||
|
||||
|
||||
|
||||
|
||||
################################################################################
|
||||
# GPerftools - http://code.google.com/p/gperftools/
|
||||
################################################################################
|
||||
|
||||
set(GCC_GPERFTOOLS_FLAGS "")
|
||||
find_package(Gperftools)
|
||||
if ( NOT GPERFTOOLS_FOUND )
|
||||
message(STATUS "The optional library GPerftools has not been found.")
|
||||
else( NOT GPERFTOOLS_FOUND )
|
||||
message (STATUS "GPerftools library found." )
|
||||
link_libraries(${GPERFTOOLS_PROFILER} ${GPERFTOOLS_TCMALLOC})
|
||||
endif( NOT GPERFTOOLS_FOUND )
|
||||
list(APPEND CMAKE_CXX_FLAGS ${GCC_GPERFTOOLS_FLAGS})
|
||||
|
||||
|
||||
|
||||
|
||||
################################################################################
|
||||
# Doxygen - http://www.stack.nl/~dimitri/doxygen/index.html
|
||||
################################################################################
|
||||
|
||||
find_package(Doxygen)
|
||||
if(DOXYGEN_FOUND)
|
||||
message(STATUS "Doxygen found.")
|
||||
message(STATUS "You can build the documentation with 'make doc'." )
|
||||
message(STATUS "When done, point your browser to ${CMAKE_SOURCE_DIR}/html/index.html")
|
||||
set(HAVE_DOT ${DOXYGEN_DOT_FOUND})
|
||||
file(TO_NATIVE_PATH ${CMAKE_SOURCE_DIR} top_srcdir)
|
||||
file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} top_builddir)
|
||||
find_package(LATEX)
|
||||
if (PDFLATEX_COMPILER)
|
||||
set(GENERATE_PDF_DOCUMENTATION "YES")
|
||||
set(GNSSSDR_USE_MATHJAX "NO")
|
||||
else(PDFLATEX_COMPILER)
|
||||
set(GENERATE_PDF_DOCUMENTATION "NO")
|
||||
set(GNSSSDR_USE_MATHJAX "YES")
|
||||
endif(PDFLATEX_COMPILER)
|
||||
configure_file(${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile.in
|
||||
${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile
|
||||
@ONLY
|
||||
)
|
||||
add_custom_target(doc
|
||||
${DOXYGEN_EXECUTABLE} ${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
COMMENT "Generating API documentation with Doxygen." VERBATIM
|
||||
)
|
||||
if(LATEX_COMPILER)
|
||||
message(STATUS "'make pdfmanual' will generate a manual at ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf")
|
||||
add_custom_target(pdfmanual
|
||||
COMMAND ${CMAKE_MAKE_PROGRAM}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy refman.pdf ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf
|
||||
COMMAND ${CMAKE_MAKE_PROGRAM} clean
|
||||
DEPENDS doc
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/docs/latex
|
||||
COMMENT "Generating PDF manual with Doxygen." VERBATIM
|
||||
)
|
||||
endif(LATEX_COMPILER)
|
||||
message(STATUS "'make doc-clean' will clean the documentation.")
|
||||
add_custom_target(doc-clean
|
||||
COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_SOURCE_DIR}/docs/html
|
||||
COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_SOURCE_DIR}/docs/latex
|
||||
COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf
|
||||
COMMENT "Cleaning documentation." VERBATIM
|
||||
)
|
||||
else(DOXYGEN_FOUND)
|
||||
message(STATUS " Doxygen has not been found in your system.")
|
||||
message(STATUS " You can get nice code documentation by using it!")
|
||||
message(STATUS " Get it from http://www.stack.nl/~dimitri/doxygen/index.html")
|
||||
if(OS_IS_LINUX)
|
||||
if(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
|
||||
message(" or simply by doing 'sudo yum install doxygen-latex'.")
|
||||
else(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
|
||||
message(" or simply by doing 'sudo apt-get install doxygen-latex'.")
|
||||
endif(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
|
||||
endif(OS_IS_LINUX)
|
||||
if(OS_IS_MACOSX)
|
||||
message(STATUS " or simply by doing 'sudo port install doxygen +latex'.")
|
||||
endif(OS_IS_MACOSX)
|
||||
endif(DOXYGEN_FOUND)
|
||||
|
||||
|
||||
|
||||
################################################################################
|
||||
# Armadillo - http://arma.sourceforge.net/
|
||||
################################################################################
|
||||
|
||||
if(OS_IS_LINUX)
|
||||
#############################################
|
||||
#############################################################################
|
||||
# Check that LAPACK is found in the system
|
||||
#############################################
|
||||
# LAPACK is required for matrix decompositions (eg. SVD) and matrix inverse.
|
||||
#############################################################################
|
||||
find_library(LAPACK lapack)
|
||||
if(NOT LAPACK)
|
||||
message(" The LAPACK library has not been found.")
|
||||
@ -562,9 +548,11 @@ if(OS_IS_LINUX)
|
||||
endif(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
|
||||
message(FATAL_ERROR "LAPACK is required to build gnss-sdr")
|
||||
endif(NOT LAPACK)
|
||||
#############################################
|
||||
#############################################################################
|
||||
# Check that BLAS is found in the system
|
||||
#############################################
|
||||
# BLAS is used for matrix multiplication.
|
||||
# Without BLAS, matrix multiplication will still work, but might be slower.
|
||||
#############################################################################
|
||||
find_library(BLAS blas)
|
||||
if(NOT BLAS)
|
||||
message(" The BLAS library has not been found.")
|
||||
@ -641,31 +629,22 @@ if(NOT ARMADILLO_FOUND)
|
||||
endif(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
|
||||
message(FATAL_ERROR "The patch command is required to download and build armadillo")
|
||||
endif(NOT PATCH_EXECUTABLE)
|
||||
set(armadillo_RELEASE 4.300.9)
|
||||
set(armadillo_MD5 "d51d1beb2a335f3002702d112c4814f3")
|
||||
set(armadillo_RELEASE 4.400.0)
|
||||
set(armadillo_MD5 "616744dbc96af1c5d6d32c6c69f6fe94")
|
||||
if(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/download/armadillo-${armadillo_RELEASE}/armadillo-${armadillo_RELEASE}.tar.gz)
|
||||
set(ARMADILLO_PATCH_FILE ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_no.patch)
|
||||
file(WRITE ${ARMADILLO_PATCH_FILE} "")
|
||||
set(ARMADILLO_PATCH_FILE2 ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_no2.patch)
|
||||
file(WRITE ${ARMADILLO_PATCH_FILE2} "")
|
||||
else(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/download/armadillo-${armadillo_RELEASE}/armadillo-${armadillo_RELEASE}.tar.gz)
|
||||
set(ARMADILLO_PATCH_FILE ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_staticlib.patch)
|
||||
set(ARMADILLO_PATCH_FILE2 ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_enable_lapack.patch)
|
||||
set(ARMADILLO_PATCH_FILE ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_enable_lapack.patch)
|
||||
file(WRITE ${ARMADILLO_PATCH_FILE}
|
||||
"30c30
|
||||
< set(ARMA_USE_LAPACK false)
|
||||
---
|
||||
> set(ARMA_USE_LAPACK true)
|
||||
312c312
|
||||
< add_library( armadillo SHARED \${PROJECT_SOURCE_DIR}/src/wrapper.cpp )
|
||||
---
|
||||
> add_library( armadillo STATIC \${PROJECT_SOURCE_DIR}/src/wrapper.cpp )
|
||||
")
|
||||
file(WRITE ${ARMADILLO_PATCH_FILE2}
|
||||
"12c12
|
||||
< // #define ARMA_USE_LAPACK
|
||||
---
|
||||
> #define ARMA_USE_LAPACK
|
||||
> #define ARMA_USE_LAPACK
|
||||
19c19
|
||||
< // #define ARMA_USE_BLAS
|
||||
---
|
||||
> #define ARMA_USE_BLAS
|
||||
")
|
||||
endif(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/download/armadillo-${armadillo_RELEASE}/armadillo-${armadillo_RELEASE}.tar.gz)
|
||||
ExternalProject_Add(
|
||||
@ -673,9 +652,9 @@ if(NOT ARMADILLO_FOUND)
|
||||
PREFIX ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}
|
||||
URL http://sourceforge.net/projects/arma/files/armadillo-${armadillo_RELEASE}.tar.gz
|
||||
DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/download/armadillo-${armadillo_RELEASE}
|
||||
URL_MD5 ${armadillo_MD5}
|
||||
PATCH_COMMAND patch -N <BINARY_DIR>/CMakeLists.txt ${ARMADILLO_PATCH_FILE} && patch -N <BINARY_DIR>/include/armadillo_bits/config.hpp ${ARMADILLO_PATCH_FILE2}
|
||||
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
|
||||
URL_MD5 ${armadillo_MD5}
|
||||
PATCH_COMMAND patch -N <BINARY_DIR>/include/armadillo_bits/config.hpp ${ARMADILLO_PATCH_FILE}
|
||||
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DBUILD_SHARED_LIBS=OFF
|
||||
BUILD_IN_SOURCE 1
|
||||
BUILD_COMMAND make
|
||||
UPDATE_COMMAND ""
|
||||
@ -686,7 +665,14 @@ if(NOT ARMADILLO_FOUND)
|
||||
ExternalProject_Get_Property(armadillo-${armadillo_RELEASE} binary_dir)
|
||||
set(ARMADILLO_INCLUDE_DIRS ${binary_dir}/include )
|
||||
find_library(LAPACK NAMES lapack HINTS /usr/lib /usr/local/lib /usr/lib64)
|
||||
set(ARMADILLO_LIBRARIES ${LAPACK} ${GFORTRAN} ${binary_dir}/${CMAKE_FIND_LIBRARY_PREFIXES}armadillo.a)
|
||||
if(OS_IS_MACOSX)
|
||||
find_library(BLAS blas)
|
||||
endif(OS_IS_MACOSX)
|
||||
find_package(OpenBLAS)
|
||||
if(OPENBLAS_FOUND)
|
||||
set(BLAS ${OPENBLAS})
|
||||
endif(OPENBLAS_FOUND)
|
||||
set(ARMADILLO_LIBRARIES ${BLAS} ${LAPACK} ${GFORTRAN} ${binary_dir}/${CMAKE_FIND_LIBRARY_PREFIXES}armadillo.a)
|
||||
set(LOCAL_ARMADILLO true CACHE STRING "Armadillo downloaded and built automatically" FORCE)
|
||||
# Save a copy at the thirdparty folder
|
||||
file(COPY ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}
|
||||
@ -700,27 +686,6 @@ endif(NOT ARMADILLO_FOUND)
|
||||
|
||||
|
||||
|
||||
###############################################################################
|
||||
# OpenCL
|
||||
###############################################################################
|
||||
find_package(OpenCL)
|
||||
if($ENV{DISABLE_OPENCL})
|
||||
set(DISABLE_OPENCL TRUE)
|
||||
endif($ENV{DISABLE_OPENCL})
|
||||
if(DISABLE_OPENCL)
|
||||
set(OPENCL_FOUND FALSE)
|
||||
else(DISABLE_OPENCL)
|
||||
if(OPENCL_FOUND)
|
||||
message(STATUS "OpenCL has been found and will be used by some processing blocks")
|
||||
message(STATUS "You can disable OpenCL use by doing 'cmake -DDISABLE_OPENCL=1 ../' ")
|
||||
endif(OPENCL_FOUND)
|
||||
endif(DISABLE_OPENCL)
|
||||
if(NOT OPENCL_FOUND)
|
||||
message(STATUS "Processing blocks using OpenCL will not be built.")
|
||||
endif(NOT OPENCL_FOUND)
|
||||
|
||||
|
||||
|
||||
################################################################################
|
||||
# OpenSSL - http://www.openssl.org
|
||||
################################################################################
|
||||
@ -742,41 +707,173 @@ if(NOT OPENSSL_FOUND)
|
||||
endif(NOT OPENSSL_FOUND)
|
||||
|
||||
|
||||
|
||||
################################################################################
|
||||
# Doxygen - http://www.stack.nl/~dimitri/doxygen/index.html (OPTIONAL)
|
||||
################################################################################
|
||||
find_package(Doxygen)
|
||||
if(DOXYGEN_FOUND)
|
||||
message(STATUS "Doxygen found.")
|
||||
message(STATUS "You can build the documentation with 'make doc'." )
|
||||
# The generated HTML is under docs/html (the directory the doc-clean target
# removes), so direct the user there rather than to a top-level html/ folder.
message(STATUS "When done, point your browser to ${CMAKE_SOURCE_DIR}/docs/html/index.html")
|
||||
set(HAVE_DOT ${DOXYGEN_DOT_FOUND})
|
||||
file(TO_NATIVE_PATH ${CMAKE_SOURCE_DIR} top_srcdir)
|
||||
file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} top_builddir)
|
||||
find_package(LATEX)
|
||||
if (PDFLATEX_COMPILER)
|
||||
set(GENERATE_PDF_DOCUMENTATION "YES")
|
||||
set(GNSSSDR_USE_MATHJAX "NO")
|
||||
else(PDFLATEX_COMPILER)
|
||||
set(GENERATE_PDF_DOCUMENTATION "NO")
|
||||
set(GNSSSDR_USE_MATHJAX "YES")
|
||||
endif(PDFLATEX_COMPILER)
|
||||
configure_file(${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile.in
|
||||
${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile
|
||||
@ONLY
|
||||
)
|
||||
add_custom_target(doc
|
||||
${DOXYGEN_EXECUTABLE} ${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
COMMENT "Generating API documentation with Doxygen." VERBATIM
|
||||
)
|
||||
if(LATEX_COMPILER)
|
||||
message(STATUS "'make pdfmanual' will generate a manual at ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf")
|
||||
add_custom_target(pdfmanual
|
||||
COMMAND ${CMAKE_MAKE_PROGRAM}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy refman.pdf ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf
|
||||
COMMAND ${CMAKE_MAKE_PROGRAM} clean
|
||||
DEPENDS doc
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/docs/latex
|
||||
COMMENT "Generating PDF manual with Doxygen." VERBATIM
|
||||
)
|
||||
endif(LATEX_COMPILER)
|
||||
message(STATUS "'make doc-clean' will clean the documentation.")
|
||||
add_custom_target(doc-clean
|
||||
COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_SOURCE_DIR}/docs/html
|
||||
COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_SOURCE_DIR}/docs/latex
|
||||
COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf
|
||||
COMMENT "Cleaning documentation." VERBATIM
|
||||
)
|
||||
else(DOXYGEN_FOUND)
|
||||
message(STATUS " Doxygen has not been found in your system.")
|
||||
message(STATUS " You can get nice code documentation by using it!")
|
||||
message(STATUS " Get it from http://www.stack.nl/~dimitri/doxygen/index.html")
|
||||
if(OS_IS_LINUX)
|
||||
if(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
|
||||
message(" or simply by doing 'sudo yum install doxygen-latex'.")
|
||||
else(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
|
||||
message(" or simply by doing 'sudo apt-get install doxygen-latex'.")
|
||||
endif(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
|
||||
endif(OS_IS_LINUX)
|
||||
if(OS_IS_MACOSX)
|
||||
message(STATUS " or simply by doing 'sudo port install doxygen +latex'.")
|
||||
endif(OS_IS_MACOSX)
|
||||
endif(DOXYGEN_FOUND)
|
||||
|
||||
|
||||
|
||||
###############################################################################
|
||||
# OpenCL (OPTIONAL)
|
||||
###############################################################################
|
||||
if(ENABLE_OPENCL)
|
||||
find_package(OpenCL)
|
||||
if($ENV{DISABLE_OPENCL})
|
||||
set(DISABLE_OPENCL TRUE)
|
||||
endif($ENV{DISABLE_OPENCL})
|
||||
if(DISABLE_OPENCL)
|
||||
set(OPENCL_FOUND FALSE)
|
||||
else(DISABLE_OPENCL)
|
||||
if(OPENCL_FOUND)
|
||||
message(STATUS "OpenCL has been found and will be used by some processing blocks")
|
||||
message(STATUS "You can disable OpenCL use by doing 'cmake -DENABLE_OPENCL=OFF ../' ")
|
||||
endif(OPENCL_FOUND)
|
||||
endif(DISABLE_OPENCL)
|
||||
if(ENABLE_GENERIC_ARCH)
|
||||
set(OPENCL_FOUND FALSE)
|
||||
message(STATUS "ENABLE_GENERIC_ARCH is set to ON so the use of OpenCL has been disabled.")
|
||||
endif(ENABLE_GENERIC_ARCH)
|
||||
if(NOT OPENCL_FOUND)
|
||||
message(STATUS "Processing blocks using OpenCL will not be built.")
|
||||
endif(NOT OPENCL_FOUND)
|
||||
else(ENABLE_OPENCL)
|
||||
set(OPENCL_FOUND FALSE)
|
||||
endif(ENABLE_OPENCL)
|
||||
|
||||
|
||||
|
||||
|
||||
################################################################################
|
||||
# GPerftools - http://code.google.com/p/gperftools/ (OPTIONAL)
|
||||
################################################################################
|
||||
|
||||
if(ENABLE_GPERFTOOLS)
|
||||
find_package(Gperftools)
|
||||
if ( NOT GPERFTOOLS_FOUND )
|
||||
message(STATUS "Although ENABLE_GPERFTOOLS has been set to ON, GPerftools has not been found.")
|
||||
message(STATUS "Binaries will be compiled without 'tcmalloc' and 'profiler' libraries.")
|
||||
message(STATUS "You can install GPerftools from http://code.google.com/p/gperftools/")
|
||||
else( NOT GPERFTOOLS_FOUND )
|
||||
message(STATUS "GPerftools libraries found." )
|
||||
message(STATUS "Binaries will be compiled with 'tcmalloc' and 'profiler' libraries.")
|
||||
endif( NOT GPERFTOOLS_FOUND )
|
||||
endif(ENABLE_GPERFTOOLS)
|
||||
|
||||
|
||||
|
||||
################################################################################
|
||||
# Setup of optional drivers
|
||||
################################################################################
|
||||
if( $ENV{GN3S_DRIVER} )
|
||||
message(STATUS "GN3S_DRIVER variable found." )
|
||||
# copy firmware to install folder
|
||||
# Build project gr-gn3s
|
||||
else( $ENV{GN3S_DRIVER} )
|
||||
if( GN3S_DRIVER )
|
||||
message(STATUS "GN3S driver will be compiled")
|
||||
else( GNSS_DRIVER )
|
||||
message(STATUS "GN3S_DRIVER is not defined." )
|
||||
message(STATUS "Define it with 'export GN3S_DRIVER=1' to add support for the GN3S dongle." )
|
||||
endif( GN3S_DRIVER )
|
||||
endif($ENV{GN3S_DRIVER} )
|
||||
|
||||
if( $ENV{RAW_ARRAY_DRIVER} )
|
||||
message(STATUS "RAW_ARRAY_DRIVER variable found." )
|
||||
if($ENV{GN3S_DRIVER})
|
||||
message(STATUS "GN3S_DRIVER environment variable found." )
|
||||
set(ENABLE_GN3S ON)
|
||||
endif($ENV{GN3S_DRIVER})
|
||||
if(GN3S_DRIVER)
|
||||
set(ENABLE_GN3S ON)
|
||||
endif(GN3S_DRIVER)
|
||||
if(ENABLE_GN3S)
|
||||
message(STATUS "The GN3S driver will be compiled.")
|
||||
message(STATUS "You can disable it with 'cmake -DENABLE_GN3S=OFF ../'" )
|
||||
else(ENABLE_GN3S)
|
||||
message(STATUS "The (optional and experimental) GN3S driver is not enabled." )
|
||||
message(STATUS "Enable it with 'cmake -DENABLE_GN3S=ON ../' to add support for the GN3S dongle." )
|
||||
endif(ENABLE_GN3S)
|
||||
|
||||
|
||||
if($ENV{RAW_ARRAY_DRIVER})
|
||||
message(STATUS "RAW_ARRAY_DRIVER environment variable found." )
|
||||
set(ENABLE_ARRAY ON)
|
||||
endif($ENV{RAW_ARRAY_DRIVER})
|
||||
if(RAW_ARRAY_DRIVER)
|
||||
set(ENABLE_ARRAY ON)
|
||||
endif(RAW_ARRAY_DRIVER)
|
||||
if(ENABLE_ARRAY)
|
||||
message(STATUS "CTTC's Antenna Array front-end driver will be compiled." )
|
||||
message(STATUS "You can disable it with 'cmake -DENABLE_ARRAY=OFF ../'" )
|
||||
# copy firmware to install folder
|
||||
# Build project gr-dbfcttc
|
||||
else( $ENV{RAW_ARRAY_DRIVER} )
|
||||
if( RAW_ARRAY_DRIVER )
|
||||
message(STATUS "RAW_ARRAY_DRIVER driver will be compiled")
|
||||
else( RAW_ARRAY_DRIVER )
|
||||
message(STATUS "RAW_ARRAY_DRIVER is not defined." )
|
||||
message(STATUS "Define it with 'export RAW_ARRAY_DRIVER=1' to add support for the CTTC experimental array front-end." )
|
||||
endif( RAW_ARRAY_DRIVER )
|
||||
endif($ENV{RAW_ARRAY_DRIVER} )
|
||||
else(ENABLE_ARRAY)
|
||||
message(STATUS "The (optional) CTTC's Antenna Array front-end driver is not enabled." )
|
||||
message(STATUS "Enable it with 'cmake -DENABLE_ARRAY=ON ../' to add support for the CTTC experimental array front-end." )
|
||||
endif(ENABLE_ARRAY)
|
||||
|
||||
if( $ENV{RTLSDR_DRIVER} )
|
||||
message(STATUS "RTLSDR_DRIVER variable found." )
|
||||
|
||||
if($ENV{RTLSDR_DRIVER})
|
||||
message(STATUS "RTLSDR_DRIVER environment variable found." )
|
||||
set(ENABLE_RTLSDR ON)
|
||||
endif($ENV{RTLSDR_DRIVER})
|
||||
# Legacy switch: 'cmake -DRTLSDR_DRIVER=1 ../' also turns on the RTL-SDR driver.
# This must test RTLSDR_DRIVER; testing RAW_ARRAY_DRIVER here would enable
# RTL-SDR support whenever the antenna-array driver was requested.
if(RTLSDR_DRIVER)
    set(ENABLE_RTLSDR ON)
endif(RTLSDR_DRIVER)
|
||||
if(ENABLE_RTLSDR)
|
||||
message(STATUS "The driver for RTL-based dongles will be compiled." )
|
||||
message(STATUS "You can disable it with 'cmake -DENABLE_RTLSDR=OFF ../'" )
|
||||
# find libosmosdr (done in src/algorithms/signal_sources/adapters)
|
||||
# find gr-osmosdr (done in src/algorithms/signal_sources/adapters)
|
||||
endif($ENV{RTLSDR_DRIVER} )
|
||||
|
||||
else(ENABLE_RTLSDR)
|
||||
message(STATUS "The (optional) driver for RTL-based dongles is not enabled." )
|
||||
message(STATUS "Enable it with 'cmake -DENABLE_RTLSDR=ON ../' to add support for Realtek's RTL2832U-based USB dongles." )
|
||||
endif(ENABLE_RTLSDR)
|
||||
|
||||
|
||||
########################################################################
|
||||
@ -802,7 +899,11 @@ if(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
|
||||
if(OS_IS_MACOSX)
|
||||
set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -march=corei7 -mfpmath=sse")
|
||||
else(OS_IS_MACOSX)
|
||||
set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -march=native -mfpmath=sse")
|
||||
if(ENABLE_GENERIC_ARCH)
|
||||
set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -mtune=generic")
|
||||
else(ENABLE_GENERIC_ARCH)
|
||||
set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -march=native -mfpmath=sse")
|
||||
endif(ENABLE_GENERIC_ARCH)
|
||||
endif(OS_IS_MACOSX)
|
||||
endif(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
|
||||
|
||||
@ -811,13 +912,18 @@ if(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
|
||||
add_definitions(-fvisibility=hidden)
|
||||
endif()
|
||||
|
||||
# Set GPerftools related flags if it is available
|
||||
# See http://gperftools.googlecode.com/svn/trunk/README
|
||||
if(GPERFTOOLS_FOUND)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
|
||||
set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free")
|
||||
endif(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
|
||||
endif(GPERFTOOLS_FOUND)
|
||||
if(ENABLE_GPERFTOOLS)
|
||||
# Set GPerftools related flags if it is available
|
||||
# See http://gperftools.googlecode.com/svn/trunk/README
|
||||
if(GPERFTOOLS_FOUND)
|
||||
if(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
|
||||
set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free")
|
||||
endif(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -fno-builtin")
|
||||
endif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
endif(GPERFTOOLS_FOUND)
|
||||
endif(ENABLE_GPERFTOOLS)
|
||||
|
||||
# CMAKE_CXX_FLAGS is a single space-separated string, not a CMake list.
# list(APPEND) would join with ';' whenever the variable is already non-empty
# (e.g. CXXFLAGS set in the environment) and corrupt the compiler command line,
# so concatenate with a space instead.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_CXX_FLAGS}")
|
||||
|
||||
|
183
src/algorithms/libs/volk_gnsssdr/CMakeLists.txt
Normal file
183
src/algorithms/libs/volk_gnsssdr/CMakeLists.txt
Normal file
@ -0,0 +1,183 @@
|
||||
#
|
||||
# Copyright 2011 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
########################################################################
|
||||
# Project setup
|
||||
########################################################################
|
||||
cmake_minimum_required(VERSION 2.6)
|
||||
if(NOT DEFINED CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE Release)
|
||||
endif()
|
||||
set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "Choose build type: None Debug Release RelWithDebInfo MinSizeRel")
|
||||
project(volk_gnsssdr)
|
||||
enable_language(CXX)
|
||||
enable_language(C)
|
||||
enable_testing()
|
||||
set(VERSION 0.1)
|
||||
set(LIBVER 0.0.0)
|
||||
|
||||
set(CMAKE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) #allows this to be a sub-project
|
||||
set(CMAKE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) #allows this to be a sub-project
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) #location for custom "Modules"
|
||||
|
||||
########################################################################
|
||||
# Environment setup
|
||||
########################################################################
|
||||
IF(NOT DEFINED BOOST_ROOT)
|
||||
SET(BOOST_ROOT ${CMAKE_INSTALL_PREFIX})
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED CROSSCOMPILE_MULTILIB)
|
||||
SET(CROSSCOMPILE_MULTILIB "")
|
||||
ENDIF()
|
||||
SET(CROSSCOMPILE_MULTILIB ${CROSSCOMPILE_MULTILIB} CACHE STRING "Define \"true\" if you have and want to use multiple C development libs installed for cross compile")
|
||||
|
||||
|
||||
########################################################################
|
||||
# Dependencies setup
|
||||
########################################################################
|
||||
include(GrPython) #sets PYTHON_EXECUTABLE and PYTHON_DASH_B
|
||||
VOLK_PYTHON_CHECK_MODULE("python >= 2.5" sys "sys.version.split()[0] >= '2.5'" PYTHON_MIN_VER_FOUND)
|
||||
VOLK_PYTHON_CHECK_MODULE("Cheetah >= 2.0.0" Cheetah "Cheetah.Version >= '2.0.0'" CHEETAH_FOUND)
|
||||
|
||||
if(NOT PYTHON_MIN_VER_FOUND)
|
||||
message(FATAL_ERROR "Python 2.5 or greater required to build VOLK")
|
||||
endif()
|
||||
|
||||
if(NOT CHEETAH_FOUND)
|
||||
message(FATAL_ERROR "Cheetah templates required to build VOLK")
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
if (NOT DEFINED BOOST_ALL_DYN_LINK)
|
||||
set(BOOST_ALL_DYN_LINK TRUE)
|
||||
endif()
|
||||
set(BOOST_ALL_DYN_LINK "${BOOST_ALL_DYN_LINK}" CACHE BOOL "boost enable dynamic linking")
|
||||
if(BOOST_ALL_DYN_LINK)
|
||||
add_definitions(-DBOOST_ALL_DYN_LINK) #setup boost auto-linking in msvc
|
||||
else(BOOST_ALL_DYN_LINK)
|
||||
unset(BOOST_REQUIRED_COMPONENTS) #empty components list for static link
|
||||
endif(BOOST_ALL_DYN_LINK)
|
||||
endif(MSVC)
|
||||
include(VolkBoost)
|
||||
|
||||
if(NOT Boost_FOUND)
|
||||
message(FATAL_ERROR "VOLK Requires boost to build")
|
||||
endif()
|
||||
|
||||
option(ENABLE_ORC "Enable Orc" True)
|
||||
if(ENABLE_ORC)
|
||||
find_package(ORC)
|
||||
else(ENABLE_ORC)
|
||||
message(STATUS "Disabling use of ORC")
|
||||
endif(ENABLE_ORC)
|
||||
|
||||
########################################################################
|
||||
# Setup the package config file
|
||||
########################################################################
|
||||
#set variables found in the pc.in file
|
||||
# The escaped \${...} references are written out literally so that they
# appear unexpanded in the generated .pc file.
set(prefix      ${CMAKE_INSTALL_PREFIX})
set(exec_prefix "\${prefix}")
set(libdir      "\${exec_prefix}/lib${LIB_SUFFIX}")
set(includedir  "\${prefix}/include")

# Only @VAR@ placeholders of the template are substituted.
configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr.pc.in
    ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr.pc
    @ONLY
)

install(FILES ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr.pc
    DESTINATION lib${LIB_SUFFIX}/pkgconfig
    COMPONENT "volk_gnsssdr_devel"
)
|
||||
|
||||
########################################################################
|
||||
# Install all headers in the include directories
|
||||
########################################################################
|
||||
# Install locations, relative to CMAKE_INSTALL_PREFIX.
set(VOLK_RUNTIME_DIR bin)
set(VOLK_LIBRARY_DIR lib${LIB_SUFFIX})
set(VOLK_INCLUDE_DIR include)

# Kernel headers: every *.h file found below kernels/volk_gnsssdr.
install(DIRECTORY ${CMAKE_SOURCE_DIR}/kernels/volk_gnsssdr
    DESTINATION ${VOLK_INCLUDE_DIR}
    COMPONENT "volk_gnsssdr_devel"
    FILES_MATCHING PATTERN "*.h"
)

# Public API headers, taken partly from the source tree and partly from
# the build tree.
install(
    FILES
        ${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_prefs.h
        ${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_complex.h
        ${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_common.h
        ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr.h
        ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_cpu.h
        ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_config_fixed.h
        ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_typedefs.h
        ${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_malloc.h
    DESTINATION ${VOLK_INCLUDE_DIR}/volk_gnsssdr
    COMPONENT "volk_gnsssdr_devel"
)
|
||||
|
||||
########################################################################
|
||||
# Install cmake search routine for external use
|
||||
########################################################################
|
||||
|
||||
# CMAKE_MODULES_DIR may be preset by the user; otherwise it defaults to
# lib${LIB_SUFFIX}/cmake.
if(NOT CMAKE_MODULES_DIR)
    set(CMAKE_MODULES_DIR lib${LIB_SUFFIX}/cmake)
endif()

install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/VolkConfig.cmake
    DESTINATION ${CMAKE_MODULES_DIR}/volk_gnsssdr
    COMPONENT "volk_gnsssdr_devel"
)
|
||||
|
||||
########################################################################
|
||||
# On Apple only, set install name and use rpath correctly, if not already set
|
||||
########################################################################
|
||||
# Apple only: unless already configured, point the install name and the
# install rpath at the directory the library is installed into, and build
# with that rpath.
#
# The library directory is VOLK_LIBRARY_DIR (set earlier in this file).
# The previous GR_LIBRARY_DIR is a GNU Radio variable that this file never
# sets, so the resulting paths collapsed to "${CMAKE_INSTALL_PREFIX}/".
if(APPLE)
    if(NOT CMAKE_INSTALL_NAME_DIR)
        set(CMAKE_INSTALL_NAME_DIR
            ${CMAKE_INSTALL_PREFIX}/${VOLK_LIBRARY_DIR} CACHE
            PATH "Library Install Name Destination Directory" FORCE)
    endif()
    if(NOT CMAKE_INSTALL_RPATH)
        set(CMAKE_INSTALL_RPATH
            ${CMAKE_INSTALL_PREFIX}/${VOLK_LIBRARY_DIR} CACHE
            PATH "Library Install RPath" FORCE)
    endif()
    if(NOT CMAKE_BUILD_WITH_INSTALL_RPATH)
        set(CMAKE_BUILD_WITH_INSTALL_RPATH ON CACHE
            BOOL "Do Build Using Library Install RPath" FORCE)
    endif()
endif()
|
||||
|
||||
########################################################################
|
||||
# Setup the library
|
||||
########################################################################
|
||||
add_subdirectory(lib)

# Utility applications (profiler and config-info) and the modtool scripts.
add_subdirectory(apps)
add_subdirectory(python/volk_gnsssdr_modtool)

# Configuration summary.
message(STATUS "Using install prefix: ${CMAKE_INSTALL_PREFIX}")
|
61
src/algorithms/libs/volk_gnsssdr/apps/CMakeLists.txt
Normal file
61
src/algorithms/libs/volk_gnsssdr/apps/CMakeLists.txt
Normal file
@ -0,0 +1,61 @@
|
||||
#
|
||||
# Copyright 2011-2013 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
########################################################################
|
||||
# Setup profiler
|
||||
########################################################################
|
||||
# Both utilities need Boost, so nothing is built or installed without it.
if(Boost_FOUND)

    # Additional include directory used for MSVC builds only.
    if(MSVC)
        include_directories(${CMAKE_SOURCE_DIR}/cmake/msvc)
    endif()

    include_directories(
        ${CMAKE_CURRENT_SOURCE_DIR}
        ${CMAKE_CURRENT_BINARY_DIR}
        ${CMAKE_SOURCE_DIR}/include
        ${CMAKE_BINARY_DIR}/include
        ${CMAKE_SOURCE_DIR}/lib
        ${CMAKE_BINARY_DIR}/lib
        ${Boost_INCLUDE_DIRS}
    )

    # volk_gnsssdr_profile: compiled together with lib/qa_utils.cc.
    add_executable(volk_gnsssdr_profile
        ${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr_profile.cc
        ${CMAKE_SOURCE_DIR}/lib/qa_utils.cc
    )
    target_link_libraries(volk_gnsssdr_profile volk_gnsssdr ${Boost_LIBRARIES})

    # volk_gnsssdr-config-info
    add_executable(volk_gnsssdr-config-info volk_gnsssdr-config-info.cc)
    target_link_libraries(volk_gnsssdr-config-info volk_gnsssdr ${Boost_LIBRARIES})

    # Both executables are installed into bin as part of the same component.
    install(
        TARGETS volk_gnsssdr_profile volk_gnsssdr-config-info
        DESTINATION bin
        COMPONENT "volk_gnsssdr"
    )

endif()
|
@ -0,0 +1,96 @@
|
||||
/* -*- c++ -*- */
|
||||
/*
|
||||
* Copyright 2013 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GNU Radio
|
||||
*
|
||||
* GNU Radio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* GNU Radio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Radio; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include <volk_gnsssdr/constants.h>
|
||||
#include "volk_gnsssdr/volk_gnsssdr.h"
|
||||
#include <boost/program_options.hpp>
|
||||
#include <iostream>
|
||||
|
||||
namespace po = boost::program_options;
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
po::options_description desc("Program options: volk_gnsssdr-config-info [options]");
|
||||
po::variables_map vm;
|
||||
|
||||
desc.add_options()
|
||||
("help,h", "print help message")
|
||||
("prefix", "print VOLK installation prefix")
|
||||
("builddate", "print VOLK build date (RFC2822 format)")
|
||||
("cc", "print VOLK C compiler version")
|
||||
("cflags", "print VOLK CFLAGS")
|
||||
("all-machines", "print VOLK machines built into library")
|
||||
("avail-machines", "print VOLK machines the current platform can use")
|
||||
("machine", "print the VOLK machine that will be used")
|
||||
("version,v", "print VOLK version")
|
||||
;
|
||||
|
||||
try {
|
||||
po::store(po::parse_command_line(argc, argv, desc), vm);
|
||||
po::notify(vm);
|
||||
}
|
||||
catch (po::error& error){
|
||||
std::cerr << "Error: " << error.what() << std::endl << std::endl;
|
||||
std::cerr << desc << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(vm.size() == 0 || vm.count("help")) {
|
||||
std::cout << desc << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(vm.count("prefix"))
|
||||
std::cout << volk_gnsssdr_prefix() << std::endl;
|
||||
|
||||
if(vm.count("builddate"))
|
||||
std::cout << volk_gnsssdr_build_date() << std::endl;
|
||||
|
||||
if(vm.count("version"))
|
||||
std::cout << volk_gnsssdr_version() << std::endl;
|
||||
|
||||
if(vm.count("cc"))
|
||||
std::cout << volk_gnsssdr_c_compiler() << std::endl;
|
||||
|
||||
if(vm.count("cflags"))
|
||||
std::cout << volk_gnsssdr_compiler_flags() << std::endl;
|
||||
|
||||
// stick an extra ';' to make output of this and avail-machines the
|
||||
// same structure for easier parsing
|
||||
if(vm.count("all-machines"))
|
||||
std::cout << volk_gnsssdr_available_machines() << ";" << std::endl;
|
||||
|
||||
if(vm.count("avail-machines")) {
|
||||
volk_gnsssdr_list_machines();
|
||||
}
|
||||
|
||||
if(vm.count("machine")) {
|
||||
std::cout << volk_gnsssdr_get_machine() << std::endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
163
src/algorithms/libs/volk_gnsssdr/apps/volk_gnsssdr_profile.cc
Normal file
163
src/algorithms/libs/volk_gnsssdr/apps/volk_gnsssdr_profile.cc
Normal file
@ -0,0 +1,163 @@
|
||||
/* -*- c++ -*- */
|
||||
/*
|
||||
* Copyright 2012-2014 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GNU Radio
|
||||
*
|
||||
* GNU Radio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* GNU Radio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Radio; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "qa_utils.h"
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_prefs.h>
|
||||
|
||||
#include <ciso646>
|
||||
#include <vector>
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/filesystem.hpp>
|
||||
#include <boost/program_options.hpp>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
namespace fs = boost::filesystem;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
// Adding program options
|
||||
boost::program_options::options_description desc("Options");
|
||||
desc.add_options()
|
||||
("help,h", "Print help messages")
|
||||
("benchmark,b",
|
||||
boost::program_options::value<bool>()->default_value( false )
|
||||
->implicit_value( true ),
|
||||
"Run all kernels (benchmark mode)")
|
||||
("tests-regex,R",
|
||||
boost::program_options::value<std::string>(),
|
||||
"Run tests matching regular expression.")
|
||||
;
|
||||
|
||||
// Handle the options that were given
|
||||
boost::program_options::variables_map vm;
|
||||
bool benchmark_mode;
|
||||
std::string kernel_regex;
|
||||
bool store_results = true;
|
||||
try {
|
||||
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
|
||||
boost::program_options::notify(vm);
|
||||
benchmark_mode = vm.count("benchmark")?vm["benchmark"].as<bool>():false;
|
||||
if ( vm.count("tests-regex" ) ) {
|
||||
kernel_regex = vm["tests-regex"].as<std::string>();
|
||||
store_results = false;
|
||||
std::cout << "Warning: using a regexp will not save results to a config" << std::endl;
|
||||
}
|
||||
else {
|
||||
kernel_regex = ".*";
|
||||
store_results = true;
|
||||
}
|
||||
} catch (boost::program_options::error& error) {
|
||||
std::cerr << "Error: " << error.what() << std::endl << std::endl;
|
||||
std::cerr << desc << std::endl;
|
||||
return 1;
|
||||
}
|
||||
/** --help option
|
||||
*/
|
||||
if ( vm.count("help") )
|
||||
{
|
||||
std::cout << "The VOLK profiler." << std::endl
|
||||
<< desc << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Run tests
|
||||
std::vector<std::string> results;
|
||||
|
||||
//VOLK_PROFILE(volk_gnsssdr_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
|
||||
//VOLK_PROFILE(volk_gnsssdr_16i_branch_4_state_8, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
|
||||
//VOLK_PROFILE(volk_gnsssdr_16i_max_star_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
|
||||
//VOLK_PROFILE(volk_gnsssdr_16i_max_star_horizontal_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
|
||||
//VOLK_PROFILE(volk_gnsssdr_16i_permute_and_scalar_add, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
|
||||
//VOLK_PROFILE(volk_gnsssdr_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
|
||||
//VOLK_PROFILE(volk_gnsssdr_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
|
||||
//VOLK_PROFILE(volk_gnsssdr_32fc_s32f_x2_power_spectral_density_32f, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
|
||||
//VOLK_PROFILE(volk_gnsssdr_32f_s32f_32f_fm_detect_32f, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
|
||||
//VOLK_PROFILE(volk_gnsssdr_32u_popcnt, 0, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
|
||||
//VOLK_PROFILE(volk_gnsssdr_64u_popcnt, 0, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
|
||||
//VOLK_PROFILE(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, lv_32fc_t(1.0, 0.5), 204602, 1000, &results, benchmark_mode, kernel_regex);
|
||||
|
||||
// Until we can update the config on a kernel by kernel basis
|
||||
// do not overwrite volk_gnsssdr_config when using a regex.
|
||||
|
||||
//GNSS-SDR PROTO-KERNELS
|
||||
//lv_32fc_t sfv = lv_cmake((float)1, (float)2);
|
||||
//VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, sfv, 204602, 1000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_16i_s32f_convert_32f, 1e-4, 32768.0, 204602, 10000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_32f_accumulator_s32f, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_8i_accumulator_s8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_32f_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_8i_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_8i_max_s8i, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_32f_x2_add_32f, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_8i_x2_add_8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_32fc_conjugate_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_8ic_conjugate_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_32fc_magnitude_squared_32f, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_8ic_magnitude_squared_8i, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_32fc_x2_dot_prod_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_8ic_x2_dot_prod_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_32fc_x2_multiply_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_8ic_x2_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
|
||||
VOLK_PROFILE(volk_gnsssdr_8u_x2_multiply_8u, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
|
||||
if(store_results) {
|
||||
char path[1024];
|
||||
volk_gnsssdr_get_config_path(path);
|
||||
|
||||
const fs::path config_path(path);
|
||||
|
||||
if (not fs::exists(config_path.branch_path()))
|
||||
{
|
||||
std::cout << "Creating " << config_path.branch_path() << "..." << std::endl;
|
||||
fs::create_directories(config_path.branch_path());
|
||||
}
|
||||
|
||||
std::cout << "Writing " << config_path << "..." << std::endl;
|
||||
std::ofstream config(config_path.string().c_str());
|
||||
if(!config.is_open()) { //either we don't have write access or we don't have the dir yet
|
||||
std::cout << "Error opening file " << config_path << std::endl;
|
||||
}
|
||||
|
||||
config << "\
|
||||
#this file is generated by volk_gnsssdr_profile.\n\
|
||||
#the function name is followed by the preferred architecture.\n\
|
||||
";
|
||||
|
||||
BOOST_FOREACH(std::string result, results) {
|
||||
config << result << std::endl;
|
||||
}
|
||||
config.close();
|
||||
}
|
||||
else {
|
||||
std::cout << "Warning: config not generated" << std::endl;
|
||||
}
|
||||
}
|
@ -0,0 +1,138 @@
|
||||
# CMAKE_PARSE_ARGUMENTS(<prefix> <options> <one_value_keywords> <multi_value_keywords> args...)
|
||||
#
|
||||
# CMAKE_PARSE_ARGUMENTS() is intended to be used in macros or functions for
|
||||
# parsing the arguments given to that macro or function.
|
||||
# It processes the arguments and defines a set of variables which hold the
|
||||
# values of the respective options.
|
||||
#
|
||||
# The <options> argument contains all options for the respective macro,
|
||||
# i.e. keywords which can be used when calling the macro without any value
|
||||
# following, like e.g. the OPTIONAL keyword of the install() command.
|
||||
#
|
||||
# The <one_value_keywords> argument contains all keywords for this macro
|
||||
# which are followed by one value, like e.g. DESTINATION keyword of the
|
||||
# install() command.
|
||||
#
|
||||
# The <multi_value_keywords> argument contains all keywords for this macro
|
||||
# which can be followed by more than one value, like e.g. the TARGETS or
|
||||
# FILES keywords of the install() command.
|
||||
#
|
||||
# When done, CMAKE_PARSE_ARGUMENTS() will have defined for each of the
|
||||
# keywords listed in <options>, <one_value_keywords> and
|
||||
# <multi_value_keywords> a variable composed of the given <prefix>
|
||||
# followed by "_" and the name of the respective keyword.
|
||||
# These variables will then hold the respective value from the argument list.
|
||||
# For the <options> keywords this will be TRUE or FALSE.
|
||||
#
|
||||
# All remaining arguments are collected in a variable
|
||||
# <prefix>_UNPARSED_ARGUMENTS, this can be checked afterwards to see whether
|
||||
# your macro was called with unrecognized parameters.
|
||||
#
|
||||
# As an example here a my_install() macro, which takes similar arguments as the
|
||||
# real install() command:
|
||||
#
|
||||
# function(MY_INSTALL)
|
||||
# set(options OPTIONAL FAST)
|
||||
# set(oneValueArgs DESTINATION RENAME)
|
||||
# set(multiValueArgs TARGETS CONFIGURATIONS)
|
||||
# cmake_parse_arguments(MY_INSTALL "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )
|
||||
# ...
|
||||
#
|
||||
# Assume my_install() has been called like this:
|
||||
# my_install(TARGETS foo bar DESTINATION bin OPTIONAL blub)
|
||||
#
|
||||
# After the cmake_parse_arguments() call the macro will have set the following
|
||||
# variables:
|
||||
# MY_INSTALL_OPTIONAL = TRUE
|
||||
# MY_INSTALL_FAST = FALSE (this option was not used when calling my_install())
|
||||
# MY_INSTALL_DESTINATION = "bin"
|
||||
# MY_INSTALL_RENAME = "" (was not used)
|
||||
# MY_INSTALL_TARGETS = "foo;bar"
|
||||
# MY_INSTALL_CONFIGURATIONS = "" (was not used)
|
||||
# MY_INSTALL_UNPARSED_ARGUMENTS = "blub" (no value expected after "OPTIONAL")
|
||||
#
|
||||
# You can then continue and process these variables.
|
||||
#
|
||||
# Keywords terminate lists of values, e.g. if directly after a one_value_keyword
|
||||
# another recognized keyword follows, this is interpreted as the beginning of
|
||||
# the new option.
|
||||
# E.g. my_install(TARGETS foo DESTINATION OPTIONAL) would result in
|
||||
# MY_INSTALL_DESTINATION set to "OPTIONAL" were OPTIONAL not itself a keyword; instead MY_INSTALL_DESTINATION would
|
||||
# be empty and MY_INSTALL_OPTIONAL would therefore be set to TRUE.
|
||||
|
||||
#=============================================================================
|
||||
# Copyright 2010 Alexander Neundorf <neundorf@kde.org>
|
||||
#
|
||||
# Distributed under the OSI-approved BSD License (the "License");
|
||||
# see accompanying file Copyright.txt for details.
|
||||
#
|
||||
# This software is distributed WITHOUT ANY WARRANTY; without even the
|
||||
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# See the License for more information.
|
||||
#=============================================================================
|
||||
# (To distribute this file outside of CMake, substitute the full
|
||||
# License text for the above reference.)
|
||||
|
||||
|
||||
# Include guard: the function below is defined only once.
if(__CMAKE_PARSE_ARGUMENTS_INCLUDED)
  return()
endif()
set(__CMAKE_PARSE_ARGUMENTS_INCLUDED TRUE)


# CMAKE_PARSE_ARGUMENTS(<prefix> <options> <one_value_keywords>
#                       <multi_value_keywords> args...)
#
# Sorts args... into <prefix>_<keyword> variables in the caller's scope:
# TRUE/FALSE for option keywords, the value for one-value keywords and the
# list of values for multi-value keywords. Arguments that belong to no
# keyword end up in <prefix>_UNPARSED_ARGUMENTS.
function(CMAKE_PARSE_ARGUMENTS prefix _optionNames _singleArgNames _multiArgNames)
  # Reset every result variable: value keywords unset, options FALSE.
  foreach(_valueKey ${_singleArgNames} ${_multiArgNames})
    set(${prefix}_${_valueKey})
  endforeach()

  foreach(_flag ${_optionNames})
    set(${prefix}_${_flag} FALSE)
  endforeach()

  set(${prefix}_UNPARSED_ARGUMENTS)

  # _collecting is FALSE, "SINGLE" or "MULTI", depending on the kind of
  # keyword seen last; _activeKey names that keyword.
  set(_collecting FALSE)
  set(_activeKey)

  foreach(_token ${ARGN})
    # A token that is itself a keyword ends the value list of the
    # previous keyword.
    list(FIND _optionNames "${_token}" _optionPos)
    list(FIND _singleArgNames "${_token}" _singlePos)
    list(FIND _multiArgNames "${_token}" _multiPos)

    if(NOT ${_optionPos} EQUAL -1)
      set(${prefix}_${_token} TRUE)
      set(_collecting FALSE)
    elseif(NOT ${_singlePos} EQUAL -1)
      set(_activeKey ${_token})
      set(${prefix}_${_activeKey})
      set(_collecting "SINGLE")
    elseif(NOT ${_multiPos} EQUAL -1)
      set(_activeKey ${_token})
      set(${prefix}_${_activeKey})
      set(_collecting "MULTI")
    elseif(NOT _collecting)
      # Plain value with no keyword waiting for it.
      list(APPEND ${prefix}_UNPARSED_ARGUMENTS ${_token})
    elseif("${_collecting}" STREQUAL "SINGLE")
      # A one-value keyword takes exactly one value.
      set(${prefix}_${_activeKey} ${_token})
      set(_collecting FALSE)
    elseif("${_collecting}" STREQUAL "MULTI")
      list(APPEND ${prefix}_${_activeKey} ${_token})
    endif()
  endforeach()

  # Hand the results over to the caller.
  foreach(_resultKey ${_singleArgNames} ${_multiArgNames} ${_optionNames})
    set(${prefix}_${_resultKey} ${${prefix}_${_resultKey}} PARENT_SCOPE)
  endforeach()
  set(${prefix}_UNPARSED_ARGUMENTS ${${prefix}_UNPARSED_ARGUMENTS} PARENT_SCOPE)

endfunction()
|
36
src/algorithms/libs/volk_gnsssdr/cmake/FindORC.cmake
Normal file
36
src/algorithms/libs/volk_gnsssdr/cmake/FindORC.cmake
Normal file
@ -0,0 +1,36 @@
|
||||
# Find module for ORC 0.4: the orcc executable, the orc/orc.h header and
# the orc-0.4 library.
#
# Search hints come from pkg-config (module "orc-0.4", newer than 0.4.11),
# ORC_ROOT and CMAKE_INSTALL_PREFIX.
#
# Result variables:
#   ORC_FOUND         - set by find_package_handle_standard_args
#   ORCC_EXECUTABLE   - path to orcc
#   ORC_INCLUDE_DIRS  - header search path
#   ORC_LIBRARIES     - libraries to link against
#   ORC_LIBRARY_DIRS  - directory containing the shared library
find_package(PkgConfig)
pkg_check_modules(PC_ORC "orc-0.4 > 0.4.11")

find_program(ORCC_EXECUTABLE orcc
    HINTS ${PC_ORC_TOOLSDIR}
    PATHS ${ORC_ROOT}/bin ${CMAKE_INSTALL_PREFIX}/bin)

find_path(ORC_INCLUDE_DIR NAMES orc/orc.h
    HINTS ${PC_ORC_INCLUDEDIR}
    PATHS ${ORC_ROOT}/include/orc-0.4 ${CMAKE_INSTALL_PREFIX}/include/orc-0.4)

find_path(ORC_LIBRARY_DIR NAMES ${CMAKE_SHARED_LIBRARY_PREFIX}orc-0.4${CMAKE_SHARED_LIBRARY_SUFFIX}
    HINTS ${PC_ORC_LIBDIR}
    PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} ${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX})

find_library(ORC_LIB orc-0.4
    HINTS ${PC_ORC_LIBRARY_DIRS}
    PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} ${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX})

list(APPEND ORC_LIBRARY
    ${ORC_LIB}
)

set(ORC_INCLUDE_DIRS ${ORC_INCLUDE_DIR})
set(ORC_LIBRARIES ${ORC_LIBRARY})
set(ORC_LIBRARY_DIRS ${ORC_LIBRARY_DIR})

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ORC "orc files" ORC_LIBRARY ORC_INCLUDE_DIR ORCC_EXECUTABLE)

# Hide the cache entries created by the find_* calls above. ORC_LIBRARY is
# a plain variable, not a cache entry, so it is not listed here; the cache
# entries behind it are ORC_LIB and ORC_LIBRARY_DIR.
mark_as_advanced(ORCC_EXECUTABLE ORC_INCLUDE_DIR ORC_LIBRARY_DIR ORC_LIB)
|
234
src/algorithms/libs/volk_gnsssdr/cmake/GrPython.cmake
Normal file
234
src/algorithms/libs/volk_gnsssdr/cmake/GrPython.cmake
Normal file
@ -0,0 +1,234 @@
|
||||
# Copyright 2010-2011,2013 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is part of GNU Radio
|
||||
#
|
||||
# GNU Radio is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# GNU Radio is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with GNU Radio; see the file COPYING. If not, write to
|
||||
# the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
# Boston, MA 02110-1301, USA.
|
||||
|
||||
# Include guard.
if(DEFINED __INCLUDED_VOLK_PYTHON_CMAKE)
    return()
endif()
set(__INCLUDED_VOLK_PYTHON_CMAKE TRUE)

########################################################################
# Locate the python interpreter.
# A PYTHON_EXECUTABLE given by the user is taken as is. Otherwise the
# built-in PythonInterp module is tried first, then a plain program
# search over the usual Python 2 executable names.
########################################################################
if(NOT PYTHON_EXECUTABLE)
    find_package(PythonInterp 2)
    if(NOT PYTHONINTERP_FOUND)
        find_program(PYTHON_EXECUTABLE NAMES python python2 python2.7 python2.6 python2.5)
    endif()
endif()

# Whichever way the executable became known, report the interpreter as found.
if(PYTHON_EXECUTABLE)
    set(PYTHONINTERP_FOUND TRUE)
endif()

#make the path to the executable appear in the cmake gui
set(PYTHON_EXECUTABLE "${PYTHON_EXECUTABLE}" CACHE FILEPATH "python interpreter")

# PYTHON_DASH_B is set to "-B" only when the interpreter accepts that
# switch (introduced in python 2.6).
if(PYTHON_EXECUTABLE)
    execute_process(
        COMMAND ${PYTHON_EXECUTABLE} -B -c ""
        RESULT_VARIABLE PYTHON_HAS_DASH_B_RESULT
        OUTPUT_QUIET
        ERROR_QUIET
    )
    if(PYTHON_HAS_DASH_B_RESULT EQUAL 0)
        set(PYTHON_DASH_B "-B")
    endif()
endif()
|
||||
|
||||
########################################################################
|
||||
# Check for the existence of a python module:
|
||||
# - desc a string description of the check
|
||||
# - mod the name of the module to import
|
||||
# - cmd an additional command to run
|
||||
# - have the result variable to set
|
||||
########################################################################
|
||||
# Runs a short python script that imports ${mod} (falling back to merely
# evaluating the name when the import fails) and asserts ${cmd}.
# ${have} receives TRUE when the interpreter exits with status 0 and
# FALSE otherwise. Progress is reported through STATUS messages.
macro(VOLK_PYTHON_CHECK_MODULE desc mod cmd have)
    message(STATUS "")
    message(STATUS "Python checking for ${desc}")
    execute_process(
        COMMAND ${PYTHON_EXECUTABLE} -c "
#########################################
try: import ${mod}
except:
    try: ${mod}
    except: exit(-1)
try: assert ${cmd}
except: exit(-1)
#########################################"
        RESULT_VARIABLE ${have}
    )
    # Turn the raw exit status into a boolean.
    if(NOT ${have} EQUAL 0)
        message(STATUS "Python checking for ${desc} - not found")
        set(${have} FALSE)
    else()
        message(STATUS "Python checking for ${desc} - found")
        set(${have} TRUE)
    endif()
endmacro()
|
||||
|
||||
########################################################################
|
||||
# Sets the python installation directory VOLK_PYTHON_DIR
|
||||
########################################################################
|
||||
# Ask distutils for the platform-specific library directory, relative to
# an empty prefix. print() is used in its function form so the snippet is
# valid for both Python 2 and Python 3 interpreters.
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "
from distutils import sysconfig
print(sysconfig.get_python_lib(plat_specific=True, prefix=''))
" OUTPUT_VARIABLE VOLK_PYTHON_DIR OUTPUT_STRIP_TRAILING_WHITESPACE
)
# Quoted on purpose: if the interpreter call produced no output, an
# unquoted expansion would drop the argument and file() would abort the
# configure step with an argument-count error.
file(TO_CMAKE_PATH "${VOLK_PYTHON_DIR}" VOLK_PYTHON_DIR)
|
||||
|
||||
########################################################################
|
||||
# Create an always-built target with a unique name
|
||||
# Usage: VOLK_UNIQUE_TARGET(<description> <dependencies list>)
|
||||
########################################################################
|
||||
# Adds a target to ALL that depends on the files given after <desc>.
# The target name is built by python from the description, the current
# binary directory relative to the top binary directory, and the first
# five hex digits of an md5 over that directory and the dependency list;
# every non-word character is replaced by '_'.
function(VOLK_UNIQUE_TARGET desc)
    file(RELATIVE_PATH _subdir ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR})
    execute_process(
        COMMAND ${PYTHON_EXECUTABLE} -c "import re, hashlib
unique = hashlib.md5('${_subdir}${ARGN}').hexdigest()[:5]
print(re.sub('\\W', '_', '${desc} ${_subdir} ' + unique))"
        OUTPUT_VARIABLE _unique_name
        OUTPUT_STRIP_TRAILING_WHITESPACE
    )
    add_custom_target(${_unique_name} ALL DEPENDS ${ARGN})
endfunction()
|
||||
|
||||
########################################################################
|
||||
# Install python sources (also builds and installs byte-compiled python)
|
||||
########################################################################
|
||||
# Usage: VOLK_PYTHON_INSTALL(FILES|PROGRAMS <paths...> DESTINATION <dir> COMPONENT <name>)
#  - FILES:    the sources are installed as given, and .pyc/.pyo files are
#              generated in the binary tree and installed next to them.
#  - PROGRAMS: each script is copied with a '#!' line prepended and installed
#              under its original name (plus '.py' on Windows if it has no extension).
# PROGRAMS is only considered when FILES is empty.
function(VOLK_PYTHON_INSTALL)
    include(CMakeParseArgumentsCopy)
    CMAKE_PARSE_ARGUMENTS(VOLK_PYTHON_INSTALL "" "DESTINATION;COMPONENT" "FILES;PROGRAMS" ${ARGN})

    ####################################################################
    if(VOLK_PYTHON_INSTALL_FILES)
    ####################################################################
        install(${ARGN}) #installs regular python files

        #create a list of all generated files
        unset(pysrcfiles)
        unset(pycfiles)
        unset(pyofiles)
        foreach(pyfile ${VOLK_PYTHON_INSTALL_FILES})
            get_filename_component(pyfile "${pyfile}" ABSOLUTE)
            list(APPEND pysrcfiles "${pyfile}")

            #determine if this file is in the source or binary directory
            #(the shorter of the two relative paths decides)
            file(RELATIVE_PATH source_rel_path "${CMAKE_CURRENT_SOURCE_DIR}" "${pyfile}")
            string(LENGTH "${source_rel_path}" source_rel_path_len)
            file(RELATIVE_PATH binary_rel_path "${CMAKE_CURRENT_BINARY_DIR}" "${pyfile}")
            string(LENGTH "${binary_rel_path}" binary_rel_path_len)

            #and set the generated path appropriately
            if(${source_rel_path_len} GREATER ${binary_rel_path_len})
                set(pygenfile "${CMAKE_CURRENT_BINARY_DIR}/${binary_rel_path}")
            else()
                set(pygenfile "${CMAKE_CURRENT_BINARY_DIR}/${source_rel_path}")
            endif()
            list(APPEND pycfiles "${pygenfile}c")
            list(APPEND pyofiles "${pygenfile}o")

            #ensure generation path exists
            get_filename_component(pygen_path "${pygenfile}" PATH)
            file(MAKE_DIRECTORY "${pygen_path}")

        endforeach(pyfile)

        #the command to generate the pyc files
        #(the helper takes all sources followed by all outputs, in matching order)
        add_custom_command(
            DEPENDS ${pysrcfiles} OUTPUT ${pycfiles}
            COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_BINARY_DIR}/python_compile_helper.py ${pysrcfiles} ${pycfiles}
            VERBATIM
        )

        #the command to generate the pyo files (same helper, interpreter run with -O)
        add_custom_command(
            DEPENDS ${pysrcfiles} OUTPUT ${pyofiles}
            COMMAND ${PYTHON_EXECUTABLE} -O ${CMAKE_BINARY_DIR}/python_compile_helper.py ${pysrcfiles} ${pyofiles}
            VERBATIM
        )

        #create install rule and add generated files to target list
        set(python_install_gen_targets ${pycfiles} ${pyofiles})
        install(FILES ${python_install_gen_targets}
            DESTINATION ${VOLK_PYTHON_INSTALL_DESTINATION}
            COMPONENT ${VOLK_PYTHON_INSTALL_COMPONENT}
        )

    ####################################################################
    elseif(VOLK_PYTHON_INSTALL_PROGRAMS)
    ####################################################################
        file(TO_NATIVE_PATH "${PYTHON_EXECUTABLE}" pyexe_native)

        #the build-host interpreter path is not used for the shebang when cross compiling
        if (CMAKE_CROSSCOMPILING)
            set(pyexe_native "/usr/bin/env python")
        endif()

        foreach(pyfile ${VOLK_PYTHON_INSTALL_PROGRAMS})
            get_filename_component(pyfile_name "${pyfile}" NAME)
            get_filename_component(pyfile "${pyfile}" ABSOLUTE)
            #the generated copy lives at the mirrored location in the build tree, with '.exe' appended
            string(REPLACE "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" pyexefile "${pyfile}.exe")
            list(APPEND python_install_gen_targets "${pyexefile}")

            get_filename_component(pyexefile_path "${pyexefile}" PATH)
            file(MAKE_DIRECTORY "${pyexefile_path}")

            #write '#!<interpreter>' followed by the original script contents
            add_custom_command(
                OUTPUT ${pyexefile} DEPENDS ${pyfile}
                COMMAND ${PYTHON_EXECUTABLE} -c
                "open('${pyexefile}','w').write('\#!${pyexe_native}\\n'+open('${pyfile}').read())"
                COMMENT "Shebangin ${pyfile_name}"
                VERBATIM
            )

            #on windows, python files need an extension to execute
            get_filename_component(pyfile_ext "${pyfile}" EXT)
            if(WIN32 AND NOT pyfile_ext)
                set(pyfile_name "${pyfile_name}.py")
            endif()

            install(PROGRAMS "${pyexefile}" RENAME "${pyfile_name}"
                DESTINATION ${VOLK_PYTHON_INSTALL_DESTINATION}
                COMPONENT ${VOLK_PYTHON_INSTALL_COMPONENT}
            )
        endforeach(pyfile)

    endif()

    #always-built target that pulls in every generated file listed above
    VOLK_UNIQUE_TARGET("pygen" ${python_install_gen_targets})

endfunction(VOLK_PYTHON_INSTALL)
|
||||
|
||||
########################################################################
|
||||
# Write the python helper script that generates byte code files
|
||||
########################################################################
|
||||
# The helper is invoked with all source paths followed by the same number of
# output paths; it byte-compiles each source into the output at the same
# position and raises on the first compile error (doraise=True).
file(WRITE ${CMAKE_BINARY_DIR}/python_compile_helper.py "
import sys, py_compile
files = sys.argv[1:]
# Floor division: a plain '/' gives a float on Python 3, which cannot be used as a slice index.
half = len(files)//2
srcs, gens = files[:half], files[half:]
for src, gen in zip(srcs, gens):
    py_compile.compile(file=src, cfile=gen, doraise=True)
")
|
98
src/algorithms/libs/volk_gnsssdr/cmake/VolkBoost.cmake
Normal file
98
src/algorithms/libs/volk_gnsssdr/cmake/VolkBoost.cmake
Normal file
@ -0,0 +1,98 @@
|
||||
# Copyright 2010-2011 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is part of GNU Radio
|
||||
#
|
||||
# GNU Radio is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# GNU Radio is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with GNU Radio; see the file COPYING. If not, write to
|
||||
# the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
# Boston, MA 02110-1301, USA.
|
||||
|
||||
# Include guard: the rest of this file is processed only on first inclusion.
if(DEFINED __INCLUDED_VOLK_BOOST_CMAKE)
    return()
endif()
set(__INCLUDED_VOLK_BOOST_CMAKE TRUE)

########################################################################
# Setup Boost and handle some system specific things
########################################################################

# Components requested from find_package(Boost) further down.
set(BOOST_REQUIRED_COMPONENTS filesystem system unit_test_framework program_options)

# fedora 64-bit fix: add /usr/lib64 to the library search hint unless the
# user pointed BOOST_ROOT somewhere explicitly.
if(UNIX AND NOT BOOST_ROOT AND EXISTS "/usr/lib64")
    list(APPEND BOOST_LIBRARYDIR "/usr/lib64")
endif()

if(MSVC)
    list(APPEND BOOST_REQUIRED_COMPONENTS chrono)

    # Default to dynamic linking, but let a value supplied by the user win,
    # then expose the setting in the cache.
    if(NOT DEFINED BOOST_ALL_DYN_LINK)
        set(BOOST_ALL_DYN_LINK TRUE)
    endif()
    set(BOOST_ALL_DYN_LINK "${BOOST_ALL_DYN_LINK}" CACHE BOOL "boost enable dynamic linking")

    if(NOT BOOST_ALL_DYN_LINK)
        # empty components list for static link
        unset(BOOST_REQUIRED_COMPONENTS)
    else()
        # setup boost auto-linking in msvc
        add_definitions(-DBOOST_ALL_DYN_LINK)
    endif()
endif()
|
||||
|
||||
# This does not allow us to disable specific versions. It is used
# internally by cmake to know the formation of newer versions. As newer
# Boost versions beyond what is shown here are produced, we must extend
# this list. To disable Boost versions, see below.
#
# The list is consulted by the Boost find module while find_package() runs,
# so it has to be defined before the find_package(Boost ...) call.
set(Boost_ADDITIONAL_VERSIONS
    "1.35.0" "1.35" "1.36.0" "1.36" "1.37.0" "1.37" "1.38.0" "1.38" "1.39.0" "1.39"
    "1.40.0" "1.40" "1.41.0" "1.41" "1.42.0" "1.42" "1.43.0" "1.43" "1.44.0" "1.44"
    "1.45.0" "1.45" "1.46.0" "1.46" "1.47.0" "1.47" "1.48.0" "1.48" "1.49.0" "1.49"
    "1.50.0" "1.50" "1.51.0" "1.51" "1.52.0" "1.52" "1.53.0" "1.53" "1.54.0" "1.54"
    "1.55.0" "1.55" "1.56.0" "1.56" "1.57.0" "1.57" "1.58.0" "1.58" "1.59.0" "1.59"
    "1.60.0" "1.60" "1.61.0" "1.61" "1.62.0" "1.62" "1.63.0" "1.63" "1.64.0" "1.64"
    "1.65.0" "1.65" "1.66.0" "1.66" "1.67.0" "1.67" "1.68.0" "1.68" "1.69.0" "1.69"
)

find_package(Boost "1.35" COMPONENTS ${BOOST_REQUIRED_COMPONENTS})
|
||||
|
||||
# Boost 1.52 disabled, see https://svn.boost.org/trac/boost/ticket/7669
# Similar problems with Boost 1.46 and 1.47.

option(ENABLE_BAD_BOOST "Enable known bad versions of Boost" OFF)
if(ENABLE_BAD_BOOST)
    message(STATUS "Enabling use of known bad versions of Boost.")
endif()

# For any unsuitable Boost version, add the version number below in
# the following format: XXYYZZ
# Where:
#     XX is the major version ('10' for version 1)
#     YY is the minor version number ('46' for 1.46)
#     ZZ is the patch version number (typically just '00')
set(Boost_NOGO_VERSIONS
    104600 104601 104700 105200
)

foreach(ver ${Boost_NOGO_VERSIONS})
    # Both sides are quoted: Boost_VERSION is empty when Boost was not found,
    # and an unquoted empty expansion would leave if() with a missing operand
    # and abort configuration instead of simply not matching.
    if("${Boost_VERSION}" EQUAL "${ver}")
        if(NOT ENABLE_BAD_BOOST)
            message(STATUS "WARNING: Found a known bad version of Boost (v${Boost_VERSION}). Disabling.")
            set(Boost_FOUND FALSE)
        else()
            message(STATUS "WARNING: Found a known bad version of Boost (v${Boost_VERSION}). Continuing anyway.")
            set(Boost_FOUND TRUE)
        endif()
    endif()
endforeach()
|
26
src/algorithms/libs/volk_gnsssdr/cmake/VolkConfig.cmake
Normal file
26
src/algorithms/libs/volk_gnsssdr/cmake/VolkConfig.cmake
Normal file
@ -0,0 +1,26 @@
|
||||
# Locate the volk_gnsssdr headers and library.
# Results: VOLK_INCLUDE_DIRS, VOLK_LIBRARIES and the found flag set by
# find_package_handle_standard_args(VOLK ...).

# pkg-config, when it knows the package, supplies extra search hints.
include(FindPkgConfig)
pkg_check_modules(PC_VOLK volk_gnsssdr)

# Header directory: the VOLK_DIR environment variable and the pkg-config
# result are tried as hints, the listed system directories as fallbacks.
find_path(VOLK_INCLUDE_DIRS
    NAMES volk_gnsssdr/volk_gnsssdr.h
    HINTS $ENV{VOLK_DIR}/include
          ${PC_VOLK_INCLUDEDIR}
    PATHS /usr/local/include
          /usr/include
)

# Library: same hint order, with both lib and lib64 system directories.
find_library(VOLK_LIBRARIES
    NAMES volk_gnsssdr
    HINTS $ENV{VOLK_DIR}/lib
          ${PC_VOLK_LIBDIR}
    PATHS /usr/local/lib
          /usr/local/lib64
          /usr/lib
          /usr/lib64
)

# Report the result in the standard way and hide the cache entries from
# the default GUI view.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(VOLK DEFAULT_MSG VOLK_LIBRARIES VOLK_INCLUDE_DIRS)
mark_as_advanced(VOLK_LIBRARIES VOLK_INCLUDE_DIRS)
|
58
src/algorithms/libs/volk_gnsssdr/cmake/msvc/config.h
Normal file
58
src/algorithms/libs/volk_gnsssdr/cmake/msvc/config.h
Normal file
@ -0,0 +1,58 @@
|
||||
#ifndef _MSC_VER // [
|
||||
#error "Use this header only with Microsoft Visual C++ compilers!"
|
||||
#endif // _MSC_VER ]
|
||||
|
||||
#ifndef _MSC_CONFIG_H_ // [
|
||||
#define _MSC_CONFIG_H_
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// enable inline functions for C code
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
#ifndef __cplusplus
|
||||
# define inline __inline
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// signed size_t
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
#include <stddef.h>
|
||||
typedef ptrdiff_t ssize_t;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// rint functions
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
#include <math.h>
|
||||
static inline long lrint(double x){return (long)(x > 0.0 ? x + 0.5 : x - 0.5);}
|
||||
static inline long lrintf(float x){return (long)(x > 0.0f ? x + 0.5f : x - 0.5f);}
|
||||
static inline long long llrint(double x){return (long long)(x > 0.0 ? x + 0.5 : x - 0.5);}
|
||||
static inline long long llrintf(float x){return (long long)(x > 0.0f ? x + 0.5f : x - 0.5f);}
|
||||
static inline double rint(double x){return (x > 0.0)? floor(x + 0.5) : ceil(x - 0.5);}
|
||||
static inline float rintf(float x){return (x > 0.0f)? floorf(x + 0.5f) : ceilf(x - 0.5f);}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// math constants
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
#define INFINITY HUGE_VAL
|
||||
|
||||
# define M_E 2.7182818284590452354 /* e */
|
||||
# define M_LOG2E 1.4426950408889634074 /* log_2 e */
|
||||
# define M_LOG10E 0.43429448190325182765 /* log_10 e */
|
||||
# define M_LN2 0.69314718055994530942 /* log_e 2 */
|
||||
# define M_LN10 2.30258509299404568402 /* log_e 10 */
|
||||
# define M_PI 3.14159265358979323846 /* pi */
|
||||
# define M_PI_2 1.57079632679489661923 /* pi/2 */
|
||||
# define M_PI_4 0.78539816339744830962 /* pi/4 */
|
||||
# define M_1_PI 0.31830988618379067154 /* 1/pi */
|
||||
# define M_2_PI 0.63661977236758134308 /* 2/pi */
|
||||
# define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */
|
||||
# define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
|
||||
# define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// random and srandom
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
#include <stdlib.h>
|
||||
static inline long int random (void) { return rand(); }
|
||||
static inline void srandom (unsigned int seed) { srand(seed); }
|
||||
|
||||
#endif // _MSC_CONFIG_H_ ]
|
301
src/algorithms/libs/volk_gnsssdr/cmake/msvc/inttypes.h
Normal file
301
src/algorithms/libs/volk_gnsssdr/cmake/msvc/inttypes.h
Normal file
@ -0,0 +1,301 @@
|
||||
// ISO C9x compliant inttypes.h for Microsoft Visual Studio
|
||||
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
|
||||
//
|
||||
// Copyright (c) 2006 Alexander Chemeris
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. The name of the author may be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _MSC_VER // [
|
||||
#error "Use this header only with Microsoft Visual C++ compilers!"
|
||||
#endif // _MSC_VER ]
|
||||
|
||||
#ifndef _MSC_INTTYPES_H_ // [
|
||||
#define _MSC_INTTYPES_H_
|
||||
|
||||
#if _MSC_VER > 1000
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// 7.8 Format conversion of integer types
|
||||
|
||||
typedef struct {
|
||||
intmax_t quot;
|
||||
intmax_t rem;
|
||||
} imaxdiv_t;
|
||||
|
||||
// 7.8.1 Macros for format specifiers
|
||||
|
||||
// The fprintf macros for signed integers are:
|
||||
#define PRId8 "d"
|
||||
#define PRIi8 "i"
|
||||
#define PRIdLEAST8 "d"
|
||||
#define PRIiLEAST8 "i"
|
||||
#define PRIdFAST8 "d"
|
||||
#define PRIiFAST8 "i"
|
||||
|
||||
#define PRId16 "hd"
|
||||
#define PRIi16 "hi"
|
||||
#define PRIdLEAST16 "hd"
|
||||
#define PRIiLEAST16 "hi"
|
||||
#define PRIdFAST16 "hd"
|
||||
#define PRIiFAST16 "hi"
|
||||
|
||||
#define PRId32 "I32d"
|
||||
#define PRIi32 "I32i"
|
||||
#define PRIdLEAST32 "I32d"
|
||||
#define PRIiLEAST32 "I32i"
|
||||
#define PRIdFAST32 "I32d"
|
||||
#define PRIiFAST32 "I32i"
|
||||
|
||||
#define PRId64 "I64d"
|
||||
#define PRIi64 "I64i"
|
||||
#define PRIdLEAST64 "I64d"
|
||||
#define PRIiLEAST64 "I64i"
|
||||
#define PRIdFAST64 "I64d"
|
||||
#define PRIiFAST64 "I64i"
|
||||
|
||||
#define PRIdMAX "I64d"
|
||||
#define PRIiMAX "I64i"
|
||||
|
||||
#define PRIdPTR "Id"
|
||||
#define PRIiPTR "Ii"
|
||||
|
||||
// The fprintf macros for unsigned integers are:
|
||||
#define PRIo8 "o"
|
||||
#define PRIu8 "u"
|
||||
#define PRIx8 "x"
|
||||
#define PRIX8 "X"
|
||||
#define PRIoLEAST8 "o"
|
||||
#define PRIuLEAST8 "u"
|
||||
#define PRIxLEAST8 "x"
|
||||
#define PRIXLEAST8 "X"
|
||||
#define PRIoFAST8 "o"
|
||||
#define PRIuFAST8 "u"
|
||||
#define PRIxFAST8 "x"
|
||||
#define PRIXFAST8 "X"
|
||||
|
||||
#define PRIo16 "ho"
|
||||
#define PRIu16 "hu"
|
||||
#define PRIx16 "hx"
|
||||
#define PRIX16 "hX"
|
||||
#define PRIoLEAST16 "ho"
|
||||
#define PRIuLEAST16 "hu"
|
||||
#define PRIxLEAST16 "hx"
|
||||
#define PRIXLEAST16 "hX"
|
||||
#define PRIoFAST16 "ho"
|
||||
#define PRIuFAST16 "hu"
|
||||
#define PRIxFAST16 "hx"
|
||||
#define PRIXFAST16 "hX"
|
||||
|
||||
#define PRIo32 "I32o"
|
||||
#define PRIu32 "I32u"
|
||||
#define PRIx32 "I32x"
|
||||
#define PRIX32 "I32X"
|
||||
#define PRIoLEAST32 "I32o"
|
||||
#define PRIuLEAST32 "I32u"
|
||||
#define PRIxLEAST32 "I32x"
|
||||
#define PRIXLEAST32 "I32X"
|
||||
#define PRIoFAST32 "I32o"
|
||||
#define PRIuFAST32 "I32u"
|
||||
#define PRIxFAST32 "I32x"
|
||||
#define PRIXFAST32 "I32X"
|
||||
|
||||
#define PRIo64 "I64o"
|
||||
#define PRIu64 "I64u"
|
||||
#define PRIx64 "I64x"
|
||||
#define PRIX64 "I64X"
|
||||
#define PRIoLEAST64 "I64o"
|
||||
#define PRIuLEAST64 "I64u"
|
||||
#define PRIxLEAST64 "I64x"
|
||||
#define PRIXLEAST64 "I64X"
|
||||
#define PRIoFAST64 "I64o"
|
||||
#define PRIuFAST64 "I64u"
|
||||
#define PRIxFAST64 "I64x"
|
||||
#define PRIXFAST64 "I64X"
|
||||
|
||||
#define PRIoMAX "I64o"
|
||||
#define PRIuMAX "I64u"
|
||||
#define PRIxMAX "I64x"
|
||||
#define PRIXMAX "I64X"
|
||||
|
||||
#define PRIoPTR "Io"
|
||||
#define PRIuPTR "Iu"
|
||||
#define PRIxPTR "Ix"
|
||||
#define PRIXPTR "IX"
|
||||
|
||||
// The fscanf macros for signed integers are:
|
||||
#define SCNd8 "d"
|
||||
#define SCNi8 "i"
|
||||
#define SCNdLEAST8 "d"
|
||||
#define SCNiLEAST8 "i"
|
||||
#define SCNdFAST8 "d"
|
||||
#define SCNiFAST8 "i"
|
||||
|
||||
#define SCNd16 "hd"
|
||||
#define SCNi16 "hi"
|
||||
#define SCNdLEAST16 "hd"
|
||||
#define SCNiLEAST16 "hi"
|
||||
#define SCNdFAST16 "hd"
|
||||
#define SCNiFAST16 "hi"
|
||||
|
||||
#define SCNd32 "ld"
|
||||
#define SCNi32 "li"
|
||||
#define SCNdLEAST32 "ld"
|
||||
#define SCNiLEAST32 "li"
|
||||
#define SCNdFAST32 "ld"
|
||||
#define SCNiFAST32 "li"
|
||||
|
||||
#define SCNd64 "I64d"
|
||||
#define SCNi64 "I64i"
|
||||
#define SCNdLEAST64 "I64d"
|
||||
#define SCNiLEAST64 "I64i"
|
||||
#define SCNdFAST64 "I64d"
|
||||
#define SCNiFAST64 "I64i"
|
||||
|
||||
#define SCNdMAX "I64d"
|
||||
#define SCNiMAX "I64i"
|
||||
|
||||
#ifdef _WIN64 // [
|
||||
# define SCNdPTR "I64d"
|
||||
# define SCNiPTR "I64i"
|
||||
#else // _WIN64 ][
|
||||
# define SCNdPTR "ld"
|
||||
# define SCNiPTR "li"
|
||||
#endif // _WIN64 ]
|
||||
|
||||
// The fscanf macros for unsigned integers are:
|
||||
#define SCNo8 "o"
|
||||
#define SCNu8 "u"
|
||||
#define SCNx8 "x"
|
||||
#define SCNX8 "X"
|
||||
#define SCNoLEAST8 "o"
|
||||
#define SCNuLEAST8 "u"
|
||||
#define SCNxLEAST8 "x"
|
||||
#define SCNXLEAST8 "X"
|
||||
#define SCNoFAST8 "o"
|
||||
#define SCNuFAST8 "u"
|
||||
#define SCNxFAST8 "x"
|
||||
#define SCNXFAST8 "X"
|
||||
|
||||
#define SCNo16 "ho"
|
||||
#define SCNu16 "hu"
|
||||
#define SCNx16 "hx"
|
||||
#define SCNX16 "hX"
|
||||
#define SCNoLEAST16 "ho"
|
||||
#define SCNuLEAST16 "hu"
|
||||
#define SCNxLEAST16 "hx"
|
||||
#define SCNXLEAST16 "hX"
|
||||
#define SCNoFAST16 "ho"
|
||||
#define SCNuFAST16 "hu"
|
||||
#define SCNxFAST16 "hx"
|
||||
#define SCNXFAST16 "hX"
|
||||
|
||||
#define SCNo32 "lo"
|
||||
#define SCNu32 "lu"
|
||||
#define SCNx32 "lx"
|
||||
#define SCNX32 "lX"
|
||||
#define SCNoLEAST32 "lo"
|
||||
#define SCNuLEAST32 "lu"
|
||||
#define SCNxLEAST32 "lx"
|
||||
#define SCNXLEAST32 "lX"
|
||||
#define SCNoFAST32 "lo"
|
||||
#define SCNuFAST32 "lu"
|
||||
#define SCNxFAST32 "lx"
|
||||
#define SCNXFAST32 "lX"
|
||||
|
||||
#define SCNo64 "I64o"
|
||||
#define SCNu64 "I64u"
|
||||
#define SCNx64 "I64x"
|
||||
#define SCNX64 "I64X"
|
||||
#define SCNoLEAST64 "I64o"
|
||||
#define SCNuLEAST64 "I64u"
|
||||
#define SCNxLEAST64 "I64x"
|
||||
#define SCNXLEAST64 "I64X"
|
||||
#define SCNoFAST64 "I64o"
|
||||
#define SCNuFAST64 "I64u"
|
||||
#define SCNxFAST64 "I64x"
|
||||
#define SCNXFAST64 "I64X"
|
||||
|
||||
#define SCNoMAX "I64o"
|
||||
#define SCNuMAX "I64u"
|
||||
#define SCNxMAX "I64x"
|
||||
#define SCNXMAX "I64X"
|
||||
|
||||
#ifdef _WIN64 // [
|
||||
# define SCNoPTR "I64o"
|
||||
# define SCNuPTR "I64u"
|
||||
# define SCNxPTR "I64x"
|
||||
# define SCNXPTR "I64X"
|
||||
#else // _WIN64 ][
|
||||
# define SCNoPTR "lo"
|
||||
# define SCNuPTR "lu"
|
||||
# define SCNxPTR "lx"
|
||||
# define SCNXPTR "lX"
|
||||
#endif // _WIN64 ]
|
||||
|
||||
// 7.8.2 Functions for greatest-width integer types
|
||||
|
||||
// 7.8.2.1 The imaxabs function
|
||||
#define imaxabs _abs64
|
||||
|
||||
// 7.8.2.2 The imaxdiv function
|
||||
|
||||
// This is modified version of div() function from Microsoft's div.c found
|
||||
// in %MSVC.NET%\crt\src\div.c
|
||||
#ifdef STATIC_IMAXDIV // [
|
||||
static
|
||||
#else // STATIC_IMAXDIV ][
|
||||
_inline
|
||||
#endif // STATIC_IMAXDIV ]
|
||||
imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
|
||||
{
|
||||
imaxdiv_t result;
|
||||
|
||||
result.quot = numer / denom;
|
||||
result.rem = numer % denom;
|
||||
|
||||
if (numer < 0 && result.rem > 0) {
|
||||
// did division wrong; must fix up
|
||||
++result.quot;
|
||||
result.rem -= denom;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// 7.8.2.3 The strtoimax and strtoumax functions
|
||||
#define strtoimax _strtoi64
|
||||
#define strtoumax _strtoui64
|
||||
|
||||
// 7.8.2.4 The wcstoimax and wcstoumax functions
|
||||
#define wcstoimax _wcstoi64
|
||||
#define wcstoumax _wcstoui64
|
||||
|
||||
|
||||
#endif // _MSC_INTTYPES_H_ ]
|
45
src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdbool.h
Normal file
45
src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdbool.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (C) 2005, 2006 Apple Computer, Inc.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Library General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Library General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Library General Public License
|
||||
* along with this library; see the file COPYING.LIB. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef STDBOOL_WIN32_H
|
||||
#define STDBOOL_WIN32_H
|
||||
|
||||
#ifndef _MSC_VER // [
|
||||
#error "Use this header only with Microsoft Visual C++ compilers!"
|
||||
#endif // _MSC_VER ]
|
||||
|
||||
#ifndef __cplusplus
|
||||
|
||||
typedef unsigned char bool;
|
||||
|
||||
#define true 1
|
||||
#define false 0
|
||||
|
||||
#ifndef CASSERT
|
||||
#define CASSERT(exp, name) typedef int dummy##name [(exp) ? 1 : -1];
|
||||
#endif
|
||||
|
||||
CASSERT(sizeof(bool) == 1, bool_is_one_byte)
|
||||
CASSERT(true, true_is_true)
|
||||
CASSERT(!false, false_is_false)
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
251
src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdint.h
Normal file
251
src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdint.h
Normal file
@ -0,0 +1,251 @@
|
||||
// ISO C9x compliant stdint.h for Microsoft Visual Studio
|
||||
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
|
||||
//
|
||||
// Copyright (c) 2006-2008 Alexander Chemeris
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. The name of the author may be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _MSC_VER // [
|
||||
#error "Use this header only with Microsoft Visual C++ compilers!"
|
||||
#endif // _MSC_VER ]
|
||||
|
||||
#ifndef _MSC_STDINT_H_ // [
|
||||
#define _MSC_STDINT_H_
|
||||
|
||||
#if _MSC_VER > 1000
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
|
||||
// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
|
||||
// or compiler give many errors like this:
|
||||
// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
# include <wchar.h>
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
// Define _W64 macros to mark types changing their size, like intptr_t.
|
||||
#ifndef _W64
|
||||
# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
|
||||
# define _W64 __w64
|
||||
# else
|
||||
# define _W64
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
// 7.18.1 Integer types
|
||||
|
||||
// 7.18.1.1 Exact-width integer types
|
||||
|
||||
// Visual Studio 6 and Embedded Visual C++ 4 doesn't
|
||||
// realize that, e.g. char has the same size as __int8
|
||||
// so we give up on __intX for them.
|
||||
#if (_MSC_VER < 1300)
|
||||
typedef signed char int8_t;
|
||||
typedef signed short int16_t;
|
||||
typedef signed int int32_t;
|
||||
typedef unsigned char uint8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
#else
|
||||
typedef signed __int8 int8_t;
|
||||
typedef signed __int16 int16_t;
|
||||
typedef signed __int32 int32_t;
|
||||
typedef unsigned __int8 uint8_t;
|
||||
typedef unsigned __int16 uint16_t;
|
||||
typedef unsigned __int32 uint32_t;
|
||||
#endif
|
||||
typedef signed __int64 int64_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
|
||||
|
||||
// 7.18.1.2 Minimum-width integer types
|
||||
typedef int8_t int_least8_t;
|
||||
typedef int16_t int_least16_t;
|
||||
typedef int32_t int_least32_t;
|
||||
typedef int64_t int_least64_t;
|
||||
typedef uint8_t uint_least8_t;
|
||||
typedef uint16_t uint_least16_t;
|
||||
typedef uint32_t uint_least32_t;
|
||||
typedef uint64_t uint_least64_t;
|
||||
|
||||
// 7.18.1.3 Fastest minimum-width integer types
|
||||
typedef int8_t int_fast8_t;
|
||||
typedef int16_t int_fast16_t;
|
||||
typedef int32_t int_fast32_t;
|
||||
typedef int64_t int_fast64_t;
|
||||
typedef uint8_t uint_fast8_t;
|
||||
typedef uint16_t uint_fast16_t;
|
||||
typedef uint32_t uint_fast32_t;
|
||||
typedef uint64_t uint_fast64_t;
|
||||
|
||||
// 7.18.1.4 Integer types capable of holding object pointers
|
||||
#ifdef _WIN64 // [
|
||||
typedef signed __int64 intptr_t;
|
||||
typedef unsigned __int64 uintptr_t;
|
||||
#else // _WIN64 ][
|
||||
typedef _W64 signed int intptr_t;
|
||||
typedef _W64 unsigned int uintptr_t;
|
||||
#endif // _WIN64 ]
|
||||
|
||||
// 7.18.1.5 Greatest-width integer types
|
||||
typedef int64_t intmax_t;
|
||||
typedef uint64_t uintmax_t;
|
||||
|
||||
|
||||
// 7.18.2 Limits of specified-width integer types
|
||||
|
||||
#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
|
||||
|
||||
// 7.18.2.1 Limits of exact-width integer types
|
||||
#define INT8_MIN ((int8_t)_I8_MIN)
|
||||
#define INT8_MAX _I8_MAX
|
||||
#define INT16_MIN ((int16_t)_I16_MIN)
|
||||
#define INT16_MAX _I16_MAX
|
||||
#define INT32_MIN ((int32_t)_I32_MIN)
|
||||
#define INT32_MAX _I32_MAX
|
||||
#define INT64_MIN ((int64_t)_I64_MIN)
|
||||
#define INT64_MAX _I64_MAX
|
||||
#define UINT8_MAX _UI8_MAX
|
||||
#define UINT16_MAX _UI16_MAX
|
||||
#define UINT32_MAX _UI32_MAX
|
||||
#define UINT64_MAX _UI64_MAX
|
||||
|
||||
// 7.18.2.2 Limits of minimum-width integer types
|
||||
#define INT_LEAST8_MIN INT8_MIN
|
||||
#define INT_LEAST8_MAX INT8_MAX
|
||||
#define INT_LEAST16_MIN INT16_MIN
|
||||
#define INT_LEAST16_MAX INT16_MAX
|
||||
#define INT_LEAST32_MIN INT32_MIN
|
||||
#define INT_LEAST32_MAX INT32_MAX
|
||||
#define INT_LEAST64_MIN INT64_MIN
|
||||
#define INT_LEAST64_MAX INT64_MAX
|
||||
#define UINT_LEAST8_MAX UINT8_MAX
|
||||
#define UINT_LEAST16_MAX UINT16_MAX
|
||||
#define UINT_LEAST32_MAX UINT32_MAX
|
||||
#define UINT_LEAST64_MAX UINT64_MAX
|
||||
|
||||
// 7.18.2.3 Limits of fastest minimum-width integer types
|
||||
#define INT_FAST8_MIN INT8_MIN
|
||||
#define INT_FAST8_MAX INT8_MAX
|
||||
#define INT_FAST16_MIN INT16_MIN
|
||||
#define INT_FAST16_MAX INT16_MAX
|
||||
#define INT_FAST32_MIN INT32_MIN
|
||||
#define INT_FAST32_MAX INT32_MAX
|
||||
#define INT_FAST64_MIN INT64_MIN
|
||||
#define INT_FAST64_MAX INT64_MAX
|
||||
#define UINT_FAST8_MAX UINT8_MAX
|
||||
#define UINT_FAST16_MAX UINT16_MAX
|
||||
#define UINT_FAST32_MAX UINT32_MAX
|
||||
#define UINT_FAST64_MAX UINT64_MAX
|
||||
|
||||
// 7.18.2.4 Limits of integer types capable of holding object pointers
|
||||
#ifdef _WIN64 // [
|
||||
# define INTPTR_MIN INT64_MIN
|
||||
# define INTPTR_MAX INT64_MAX
|
||||
# define UINTPTR_MAX UINT64_MAX
|
||||
#else // _WIN64 ][
|
||||
# define INTPTR_MIN INT32_MIN
|
||||
# define INTPTR_MAX INT32_MAX
|
||||
# define UINTPTR_MAX UINT32_MAX
|
||||
#endif // _WIN64 ]
|
||||
|
||||
// 7.18.2.5 Limits of greatest-width integer types
|
||||
#define INTMAX_MIN INT64_MIN
|
||||
#define INTMAX_MAX INT64_MAX
|
||||
#define UINTMAX_MAX UINT64_MAX
|
||||
|
||||
// 7.18.3 Limits of other integer types
|
||||
|
||||
#ifdef _WIN64 // [
|
||||
# define PTRDIFF_MIN _I64_MIN
|
||||
# define PTRDIFF_MAX _I64_MAX
|
||||
#else // _WIN64 ][
|
||||
# define PTRDIFF_MIN _I32_MIN
|
||||
# define PTRDIFF_MAX _I32_MAX
|
||||
#endif // _WIN64 ]
|
||||
|
||||
#define SIG_ATOMIC_MIN INT_MIN
|
||||
#define SIG_ATOMIC_MAX INT_MAX
|
||||
|
||||
#ifndef SIZE_MAX // [
|
||||
# ifdef _WIN64 // [
|
||||
# define SIZE_MAX _UI64_MAX
|
||||
# else // _WIN64 ][
|
||||
# define SIZE_MAX _UI32_MAX
|
||||
# endif // _WIN64 ]
|
||||
#endif // SIZE_MAX ]
|
||||
|
||||
// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
|
||||
#ifndef WCHAR_MIN // [
|
||||
# define WCHAR_MIN 0
|
||||
#endif // WCHAR_MIN ]
|
||||
#ifndef WCHAR_MAX // [
|
||||
# define WCHAR_MAX _UI16_MAX
|
||||
#endif // WCHAR_MAX ]
|
||||
|
||||
#define WINT_MIN 0
|
||||
#define WINT_MAX _UI16_MAX
|
||||
|
||||
#endif // __STDC_LIMIT_MACROS ]
|
||||
|
||||
|
||||
// 7.18.4 Macros for integer constants
|
||||
|
||||
#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
|
||||
|
||||
// 7.18.4.1 Macros for minimum-width integer constants
|
||||
|
||||
#define INT8_C(val) val##i8
|
||||
#define INT16_C(val) val##i16
|
||||
#define INT32_C(val) val##i32
|
||||
#define INT64_C(val) val##i64
|
||||
|
||||
#define UINT8_C(val) val##ui8
|
||||
#define UINT16_C(val) val##ui16
|
||||
#define UINT32_C(val) val##ui32
|
||||
#define UINT64_C(val) val##ui64
|
||||
|
||||
// 7.18.4.2 Macros for greatest-width integer constants
|
||||
#ifndef INTMAX_C
|
||||
#define INTMAX_C INT64_C
|
||||
#endif
|
||||
#ifndef UINTMAX_C
|
||||
#define UINTMAX_C UINT64_C
|
||||
#endif
|
||||
|
||||
#endif // __STDC_CONSTANT_MACROS ]
|
||||
|
||||
|
||||
#endif // _MSC_STDINT_H_ ]
|
204
src/algorithms/libs/volk_gnsssdr/gen/archs.xml
Normal file
204
src/algorithms/libs/volk_gnsssdr/gen/archs.xml
Normal file
@ -0,0 +1,204 @@
|
||||
<!-- archs appear in order of significance for blind, de-facto version ordering -->
|
||||
<grammar>
|
||||
|
||||
<arch name="generic"> <!-- name is required-->
|
||||
</arch>
|
||||
|
||||
<arch name="altivec">
|
||||
<flag compiler="gnu">-maltivec</flag>
|
||||
<alignment>16</alignment>
|
||||
<check name="has_ppc"></check>
|
||||
</arch>
|
||||
|
||||
<arch name="softfp">
|
||||
<flag compiler="gnu">-mfloat-abi=softfp</flag>
|
||||
</arch>
|
||||
|
||||
<arch name="hardfp">
|
||||
<flag compiler="gnu">-mfloat-abi=hard</flag>
|
||||
</arch>
|
||||
|
||||
<arch name="neon">
|
||||
<flag compiler="gnu">-mfpu=neon</flag>
|
||||
<flag compiler="gnu">-funsafe-math-optimizations</flag>
|
||||
<alignment>16</alignment>
|
||||
<check name="has_neon"></check>
|
||||
</arch>
|
||||
|
||||
<arch name="32">
|
||||
<flag compiler="gnu">-m32</flag>
|
||||
</arch>
|
||||
|
||||
<arch name="64">
|
||||
<check name="check_extended_cpuid">
|
||||
<param>0x80000001</param>
|
||||
</check>
|
||||
<check name="cpuid_x86_bit"> <!-- checks to see if a bit is set -->
|
||||
<param>3</param> <!-- eax, ebx, ecx, [edx] -->
|
||||
<param>0x80000001</param> <!-- cpuid operation -->
|
||||
<param>29</param> <!-- bit shift -->
|
||||
</check>
|
||||
<flag compiler="gnu">-m64</flag>
|
||||
<flag compiler="clang">-m64</flag>
|
||||
</arch>
|
||||
|
||||
<arch name="3dnow">
|
||||
<check name="cpuid_x86_bit">
|
||||
<param>3</param>
|
||||
<param>0x80000001</param>
|
||||
<param>31</param>
|
||||
</check>
|
||||
<flag compiler="gnu">-m3dnow</flag>
|
||||
<flag compiler="clang">-m3dnow</flag>
|
||||
<alignment>8</alignment>
|
||||
</arch>
|
||||
|
||||
<arch name="abm">
|
||||
<check name="cpuid_x86_bit">
|
||||
<param>3</param>
|
||||
<param>0x80000001</param>
|
||||
<param>5</param>
|
||||
</check>
|
||||
<flag compiler="gnu">-msse4.2</flag>
|
||||
<flag compiler="clang">-msse4.2</flag>
|
||||
<alignment>16</alignment>
|
||||
</arch>
|
||||
|
||||
<arch name="popcount">
|
||||
<check name="cpuid_x86_bit">
|
||||
<param>2</param>
|
||||
<param>0x00000001</param>
|
||||
<param>23</param>
|
||||
</check>
|
||||
<flag compiler="gnu">-mpopcnt</flag>
|
||||
<flag compiler="clang">-mpopcnt</flag>
|
||||
<flag compiler="msvc">/arch:AVX</flag>
|
||||
</arch>
|
||||
|
||||
<arch name="mmx">
|
||||
<check name="cpuid_x86_bit">
|
||||
<param>3</param>
|
||||
<param>0x00000001</param>
|
||||
<param>23</param>
|
||||
</check>
|
||||
<flag compiler="gnu">-mmmx</flag>
|
||||
<flag compiler="clang">-mmmx</flag>
|
||||
<flag compiler="msvc">/arch:SSE</flag>
|
||||
<alignment>8</alignment>
|
||||
</arch>
|
||||
|
||||
<arch name="sse">
|
||||
<check name="cpuid_x86_bit">
|
||||
<param>3</param>
|
||||
<param>0x00000001</param>
|
||||
<param>25</param>
|
||||
</check>
|
||||
<flag compiler="gnu">-msse</flag>
|
||||
<flag compiler="clang">-msse</flag>
|
||||
<flag compiler="msvc">/arch:SSE</flag>
|
||||
<environment>_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);</environment>
|
||||
<include>xmmintrin.h</include>
|
||||
<alignment>16</alignment>
|
||||
</arch>
|
||||
|
||||
<arch name="sse2">
|
||||
<check name="cpuid_x86_bit">
|
||||
<param>3</param>
|
||||
<param>0x00000001</param>
|
||||
<param>26</param>
|
||||
</check>
|
||||
<flag compiler="gnu">-msse2</flag>
|
||||
<flag compiler="clang">-msse2</flag>
|
||||
<flag compiler="msvc">/arch:SSE2</flag>
|
||||
<alignment>16</alignment>
|
||||
</arch>
|
||||
|
||||
<arch name="orc">
|
||||
</arch>
|
||||
|
||||
<!-- it's here for overrule stuff. -->
|
||||
<arch name="norc">
|
||||
</arch>
|
||||
|
||||
<arch name="sse3">
|
||||
<check name="cpuid_x86_bit">
|
||||
<param>2</param>
|
||||
<param>0x00000001</param>
|
||||
<param>0</param>
|
||||
</check>
|
||||
<flag compiler="gnu">-msse3</flag>
|
||||
<flag compiler="clang">-msse3</flag>
|
||||
<flag compiler="msvc">/arch:AVX</flag>
|
||||
<environment>_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);</environment>
|
||||
<include>pmmintrin.h</include>
|
||||
<alignment>16</alignment>
|
||||
</arch>
|
||||
|
||||
<arch name="ssse3">
|
||||
<check name="cpuid_x86_bit">
|
||||
<param>2</param>
|
||||
<param>0x00000001</param>
|
||||
<param>9</param>
|
||||
</check>
|
||||
<flag compiler="gnu">-mssse3</flag>
|
||||
<flag compiler="clang">-mssse3</flag>
|
||||
<flag compiler="msvc">/arch:AVX</flag>
|
||||
<alignment>16</alignment>
|
||||
</arch>
|
||||
|
||||
<arch name="sse4_a">
|
||||
<check name="cpuid_x86_bit">
|
||||
<param>2</param>
|
||||
<param>0x80000001</param>
|
||||
<param>6</param>
|
||||
</check>
|
||||
<flag compiler="gnu">-msse4a</flag>
|
||||
<flag compiler="clang">-msse4a</flag>
|
||||
<alignment>16</alignment>
|
||||
</arch>
|
||||
|
||||
<arch name="sse4_1">
|
||||
<check name="cpuid_x86_bit">
|
||||
<param>2</param>
|
||||
<param>0x00000001</param>
|
||||
<param>19</param>
|
||||
</check>
|
||||
<flag compiler="gnu">-msse4.1</flag>
|
||||
<flag compiler="clang">-msse4.1</flag>
|
||||
<flag compiler="msvc">/arch:AVX</flag>
|
||||
<alignment>16</alignment>
|
||||
</arch>
|
||||
|
||||
<arch name="sse4_2">
|
||||
<check name="cpuid_x86_bit">
|
||||
<param>2</param>
|
||||
<param>0x00000001</param>
|
||||
<param>20</param>
|
||||
</check>
|
||||
<flag compiler="gnu">-msse4.2</flag>
|
||||
<flag compiler="clang">-msse4.2</flag>
|
||||
<flag compiler="msvc">/arch:AVX</flag>
|
||||
<alignment>16</alignment>
|
||||
</arch>
|
||||
|
||||
<arch name="avx">
|
||||
<check name="cpuid_x86_bit">
|
||||
<param>2</param>
|
||||
<param>0x00000001</param>
|
||||
<param>28</param>
|
||||
</check>
|
||||
<!-- check to make sure that xgetbv is enabled in OS -->
|
||||
<check name="cpuid_x86_bit">
|
||||
<param>2</param>
|
||||
<param>0x00000001</param>
|
||||
<param>27</param>
|
||||
</check>
|
||||
<!-- check to see that the OS has enabled AVX -->
|
||||
<check name="get_avx_enabled"></check>
|
||||
<flag compiler="gnu">-mavx</flag>
|
||||
<flag compiler="clang">-mavx</flag>
|
||||
<flag compiler="msvc">/arch:AVX</flag>
|
||||
<alignment>32</alignment>
|
||||
</arch>
|
||||
|
||||
</grammar>
|
55
src/algorithms/libs/volk_gnsssdr/gen/machines.xml
Normal file
55
src/algorithms/libs/volk_gnsssdr/gen/machines.xml
Normal file
@ -0,0 +1,55 @@
|
||||
<grammar>
|
||||
|
||||
<machine name="generic">
|
||||
<archs>generic orc|</archs>
|
||||
</machine>
|
||||
|
||||
<!--
|
||||
<machine name="mmx">
|
||||
<archs>generic 32|64 mmx orc|</archs>
|
||||
</machine>
|
||||
|
||||
<machine name="sse">
|
||||
<archs>generic 32|64| mmx| sse orc|</archs>
|
||||
</machine>
|
||||
-->
|
||||
|
||||
<machine name="neon">
|
||||
<archs>generic neon softfp|hardfp orc|</archs>
|
||||
</machine>
|
||||
|
||||
<!-- trailing | bar means generate without either for MSVC -->
|
||||
<machine name="sse2">
|
||||
<archs>generic 32|64| mmx| sse sse2 orc|</archs>
|
||||
</machine>
|
||||
|
||||
<machine name="sse3">
|
||||
<archs>generic 32|64 mmx sse sse2 sse3 orc|</archs>
|
||||
</machine>
|
||||
|
||||
<machine name="ssse3">
|
||||
<archs>generic 32|64 mmx sse sse2 sse3 ssse3 orc|</archs>
|
||||
</machine>
|
||||
|
||||
<machine name="sse4_a">
|
||||
<archs>generic 32|64 mmx sse sse2 sse3 sse4_a popcount orc|</archs>
|
||||
</machine>
|
||||
|
||||
<machine name="sse4_1">
|
||||
<archs>generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1 orc|</archs>
|
||||
</machine>
|
||||
|
||||
<machine name="sse4_2">
|
||||
<archs>generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount orc|</archs>
|
||||
</machine>
|
||||
|
||||
<!-- trailing | bar means generate without either for MSVC -->
|
||||
<machine name="avx">
|
||||
<archs>generic 32|64| mmx| sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount avx orc|</archs>
|
||||
</machine>
|
||||
|
||||
<machine name="altivec">
|
||||
<archs>generic altivec</archs>
|
||||
</machine>
|
||||
|
||||
</grammar>
|
@ -0,0 +1,85 @@
|
||||
#
|
||||
# Copyright 2012 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
archs = list()
|
||||
arch_dict = dict()
|
||||
|
||||
class arch_class:
    """One architecture entry (for example ``sse2``) together with its compiler flags.

    flags  -- dict mapping a compiler name to the list of flags for that compiler
    checks -- list of runtime checks, stored unchanged on ``self.checks``
    kwargs -- may carry ``name``, ``environment``, ``include`` and ``alignment``;
              any other keys are ignored
    """
    def __init__(self, flags, checks, **kwargs):
        # (attribute, conversion, fallback used when the key is absent or
        # the value cannot be converted)
        for key, cast, failval in (
            ('name', str, None),
            ('environment', str, None),
            ('include', str, None),
            ('alignment', int, 1),
        ):
            try:
                value = cast(kwargs[key])
            # Only the expected lookup/conversion failures fall back to the
            # default; KeyboardInterrupt and SystemExit are no longer swallowed.
            except (KeyError, TypeError, ValueError):
                value = failval
            setattr(self, key, value)
        self.checks = checks
        assert(self.name)  # every arch must be named
        self._flags = flags

    def is_supported(self, compiler):
        """Return True when no flags are declared at all, or when flags exist for ``compiler``."""
        if not self._flags: return True
        return compiler in self._flags

    def get_flags(self, compiler):
        """Return the flag list for ``compiler``, or an empty list when there is none."""
        try: return self._flags[compiler]
        except KeyError: return list()

    def __repr__(self): return self.name
|
||||
|
||||
def register_arch(**kwargs):
    """Build an arch_class from ``kwargs`` and record it in ``archs`` and ``arch_dict``."""
    new_arch = arch_class(**kwargs)
    arch_dict[new_arch.name] = new_arch  # lookup by name
    archs.append(new_arch)               # keeps registration order
|
||||
|
||||
########################################################################
|
||||
# register the arches
|
||||
########################################################################
|
||||
#TODO skip the XML and put it here
|
||||
from xml.dom import minidom
import os
gendir = os.path.dirname(__file__)
archs_xml = minidom.parse(os.path.join(gendir, 'archs.xml')).getElementsByTagName('arch')
for arch_xml in archs_xml:
    kwargs = dict()
    #XML attributes of <arch> (e.g. name) become keyword arguments
    for attr in arch_xml.attributes.keys():
        kwargs[attr] = arch_xml.attributes[attr].value
    #each child element contributes the text of the first element with that tag
    for node in arch_xml.childNodes:
        try:
            name = node.tagName
            val = arch_xml.getElementsByTagName(name)[0].firstChild.data
            kwargs[name] = val
        #text/comment nodes have no tagName, empty elements have no firstChild
        except AttributeError: pass
    checks = list()
    for check_xml in arch_xml.getElementsByTagName("check"):
        name = check_xml.attributes["name"].value
        params = list()
        for param_xml in check_xml.getElementsByTagName("param"):
            params.append(param_xml.firstChild.data)
        checks.append([name, params])
    #group the flags by compiler name
    flags = dict()
    for flag_xml in arch_xml.getElementsByTagName("flag"):
        name = flag_xml.attributes["compiler"].value
        flags.setdefault(name, list()).append(flag_xml.firstChild.data)
    #force kwargs keys to be of type str, not unicode for py25
    kwargs = dict((str(k), v) for k, v in kwargs.items())
    register_arch(flags=flags, checks=checks, **kwargs)

if __name__ == '__main__':
    print(archs)
|
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2012 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
import optparse
|
||||
import volk_gnsssdr_arch_defs
|
||||
import volk_gnsssdr_machine_defs
|
||||
|
||||
def do_arch_flags_list(compiler):
    """Print ``name,flag,...`` for every arch supported by ``compiler``, joined by ';'."""
    output = list()
    for arch in volk_gnsssdr_arch_defs.archs:
        if not arch.is_supported(compiler): continue
        fields = [arch.name] + arch.get_flags(compiler)
        output.append(','.join(fields))
    #single-argument print() behaves the same on Python 2 and Python 3
    print(';'.join(output))
|
||||
|
||||
def do_machines_list(arch_names):
    """Print, joined by ';', the names of machines whose archs are all in ``arch_names``."""
    available = set(arch_names)
    output = list()
    for machine in volk_gnsssdr_machine_defs.machines:
        #a machine qualifies only when every arch it needs is available
        if set(machine.arch_names).issubset(available):
            output.append(machine.name)
    #single-argument print() behaves the same on Python 2 and Python 3
    print(';'.join(output))
|
||||
|
||||
def do_machine_flags_list(compiler, machine_name):
    """Print, space separated, the ``compiler`` flags of every arch of ``machine_name``.

    Raises KeyError when ``machine_name`` is not a registered machine.
    """
    output = list()
    machine = volk_gnsssdr_machine_defs.machine_dict[machine_name]
    for arch in machine.archs:
        output.extend(arch.get_flags(compiler))
    #single-argument print() behaves the same on Python 2 and Python 3
    print(' '.join(output))
|
||||
|
||||
def main():
    """Command line entry point: dispatch on --mode to one of the do_* printers.

    Missing options and unknown modes are reported through the option parser
    (usage message, exit status 2) instead of an AttributeError traceback or
    a silent no-op.
    """
    parser = optparse.OptionParser()
    parser.add_option('--mode', type='string')
    parser.add_option('--compiler', type='string')
    parser.add_option('--archs', type='string')
    parser.add_option('--machine', type='string')
    (opts, args) = parser.parse_args()

    if opts.mode == 'arch_flags':
        if opts.compiler is None: parser.error('--mode=arch_flags requires --compiler')
        return do_arch_flags_list(opts.compiler.lower())
    if opts.mode == 'machines':
        if opts.archs is None: parser.error('--mode=machines requires --archs')
        return do_machines_list(opts.archs.split(';'))
    if opts.mode == 'machine_flags':
        if opts.compiler is None or opts.machine is None:
            parser.error('--mode=machine_flags requires --compiler and --machine')
        return do_machine_flags_list(opts.compiler.lower(), opts.machine)
    parser.error('unknown or missing --mode: %r' % (opts.mode,))
|
||||
|
||||
if __name__ == '__main__': main()
|
209
src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_kernel_defs.py
Normal file
209
src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_kernel_defs.py
Normal file
@ -0,0 +1,209 @@
|
||||
#
|
||||
# Copyright 2011-2012 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is part of GNU Radio
|
||||
#
|
||||
# GNU Radio is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# GNU Radio is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with GNU Radio; see the file COPYING. If not, write to
|
||||
# the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
# Boston, MA 02110-1301, USA.
|
||||
#
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import glob
|
||||
|
||||
########################################################################
|
||||
# Strip comments from a c/cpp file.
|
||||
# Input is code string, output is code string without comments.
|
||||
# http://stackoverflow.com/questions/241327/python-snippet-to-remove-c-and-c-comments
|
||||
########################################################################
|
||||
def comment_remover(text):
    """Return ``text`` with C/C++ comments removed.

    The pattern matches comments and also quoted string/character literals;
    literals are put back unchanged so comment markers inside them survive.
    """
    comment_or_literal = re.compile(
        r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
        re.DOTALL | re.MULTILINE
    )

    def drop_if_comment(found):
        token = found.group(0)
        #only comments start with '/'; literals start with a quote
        return "" if token.startswith('/') else token

    return comment_or_literal.sub(drop_if_comment, text)
|
||||
|
||||
########################################################################
|
||||
# Split code into nested sections according to ifdef preprocessor macros
|
||||
########################################################################
|
||||
def split_into_nested_ifdef_sections(code):
    """Split ``code`` into a list of ``(header, body)`` sections along #if/#else/#endif.

    ``header`` is the literal string 'text' for code outside any conditional,
    otherwise the directive line that opened the section.  For 'text'
    sections ``body`` is the code string; for conditional sections it is the
    same kind of list, produced recursively.  Sections whose text is empty or
    whitespace only are dropped.
    """
    directive_kinds = {
        'if': 'if', 'ifdef': 'if', 'ifndef': 'if',
        'else': 'else', 'elif': 'else',
        'endif': 'end',
    }
    sections = list()
    section = ''
    header = 'text'
    in_section_depth = 0
    for line in code.splitlines():
        #raw string: '\s' and '\w' are invalid escapes in a normal literal
        m = re.match(r'^(\s*)#(\s*)(\w+)(.*)$', line)
        line_is = 'normal'
        if m: line_is = directive_kinds.get(m.group(3), 'normal')

        if line_is == 'if': in_section_depth += 1
        if line_is == 'end': in_section_depth -= 1

        #an outermost #if/#ifdef/#ifndef or its #else/#elif starts a new section
        if in_section_depth == 1 and line_is in ('if', 'else'):
            sections.append((header, section))
            section = ''
            header = line
            continue

        #the matching outermost #endif returns to plain text
        if in_section_depth == 0 and line_is == 'end':
            sections.append((header, section))
            section = ''
            header = 'text'
            continue

        #everything else, including nested directives, is section content
        section += line + '\n'

    sections.append((header, section)) #and pack remainder into sections
    sections = [sec for sec in sections if sec[1].strip()] #filter empty sections

    #recurse into non-text sections to fill subsections
    for i, (header, section) in enumerate(sections):
        if header == 'text': continue
        sections[i] = (header, split_into_nested_ifdef_sections(section))

    return sections
|
||||
|
||||
########################################################################
|
||||
# Recursive print of sections to test code above
|
||||
########################################################################
|
||||
def print_sections(sections, indent = ' '):
    """Debug helper: print nested ``(header, body)`` sections, indenting one level per depth.

    Text bodies are printed line by line behind ``indent``; conditional
    headers are printed behind a dashed arrow and their bodies are printed
    recursively with a longer indent.
    """
    for header, body in sections:
        if header == 'text':
            #same output as the Python 2 statement "print indent, text"
            print('%s %s' % (indent, ('\n'+indent).join(body.splitlines())))
            continue
        print('%s> %s' % (indent.replace(' ', '-'), header))
        print_sections(body, indent + ' ')
|
||||
|
||||
########################################################################
|
||||
# Flatten a section to just body text
|
||||
########################################################################
|
||||
def flatten_section_text(sections):
    """Concatenate, in order, the text of every 'text' section at any nesting depth."""
    pieces = [
        bdy if hdr == 'text' else flatten_section_text(bdy)
        for hdr, bdy in sections
    ]
    return ''.join(pieces)
|
||||
|
||||
########################################################################
|
||||
# Extract kernel info from section, represent as an implementation
|
||||
########################################################################
|
||||
class impl_class:
    """One implementation of a kernel, parsed from an ``#if ... LV_HAVE_*`` section.

    Attributes set by the constructor:
      deps       -- set of lower-cased names following LV_HAVE_ in ``header``
      name       -- function name with the ``<kern_name>_`` prefix removed
      args       -- list of ``(type, name)`` tuples from the first prototype
      is_aligned -- True when ``name`` starts with 'a_'
    """
    def __init__(self, kern_name, header, body):
        #extract LV_HAVE_*
        self.deps = set(map(str.lower, re.findall(r'LV_HAVE_(\w+)', header)))
        #extract function suffix and args
        body = flatten_section_text(body)
        try:
            fcn_matcher = re.compile(r'^.*(%s\w*)\s*\((.*)$'%kern_name, re.DOTALL | re.MULTILINE)
            #compiled once here instead of once per argument
            arg_matcher = re.compile(r'^\s*(.*\W)\s*(\w+)\s*$', re.DOTALL | re.MULTILINE)
            body = body.split('{')[0].rsplit(')', 1)[0] #get the part before the open ){ bracket
            m = fcn_matcher.match(body)
            impl_name, the_rest = m.groups()
            self.name = impl_name.replace(kern_name+'_', '')
            self.args = list()
            for fcn_arg in the_rest.split(','):
                arg_type, arg_name = arg_matcher.match(fcn_arg).groups()
                self.args.append((arg_type, arg_name))
        except Exception as ex:
            #call form of raise: the "raise Exception, msg" form is a SyntaxError on Python 3
            raise Exception('I cant parse the function prototype from: %s in %s\n%s'%(kern_name, body, ex))

        assert self.name
        self.is_aligned = self.name.startswith('a_')

    def __repr__(self):
        return self.name
|
||||
|
||||
########################################################################
|
||||
# Get sets of LV_HAVE_* from the code
|
||||
########################################################################
|
||||
def extract_lv_haves(code):
    """Return one set per preprocessor line that mentions LV_HAVE_* names.

    Only lines whose first non-blank character is '#' are examined.  Each set
    holds the lower-cased suffixes found on that line; lines with no match
    contribute nothing.
    """
    haves = list()
    for line in code.splitlines():
        if not line.strip().startswith('#'): continue
        #raw string: '\w' is an invalid escape in a normal literal
        have_set = set(name.lower() for name in re.findall(r'LV_HAVE_(\w+)', line))
        if have_set: haves.append(have_set)
    return haves
|
||||
|
||||
########################################################################
|
||||
# Represent a processing kernel, parse from file
|
||||
########################################################################
|
||||
class kernel_class:
    """A processing kernel parsed from one kernel header file.

    The constructor reads ``kernel_file``, strips comments, splits the code
    along its preprocessor conditionals and creates one impl_class for every
    ``#if`` section that mentions LV_HAVE_ and sits directly inside a
    top-level ``#ifndef`` section.  An implementation named 'dispatcher' is
    removed from the list and recorded in ``has_dispatcher``.
    """
    def __init__(self, kernel_file):
        self.name = os.path.splitext(os.path.basename(kernel_file))[0]
        self.pname = self.name.replace('volk_gnsssdr_', 'p_')
        #context manager closes the file even if reading fails
        with open(kernel_file, 'r') as kernel_fd:
            code = kernel_fd.read()
        code = comment_remover(code)
        sections = split_into_nested_ifdef_sections(code)
        self._impls = list()
        for header, section in sections:
            if 'ifndef' not in header.lower(): continue
            for sub_hdr, body in section:
                if 'if' not in sub_hdr.lower(): continue
                if 'LV_HAVE_' not in sub_hdr: continue
                self._impls.append(impl_class(
                    kern_name=self.name, header=sub_hdr, body=body,
                ))
        assert(self._impls)
        self.has_dispatcher = False
        for impl in self._impls:
            if impl.name == 'dispatcher':
                #safe to remove while iterating because the loop stops right away
                self._impls.remove(impl)
                self.has_dispatcher = True
                break
        #the first remaining implementation supplies the kernel signature
        self.args = self._impls[0].args
        self.arglist_types = ', '.join([a[0] for a in self.args])
        self.arglist_full = ', '.join(['%s %s'%a for a in self.args])
        self.arglist_names = ', '.join([a[1] for a in self.args])

    def get_impls(self, archs):
        """Return the implementations whose dependencies are all contained in ``archs``."""
        archs = set(archs)
        return [impl for impl in self._impls if impl.deps.issubset(archs)]

    def __repr__(self):
        return self.name
|
||||
|
||||
########################################################################
|
||||
# Extract information from the VOLK kernels
|
||||
########################################################################
|
||||
__file__ = os.path.abspath(__file__)
srcdir = os.path.dirname(os.path.dirname(__file__))
kernel_files = glob.glob(os.path.join(srcdir, "kernels", "volk_gnsssdr", "*.h"))
#build a real list: on Python 3 map() returns a one-shot iterator, which
#would be empty after the first pass over the kernels
kernels = list(map(kernel_class, kernel_files))

if __name__ == '__main__':
    print(kernels)
|
@ -0,0 +1,74 @@
|
||||
#
|
||||
# Copyright 2012 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
from volk_gnsssdr_arch_defs import arch_dict
|
||||
|
||||
machines = list()
|
||||
machine_dict = dict()
|
||||
|
||||
class machine_class:
    """A named combination of archs.

    ``archs`` is a list of arch names; empty names are skipped.  Each name is
    resolved through ``arch_dict`` (KeyError for an unknown name).
    ``alignment`` is the largest alignment among the resolved archs.
    """
    def __init__(self, name, archs):
        self.name = name
        self.arch_names = [arch_name for arch_name in archs if arch_name]
        self.archs = [arch_dict[arch_name] for arch_name in self.arch_names]
        self.alignment = max(a.alignment for a in self.archs)

    def __repr__(self): return self.name
|
||||
|
||||
def register_machine(name, archs):
    """Register the machine ``name``, expanding arch entries that contain '|'.

    The first entry containing '|' is split into its alternatives and one
    machine is registered (recursively) per alternative: a non-empty
    alternative is substituted into the list and appended to the machine
    name with '_', an empty alternative registers the machine without that
    entry under the unchanged name.  Lists without '|' are registered
    directly in ``machines`` and ``machine_dict``.
    """
    for pos, entry in enumerate(archs):
        if '|' not in entry: continue
        before, after = archs[:pos], archs[pos+1:]
        for choice in entry.split('|'):
            if choice:
                register_machine(name+'_'+choice, before + [choice] + after)
            else:
                register_machine(name, before + after)
        #the recursive calls deal with any later '|' entries
        return
    new_machine = machine_class(name=name, archs=archs)
    machines.append(new_machine)
    machine_dict[new_machine.name] = new_machine
|
||||
|
||||
########################################################################
|
||||
# register the machines
|
||||
########################################################################
|
||||
#TODO skip the XML and put it here
|
||||
from xml.dom import minidom
import os
gendir = os.path.dirname(__file__)
machines_xml = minidom.parse(os.path.join(gendir, 'machines.xml')).getElementsByTagName('machine')
for machine_xml in machines_xml:
    kwargs = dict()
    #XML attributes of <machine> (e.g. name) become keyword arguments
    for attr in machine_xml.attributes.keys():
        kwargs[attr] = machine_xml.attributes[attr].value
    #each child element contributes the text of the first element with that tag
    for node in machine_xml.childNodes:
        try:
            name = node.tagName
            val = machine_xml.getElementsByTagName(name)[0].firstChild.data
            kwargs[name] = val
        #text/comment nodes have no tagName, empty elements have no firstChild
        except AttributeError: pass
    #the <archs> text is a whitespace separated list of arch names
    kwargs['archs'] = kwargs['archs'].split()
    #force kwargs keys to be of type str, not unicode for py25
    kwargs = dict((str(k), v) for k, v in kwargs.items())
    register_machine(**kwargs)

if __name__ == '__main__':
    print(machines)
|
@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2012 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is part of GNU Radio
|
||||
#
|
||||
# GNU Radio is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# GNU Radio is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with GNU Radio; see the file COPYING. If not, write to
|
||||
# the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
# Boston, MA 02110-1301, USA.
|
||||
#
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import optparse
|
||||
import volk_gnsssdr_arch_defs
|
||||
import volk_gnsssdr_machine_defs
|
||||
import volk_gnsssdr_kernel_defs
|
||||
from Cheetah import Template
|
||||
|
||||
def __escape_pre_processor(code):
|
||||
out = list()
|
||||
for line in code.splitlines():
|
||||
m = re.match('^(\s*)#(\s*)(\w+)(.*)$', line)
|
||||
if m:
|
||||
p0, p1, fcn, stuff = m.groups()
|
||||
conly = fcn in ('include', 'define', 'ifdef', 'ifndef', 'endif', 'elif', 'pragma')
|
||||
both = fcn in ('if', 'else')
|
||||
istmpl = '$' in stuff
|
||||
if 'defined' in stuff: istmpl = False
|
||||
if conly or (both and not istmpl):
|
||||
line = '%s\\#%s%s%s'%(p0, p1, fcn, stuff)
|
||||
out.append(line)
|
||||
return '\n'.join(out)
|
||||
|
||||
def __parse_tmpl(_tmpl, **kwargs):
    """Render the template text ``_tmpl`` and return the result as a string.

    The template namespace holds the arch, machine and kernel tables plus
    any ``kwargs`` (which override the defaults).  Preprocessor lines are
    escaped first and a "generated file" banner is placed in front.
    """
    defs = dict(
        archs=volk_gnsssdr_arch_defs.archs,
        arch_dict=volk_gnsssdr_arch_defs.arch_dict,
        machines=volk_gnsssdr_machine_defs.machines,
        machine_dict=volk_gnsssdr_machine_defs.machine_dict,
        kernels=volk_gnsssdr_kernel_defs.kernels,
    )
    defs.update(kwargs)
    banner = """

/* this file was generated by volk_gnsssdr template utils, do not edit! */

"""
    escaped = __escape_pre_processor(_tmpl)
    return str(Template.Template(banner + escaped, defs))
|
||||
|
||||
def main():
    """Render the template named by --input.

    The result is written to the file named by --output, or printed to
    standard output when --output is not given.  Positional arguments are
    handed to the template as ``args``.
    """
    parser = optparse.OptionParser()
    parser.add_option('--input', type='string')
    parser.add_option('--output', type='string')
    (opts, args) = parser.parse_args()

    #context managers close both files even when rendering or writing fails
    with open(opts.input) as tmpl_fd:
        tmpl_text = tmpl_fd.read()
    output = __parse_tmpl(tmpl_text, args=args)
    if opts.output:
        with open(opts.output, 'w') as out_fd:
            out_fd.write(output)
    else: print(output)
|
||||
|
||||
if __name__ == '__main__': main()
|
@ -0,0 +1,39 @@
|
||||
/* -*- c++ -*- */
|
||||
/*
|
||||
* Copyright 2006,2009,2013 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GNU Radio
|
||||
*
|
||||
* GNU Radio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* GNU Radio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Radio; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_VOLK_CONSTANTS_H
#define INCLUDED_VOLK_CONSTANTS_H

#include <volk_gnsssdr/volk_gnsssdr_common.h>

__VOLK_DECL_BEGIN

/*
 * String accessors for library build information (going by their names:
 * install prefix, build date, version, C compiler, compiler flags and
 * available machines; the definitions are not part of this header).
 *
 * Declared with (void): in C an empty parameter list "()" is not a
 * prototype and lets callers pass arbitrary arguments unchecked.
 */
VOLK_API char* volk_gnsssdr_prefix(void);
VOLK_API char* volk_gnsssdr_build_date(void);
VOLK_API char* volk_gnsssdr_version(void);
VOLK_API char* volk_gnsssdr_c_compiler(void);
VOLK_API char* volk_gnsssdr_compiler_flags(void);
VOLK_API char* volk_gnsssdr_available_machines(void);

__VOLK_DECL_END

#endif /* INCLUDED_VOLK_CONSTANTS_H */
|
@ -0,0 +1,96 @@
|
||||
#ifndef INCLUDED_LIBVOLK_COMMON_H
|
||||
#define INCLUDED_LIBVOLK_COMMON_H
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Cross-platform attribute macros
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
/* GCC-compatible compilers: attribute syntax */
#if defined __GNUC__
#  define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x)))
#  define __VOLK_ATTR_UNUSED     __attribute__((unused))
#  define __VOLK_ATTR_INLINE     __attribute__((always_inline))
#  define __VOLK_ATTR_DEPRECATED __attribute__((deprecated))
#  if __GNUC__ >= 4
     /* symbol visibility attributes are only used from GCC 4 on */
#    define __VOLK_ATTR_EXPORT   __attribute__((visibility("default")))
#    define __VOLK_ATTR_IMPORT   __attribute__((visibility("default")))
#  else
#    define __VOLK_ATTR_EXPORT
#    define __VOLK_ATTR_IMPORT
#  endif
/* MSVC: __declspec syntax; tested with defined() so that an undefined
 * _MSC_VER is never evaluated as a value (avoids -Wundef style warnings) */
#elif defined(_MSC_VER)
#  define __VOLK_ATTR_ALIGNED(x) __declspec(align(x))
#  define __VOLK_ATTR_UNUSED
#  define __VOLK_ATTR_INLINE     __forceinline
#  define __VOLK_ATTR_DEPRECATED __declspec(deprecated)
#  define __VOLK_ATTR_EXPORT     __declspec(dllexport)
#  define __VOLK_ATTR_IMPORT     __declspec(dllimport)
/* any other compiler: all attribute macros expand to nothing */
#else
#  define __VOLK_ATTR_ALIGNED(x)
#  define __VOLK_ATTR_UNUSED
#  define __VOLK_ATTR_INLINE
#  define __VOLK_ATTR_DEPRECATED
#  define __VOLK_ATTR_EXPORT
#  define __VOLK_ATTR_IMPORT
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Ignore annoying warnings in MSVC
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
#if defined(_MSC_VER)
|
||||
# pragma warning(disable: 4244) //'conversion' conversion from 'type1' to 'type2', possible loss of data
|
||||
# pragma warning(disable: 4305) //'identifier' : truncation from 'type1' to 'type2'
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// C-linkage declaration macros
|
||||
// FIXME: due to the usage of complex.h, require gcc for c-linkage
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
#if defined(__cplusplus) && (__GNUC__)
|
||||
# define __VOLK_DECL_BEGIN extern "C" {
|
||||
# define __VOLK_DECL_END }
|
||||
#else
|
||||
# define __VOLK_DECL_BEGIN
|
||||
# define __VOLK_DECL_END
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Define VOLK_API for library symbols
|
||||
// http://gcc.gnu.org/wiki/Visibility
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
#ifdef volk_gnsssdr_EXPORTS
|
||||
# define VOLK_API __VOLK_ATTR_EXPORT
|
||||
#else
|
||||
# define VOLK_API __VOLK_ATTR_IMPORT
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// The bit128 union: one 128-bit value viewed as integer, float or double lanes
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
#include <inttypes.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
/*
 * 128 bits of storage that can be accessed through several lane layouts.
 * The SIMD vector members are only present when the matching LV_HAVE_*
 * macro is defined at the point of inclusion.
 */
union bit128 {
    uint16_t i16[8]; /* eight 16-bit unsigned lanes */
    uint32_t i[4];   /* four 32-bit unsigned lanes */
    float f[4];      /* four single-precision lanes */
    double d[2];     /* two double-precision lanes */

#ifdef LV_HAVE_SSE
    __m128 float_vec; /* SSE view: packed single-precision vector */
#endif

#ifdef LV_HAVE_SSE2
    __m128i int_vec;    /* SSE2 view: packed integer vector */
    __m128d double_vec; /* SSE2 view: packed double-precision vector */
#endif
};
|
||||
|
||||
#define bit128_p(x) ((union bit128 *)(x))
|
||||
|
||||
#endif /*INCLUDED_LIBVOLK_COMMON_H*/
|
@ -0,0 +1,86 @@
|
||||
#ifndef INCLUDE_VOLK_COMPLEX_H
|
||||
#define INCLUDE_VOLK_COMPLEX_H
|
||||
|
||||
/*!
|
||||
* \brief Provide typedefs and operators for all complex types in C and C++.
|
||||
*
|
||||
* The typedefs encompass all signed integer and floating point types.
|
||||
* Each operator function is intended to work across all data types.
|
||||
* Under C++, these operators are defined as inline templates.
|
||||
* Under C, these operators are defined as preprocessor macros.
|
||||
* The use of macros makes the operators agnostic to the type.
|
||||
*
|
||||
* The following operator functions are defined:
|
||||
* - lv_cmake - make a complex type from components
|
||||
* - lv_creal - get the real part of the complex number
|
||||
* - lv_cimag - get the imaginary part of the complex number
|
||||
* - lv_conj - take the conjugate of the complex number
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
#include <complex>
|
||||
#include <stdint.h>
|
||||
|
||||
typedef std::complex<int8_t> lv_8sc_t;
|
||||
typedef std::complex<int16_t> lv_16sc_t;
|
||||
typedef std::complex<int32_t> lv_32sc_t;
|
||||
typedef std::complex<int64_t> lv_64sc_t;
|
||||
typedef std::complex<float> lv_32fc_t;
|
||||
typedef std::complex<double> lv_64fc_t;
|
||||
|
||||
/*! \brief Builds a complex number from its real part \p r and imaginary part \p i. */
template <typename T>
inline std::complex<T> lv_cmake(const T &r, const T &i)
{
    std::complex<T> value(r, i);
    return value;
}
|
||||
|
||||
/*! \brief Returns the real part of the complex number \p x. */
template <typename T>
inline typename T::value_type lv_creal(const T &x)
{
    const typename T::value_type real_part = x.real();
    return real_part;
}
|
||||
|
||||
/*! \brief Returns the imaginary part of the complex number \p x. */
template <typename T>
inline typename T::value_type lv_cimag(const T &x)
{
    const typename T::value_type imag_part = x.imag();
    return imag_part;
}
|
||||
|
||||
/*! \brief Returns the complex conjugate of \p x (computed with std::conj). */
template <typename T>
inline T lv_conj(const T &x)
{
    const T conjugated = std::conj(x);
    return conjugated;
}
|
||||
|
||||
#else /* __cplusplus */
|
||||
|
||||
#include <complex.h>
|
||||
|
||||
typedef char complex lv_8sc_t;
|
||||
typedef short complex lv_16sc_t;
|
||||
typedef long complex lv_32sc_t;
|
||||
typedef long long complex lv_64sc_t;
|
||||
typedef float complex lv_32fc_t;
|
||||
typedef double complex lv_64fc_t;
|
||||
|
||||
#define lv_cmake(r, i) ((r) + _Complex_I*(i))
|
||||
|
||||
// When GNUC is available, use the complex extensions.
|
||||
// The extensions always return the correct value type.
|
||||
// http://gcc.gnu.org/onlinedocs/gcc/Complex.html
|
||||
#ifdef __GNUC__
|
||||
|
||||
#define lv_creal(x) (__real__(x))
|
||||
|
||||
#define lv_cimag(x) (__imag__(x))
|
||||
|
||||
#define lv_conj(x) (~(x))
|
||||
|
||||
// When not available, use the c99 complex function family,
|
||||
// which always returns double regardless of the input type.
|
||||
#else /* __GNUC__ */
|
||||
|
||||
#define lv_creal(x) (creal(x))
|
||||
|
||||
#define lv_cimag(x) (cimag(x))
|
||||
|
||||
#define lv_conj(x) (conj(x))
|
||||
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* INCLUDE_VOLK_COMPLEX_H */
|
@ -0,0 +1,66 @@
|
||||
/* -*- c -*- */
|
||||
/*
|
||||
* Copyright 2014 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GNU Radio
|
||||
*
|
||||
* GNU Radio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* GNU Radio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Radio; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_VOLK_MALLOC_H
|
||||
#define INCLUDED_VOLK_MALLOC_H
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
__VOLK_DECL_BEGIN
|
||||
|
||||
/*!
|
||||
* \brief Allocate \p size bytes of data aligned to \p alignment.
|
||||
*
|
||||
* \details
|
||||
* Because we don't have a standard method to allocate buffers in
|
||||
* memory that are guaranteed to be on an alignment, VOLK handles this
|
||||
* itself. The volk_gnsssdr_malloc function behaves like malloc in that it
|
||||
* returns a pointer to the allocated memory. However, it also takes
|
||||
* in an alignment specification, which is usually something like 16 or
|
||||
* 32 to ensure that the aligned memory is located on a particular
|
||||
* byte boundary for use with SIMD.
|
||||
*
|
||||
* Internally, the volk_gnsssdr_malloc first checks if the compiler is C11
|
||||
* compliant and uses the new aligned_alloc method. If not, it checks
|
||||
* if the system is POSIX compliant and uses posix_memalign. If that
|
||||
* fails, volk_gnsssdr_malloc handles the memory allocation and alignment
|
||||
* internally.
|
||||
*
|
||||
* Because of the ways in which volk_gnsssdr_malloc may allocate memory, it is
|
||||
* important to always free volk_gnsssdr_malloc pointers using volk_gnsssdr_free.
|
||||
*
|
||||
* \param size The number of bytes to allocate.
|
||||
* \param alignment The byte alignment of the allocated memory.
|
||||
* \return pointer to aligned memory.
|
||||
*/
|
||||
VOLK_API void *volk_gnsssdr_malloc(size_t size, size_t alignment);
|
||||
|
||||
/*!
|
||||
* \brief Frees memory allocated by volk_gnsssdr_malloc.
|
||||
* \param aptr The aligned pointer allocated by volk_gnsssdr_malloc.
|
||||
*/
|
||||
VOLK_API void volk_gnsssdr_free(void *aptr);
|
||||
|
||||
__VOLK_DECL_END
|
||||
|
||||
#endif /* INCLUDED_VOLK_MALLOC_H */
|
@ -0,0 +1,28 @@
|
||||
#ifndef INCLUDED_VOLK_PREFS_H
|
||||
#define INCLUDED_VOLK_PREFS_H
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
__VOLK_DECL_BEGIN
|
||||
|
||||
/*!
 * \brief One preference record: a kernel name and the implementations
 * recorded as best for aligned and for unaligned buffers.
 */
typedef struct volk_gnsssdr_arch_pref
{
    char name[128];   /* name of the kernel */
    char impl_a[128]; /* best aligned implementation */
    char impl_u[128]; /* best unaligned implementation */
} volk_gnsssdr_arch_pref_t;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// get path to volk_gnsssdr_config profiling info
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
VOLK_API void volk_gnsssdr_get_config_path(char *);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// load prefs into global prefs struct
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
VOLK_API size_t volk_gnsssdr_load_preferences(volk_gnsssdr_arch_pref_t **);
|
||||
|
||||
__VOLK_DECL_END
|
||||
|
||||
#endif //INCLUDED_VOLK_PREFS_H
|
67
src/algorithms/libs/volk_gnsssdr/kernels/README.txt
Normal file
67
src/algorithms/libs/volk_gnsssdr/kernels/README.txt
Normal file
@ -0,0 +1,67 @@
|
||||
########################################################################
|
||||
# How to create custom kernel dispatchers
|
||||
########################################################################
|
||||
A kernel dispatcher is a kernel implementation that calls other kernel implementations.
|
||||
By default, a dispatcher is generated by the build system for every kernel such that:
|
||||
* the best aligned implementation is called when all pointer arguments are aligned,
|
||||
* and otherwise the best unaligned implementation is called.
|
||||
|
||||
The author of a VOLK kernel may create a custom dispatcher,
|
||||
to be called in place of the automatically generated one.
|
||||
A custom dispatcher may be useful to handle head and tail cases,
|
||||
or to implement different alignment and bounds checking logic.
|
||||
|
||||
########################################################################
|
||||
# Code for an example dispatcher w/ tail case
|
||||
########################################################################
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
|
||||
#ifdef LV_HAVE_DISPATCHER
|
||||
|
||||
static inline void volk_gnsssdr_32f_x2_add_32f_dispatcher(float* cVector, const float* aVector, const float* bVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int num_points_r = num_points%4;
|
||||
const unsigned int num_points_x = num_points - num_points_r;
|
||||
|
||||
if (volk_gnsssdr_is_aligned(VOLK_OR_PTR(cVector, VOLK_OR_PTR(aVector, bVector))))
|
||||
{
|
||||
volk_gnsssdr_32f_x2_add_32f_a(cVector, aVector, bVector, num_points_x);
|
||||
}
|
||||
else
|
||||
{
|
||||
volk_gnsssdr_32f_x2_add_32f_u(cVector, aVector, bVector, num_points_x);
|
||||
}
|
||||
|
||||
volk_gnsssdr_32f_x2_add_32f_g(cVector+num_points_x, aVector+num_points_x, bVector+num_points_x, num_points_r);
|
||||
}
|
||||
|
||||
#endif //LV_HAVE_DISPATCHER
|
||||
|
||||
########################################################################
|
||||
# Code for an example dispatcher w/ tail case and accumulator
|
||||
########################################################################
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
|
||||
#ifdef LV_HAVE_DISPATCHER
|
||||
|
||||
static inline void volk_gnsssdr_32f_x2_dot_prod_32f_dispatcher(float * result, const float * input, const float * taps, unsigned int num_points)
|
||||
{
|
||||
const unsigned int num_points_r = num_points%16;
|
||||
const unsigned int num_points_x = num_points - num_points_r;
|
||||
|
||||
if (volk_gnsssdr_is_aligned(VOLK_OR_PTR(input, taps)))
|
||||
{
|
||||
volk_gnsssdr_32f_x2_dot_prod_32f_a(result, input, taps, num_points_x);
|
||||
}
|
||||
else
|
||||
{
|
||||
volk_gnsssdr_32f_x2_dot_prod_32f_u(result, input, taps, num_points_x);
|
||||
}
|
||||
|
||||
float result_tail = 0;
|
||||
volk_gnsssdr_32f_x2_dot_prod_32f_g(&result_tail, input+num_points_x, taps+num_points_x, num_points_r);
|
||||
|
||||
*result += result_tail;
|
||||
}
|
||||
|
||||
#endif //LV_HAVE_DISPATCHER
|
@ -0,0 +1,241 @@
|
||||
#ifndef INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H
|
||||
#define INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
|
||||
\param inputVector The 16 bit input data buffer
|
||||
\param outputVector The floating point output data buffer
|
||||
\param scalar The value divided against each point in the output buffer
|
||||
\param num_points The number of data values to be converted
|
||||
\note Output buffer does NOT need to be properly aligned
|
||||
*/
|
||||
static inline void volk_gnsssdr_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
|
||||
unsigned int number = 0;
|
||||
const unsigned int eighthPoints = num_points / 8;
|
||||
|
||||
float* outputVectorPtr = outputVector;
|
||||
__m128 invScalar = _mm_set_ps1(1.0/scalar);
|
||||
int16_t* inputPtr = (int16_t*)inputVector;
|
||||
__m128i inputVal;
|
||||
__m128i inputVal2;
|
||||
__m128 ret;
|
||||
|
||||
for(;number < eighthPoints; number++){
|
||||
|
||||
// Load the 8 values
|
||||
inputVal = _mm_loadu_si128((__m128i*)inputPtr);
|
||||
|
||||
// Shift the input data to the right by 64 bits ( 8 bytes )
|
||||
inputVal2 = _mm_srli_si128(inputVal, 8);
|
||||
|
||||
// Convert the lower 4 values into 32 bit words
|
||||
inputVal = _mm_cvtepi16_epi32(inputVal);
|
||||
inputVal2 = _mm_cvtepi16_epi32(inputVal2);
|
||||
|
||||
ret = _mm_cvtepi32_ps(inputVal);
|
||||
ret = _mm_mul_ps(ret, invScalar);
|
||||
_mm_storeu_ps(outputVectorPtr, ret);
|
||||
outputVectorPtr += 4;
|
||||
|
||||
ret = _mm_cvtepi32_ps(inputVal2);
|
||||
ret = _mm_mul_ps(ret, invScalar);
|
||||
_mm_storeu_ps(outputVectorPtr, ret);
|
||||
|
||||
outputVectorPtr += 4;
|
||||
|
||||
inputPtr += 8;
|
||||
}
|
||||
|
||||
number = eighthPoints * 8;
|
||||
for(; number < num_points; number++){
|
||||
outputVector[number] =((float)(inputVector[number])) / scalar;
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE4_1 */
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
|
||||
/*!
  \brief Converts 16 bit integer samples to floating point and divides each converted sample by the scalar value
  \param outputVector The floating point output data buffer
  \param inputVector The 16 bit input data buffer
  \param scalar The value each converted sample is divided by
  \param num_points The number of data values to be converted
  \note Neither buffer needs to be aligned: samples are read one by one and results are written with unaligned stores
  \note Full groups of 4 samples are multiplied by the reciprocal of scalar; the remaining samples are divided by scalar directly
*/
static inline void volk_gnsssdr_16i_s32f_convert_32f_u_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
  const unsigned int blocks = num_points / 4;
  const __m128 scale = _mm_set_ps1(1.0/scalar);
  unsigned int blk;
  unsigned int idx;

  for(blk = 0; blk < blocks; blk++){
    const int16_t* src = inputVector + 4 * blk;
    // _mm_setr_ps places its first argument in the lowest lane
    const __m128 samples = _mm_setr_ps((float)(src[0]), (float)(src[1]), (float)(src[2]), (float)(src[3]));

    _mm_storeu_ps(outputVector + 4 * blk, _mm_mul_ps(samples, scale));
  }

  // Samples that do not fill a whole group of 4
  for(idx = blocks * 4; idx < num_points; idx++){
    outputVector[idx] = (float)(inputVector[idx]) / scalar;
  }
}
|
||||
#endif /* LV_HAVE_SSE */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
  \brief Converts 16 bit integer samples to floating point and divides each converted sample by the scalar value
  \param outputVector The floating point output data buffer
  \param inputVector The 16 bit input data buffer
  \param scalar The value each converted sample is divided by
  \param num_points The number of data values to be converted
  \note Output buffer does NOT need to be properly aligned
*/
static inline void volk_gnsssdr_16i_s32f_convert_32f_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
  unsigned int idx;

  for(idx = 0; idx < num_points; idx++){
    outputVector[idx] = ((float)(inputVector[idx])) / scalar;
  }
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
|
||||
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H */
|
||||
#ifndef INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H
|
||||
#define INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
|
||||
\param inputVector The 16 bit input data buffer
|
||||
\param outputVector The floating point output data buffer
|
||||
\param scalar The value divided against each point in the output buffer
|
||||
\param num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_16i_s32f_convert_32f_a_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
|
||||
unsigned int number = 0;
|
||||
const unsigned int eighthPoints = num_points / 8;
|
||||
|
||||
float* outputVectorPtr = outputVector;
|
||||
__m128 invScalar = _mm_set_ps1(1.0/scalar);
|
||||
int16_t* inputPtr = (int16_t*)inputVector;
|
||||
__m128i inputVal;
|
||||
__m128i inputVal2;
|
||||
__m128 ret;
|
||||
|
||||
for(;number < eighthPoints; number++){
|
||||
|
||||
// Load the 8 values
|
||||
inputVal = _mm_loadu_si128((__m128i*)inputPtr);
|
||||
|
||||
// Shift the input data to the right by 64 bits ( 8 bytes )
|
||||
inputVal2 = _mm_srli_si128(inputVal, 8);
|
||||
|
||||
// Convert the lower 4 values into 32 bit words
|
||||
inputVal = _mm_cvtepi16_epi32(inputVal);
|
||||
inputVal2 = _mm_cvtepi16_epi32(inputVal2);
|
||||
|
||||
ret = _mm_cvtepi32_ps(inputVal);
|
||||
ret = _mm_mul_ps(ret, invScalar);
|
||||
_mm_storeu_ps(outputVectorPtr, ret);
|
||||
outputVectorPtr += 4;
|
||||
|
||||
ret = _mm_cvtepi32_ps(inputVal2);
|
||||
ret = _mm_mul_ps(ret, invScalar);
|
||||
_mm_storeu_ps(outputVectorPtr, ret);
|
||||
|
||||
outputVectorPtr += 4;
|
||||
|
||||
inputPtr += 8;
|
||||
}
|
||||
|
||||
number = eighthPoints * 8;
|
||||
for(; number < num_points; number++){
|
||||
outputVector[number] =((float)(inputVector[number])) / scalar;
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE4_1 */
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
|
||||
/*!
  \brief Converts 16 bit integer samples to floating point and divides each converted sample by the scalar value
  \param outputVector The floating point output data buffer
  \param inputVector The 16 bit input data buffer
  \param scalar The value each converted sample is divided by
  \param num_points The number of data values to be converted
  \note Although this is the aligned variant, results are written with unaligned stores
  \note Full groups of 4 samples are multiplied by the reciprocal of scalar; the remaining samples are divided by scalar directly
*/
static inline void volk_gnsssdr_16i_s32f_convert_32f_a_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
  const unsigned int blocks = num_points / 4;
  const __m128 scale = _mm_set_ps1(1.0/scalar);
  unsigned int blk;
  unsigned int idx;

  for(blk = 0; blk < blocks; blk++){
    const int16_t* src = inputVector + 4 * blk;
    // _mm_setr_ps places its first argument in the lowest lane
    const __m128 samples = _mm_setr_ps((float)(src[0]), (float)(src[1]), (float)(src[2]), (float)(src[3]));

    _mm_storeu_ps(outputVector + 4 * blk, _mm_mul_ps(samples, scale));
  }

  // Samples that do not fill a whole group of 4
  for(idx = blocks * 4; idx < num_points; idx++){
    outputVector[idx] = (float)(inputVector[idx]) / scalar;
  }
}
|
||||
#endif /* LV_HAVE_SSE */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
  \brief Converts 16 bit integer samples to floating point and divides each converted sample by the scalar value
  \param outputVector The floating point output data buffer
  \param inputVector The 16 bit input data buffer
  \param scalar The value each converted sample is divided by
  \param num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16i_s32f_convert_32f_a_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
  unsigned int idx;

  for(idx = 0; idx < num_points; idx++){
    outputVector[idx] = ((float)(inputVector[idx])) / scalar;
  }
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
|
||||
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H */
|
@ -0,0 +1,68 @@
|
||||
#ifndef INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H
|
||||
#define INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
/*!
|
||||
\brief Accumulates the values in the input buffer
|
||||
\param result The accumulated result
|
||||
\param inputBuffer The buffer of data to be accumulated
|
||||
\param num_points The number of values in inputBuffer to be accumulated
|
||||
*/
|
||||
static inline void volk_gnsssdr_32f_accumulator_s32f_a_sse(float* result, const float* inputBuffer, unsigned int num_points){
|
||||
float returnValue = 0;
|
||||
unsigned int number = 0;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
|
||||
const float* aPtr = inputBuffer;
|
||||
__VOLK_ATTR_ALIGNED(16) float tempBuffer[4];
|
||||
|
||||
__m128 accumulator = _mm_setzero_ps();
|
||||
__m128 aVal = _mm_setzero_ps();
|
||||
|
||||
for(;number < quarterPoints; number++){
|
||||
aVal = _mm_load_ps(aPtr);
|
||||
accumulator = _mm_add_ps(accumulator, aVal);
|
||||
aPtr += 4;
|
||||
}
|
||||
_mm_store_ps(tempBuffer,accumulator); // Store the results back into the C container
|
||||
returnValue = tempBuffer[0];
|
||||
returnValue += tempBuffer[1];
|
||||
returnValue += tempBuffer[2];
|
||||
returnValue += tempBuffer[3];
|
||||
|
||||
number = quarterPoints * 4;
|
||||
for(;number < num_points; number++){
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
*result = returnValue;
|
||||
}
|
||||
#endif /* LV_HAVE_SSE */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
  \brief Sums all the values in the input buffer
  \param result Where the accumulated sum is written
  \param inputBuffer The buffer of data to be accumulated
  \param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_32f_accumulator_s32f_generic(float* result, const float* inputBuffer, unsigned int num_points){
  float total = 0;
  unsigned int i;

  for(i = 0; i < num_points; i++){
    total += inputBuffer[i];
  }
  *result = total;
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
|
||||
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H */
|
@ -0,0 +1,149 @@
|
||||
#ifndef INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H
|
||||
#define INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include<smmintrin.h>
|
||||
|
||||
static inline void volk_gnsssdr_32f_index_max_16u_a_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) {
|
||||
if(num_points > 0){
|
||||
unsigned int number = 0;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
|
||||
float* inputPtr = (float*)src0;
|
||||
|
||||
__m128 indexIncrementValues = _mm_set1_ps(4);
|
||||
__m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
|
||||
|
||||
float max = src0[0];
|
||||
float index = 0;
|
||||
__m128 maxValues = _mm_set1_ps(max);
|
||||
__m128 maxValuesIndex = _mm_setzero_ps();
|
||||
__m128 compareResults;
|
||||
__m128 currentValues;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
|
||||
|
||||
for(;number < quarterPoints; number++){
|
||||
|
||||
currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
|
||||
currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
|
||||
|
||||
compareResults = _mm_cmpgt_ps(maxValues, currentValues);
|
||||
|
||||
maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
|
||||
maxValues = _mm_blendv_ps(currentValues, maxValues, compareResults);
|
||||
}
|
||||
|
||||
// Calculate the largest value from the remaining 4 points
|
||||
_mm_store_ps(maxValuesBuffer, maxValues);
|
||||
_mm_store_ps(maxIndexesBuffer, maxValuesIndex);
|
||||
|
||||
for(number = 0; number < 4; number++){
|
||||
if(maxValuesBuffer[number] > max){
|
||||
index = maxIndexesBuffer[number];
|
||||
max = maxValuesBuffer[number];
|
||||
}
|
||||
}
|
||||
|
||||
number = quarterPoints * 4;
|
||||
for(;number < num_points; number++){
|
||||
if(src0[number] > max){
|
||||
index = number;
|
||||
max = src0[number];
|
||||
}
|
||||
}
|
||||
target[0] = (unsigned int)index;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include<xmmintrin.h>
|
||||
|
||||
static inline void volk_gnsssdr_32f_index_max_16u_a_sse(unsigned int* target, const float* src0, unsigned int num_points) {
|
||||
if(num_points > 0){
|
||||
unsigned int number = 0;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
|
||||
float* inputPtr = (float*)src0;
|
||||
|
||||
__m128 indexIncrementValues = _mm_set1_ps(4);
|
||||
__m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
|
||||
|
||||
float max = src0[0];
|
||||
float index = 0;
|
||||
__m128 maxValues = _mm_set1_ps(max);
|
||||
__m128 maxValuesIndex = _mm_setzero_ps();
|
||||
__m128 compareResults;
|
||||
__m128 currentValues;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
|
||||
|
||||
for(;number < quarterPoints; number++){
|
||||
|
||||
currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
|
||||
currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
|
||||
|
||||
compareResults = _mm_cmpgt_ps(maxValues, currentValues);
|
||||
|
||||
maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex) , _mm_andnot_ps(compareResults, currentIndexes));
|
||||
|
||||
maxValues = _mm_or_ps(_mm_and_ps(compareResults, maxValues) , _mm_andnot_ps(compareResults, currentValues));
|
||||
}
|
||||
|
||||
// Calculate the largest value from the remaining 4 points
|
||||
_mm_store_ps(maxValuesBuffer, maxValues);
|
||||
_mm_store_ps(maxIndexesBuffer, maxValuesIndex);
|
||||
|
||||
for(number = 0; number < 4; number++){
|
||||
if(maxValuesBuffer[number] > max){
|
||||
index = maxIndexesBuffer[number];
|
||||
max = maxValuesBuffer[number];
|
||||
}
|
||||
}
|
||||
|
||||
number = quarterPoints * 4;
|
||||
for(;number < num_points; number++){
|
||||
if(src0[number] > max){
|
||||
index = number;
|
||||
max = src0[number];
|
||||
}
|
||||
}
|
||||
target[0] = (unsigned int)index;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE*/
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
  \brief Finds the index of the largest value in src0
  \param target Receives the index of the maximum; left untouched when num_points is 0
  \param src0 The input buffer
  \param num_points The number of values in src0 to examine
  \note When the maximum occurs more than once the lowest index is reported
*/
static inline void volk_gnsssdr_32f_index_max_16u_generic(unsigned int* target, const float* src0, unsigned int num_points) {
  float peak;
  unsigned int best = 0;
  unsigned int i;

  if(num_points == 0){
    return;
  }

  peak = src0[0];
  for(i = 1; i < num_points; ++i){
    if(src0[i] > peak){
      peak = src0[i];
      best = i;
    }
  }
  target[0] = best;
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H*/
|
@ -0,0 +1,147 @@
|
||||
#ifndef INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H
|
||||
#define INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
/*!
  \brief Adds the two input vectors element by element and stores the sums in a third vector
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
  \note No buffer needs to be aligned: unaligned loads and stores are used
*/
static inline void volk_gnsssdr_32f_x2_add_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  const unsigned int blocks = num_points / 4;
  unsigned int i;

  for(i = 0; i < blocks; i++){
    const unsigned int offset = 4 * i;
    const __m128 lhs = _mm_loadu_ps(aVector + offset);
    const __m128 rhs = _mm_loadu_ps(bVector + offset);

    _mm_storeu_ps(cVector + offset, _mm_add_ps(lhs, rhs));
  }

  // Values that do not fill a whole group of 4
  for(i = blocks * 4; i < num_points; i++){
    cVector[i] = aVector[i] + bVector[i];
  }
}
|
||||
#endif /* LV_HAVE_SSE */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
  \brief Adds the two input vectors element by element and stores the sums in a third vector
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_32f_x2_add_32f_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  unsigned int i;

  for(i = 0; i < num_points; i++){
    cVector[i] = aVector[i] + bVector[i];
  }
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H */
|
||||
#ifndef INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H
|
||||
#define INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
/*!
  \brief Element-wise sum of two float vectors using aligned SSE loads and stores
  \param cVector Output vector; element i receives aVector[i] + bVector[i]
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of floats read from each input and written to cVector

  The SIMD loop uses _mm_load_ps / _mm_store_ps, so all three buffers are
  expected to be 16-byte aligned.
*/
static inline void volk_gnsssdr_32f_x2_add_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    /* Largest multiple of four not exceeding num_points: handled by the SIMD loop. */
    const unsigned int vectorizedPoints = num_points & ~3u;
    unsigned int idx;

    /* SIMD pass: four floats per step using aligned loads and stores. */
    for(idx = 0; idx < vectorizedPoints; idx += 4){
        const __m128 lhs = _mm_load_ps(aVector + idx);
        const __m128 rhs = _mm_load_ps(bVector + idx);

        _mm_store_ps(cVector + idx, _mm_add_ps(lhs, rhs));
    }

    /* Scalar pass for the 0-3 elements the SIMD loop did not cover. */
    for(; idx < num_points; idx++){
        cVector[idx] = aVector[idx] + bVector[idx];
    }
}
|
||||
#endif /* LV_HAVE_SSE */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
  \brief Element-wise sum of two float vectors (plain C implementation)
  \param cVector Output vector; element i receives aVector[i] + bVector[i]
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of floats read from each input and written to cVector
*/
static inline void volk_gnsssdr_32f_x2_add_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    unsigned int idx;

    for(idx = 0; idx < num_points; idx++){
        cVector[idx] = aVector[idx] + bVector[idx];
    }
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
  \brief Element-wise sum of two float vectors, delegated to the external volk_gnsssdr_32f_x2_add_32f_a_orc_impl routine
  \param cVector Output vector, passed through unchanged to the implementation
  \param aVector First input vector
  \param bVector Second input vector
  \param num_points Number of values in aVector and bVector to be added together and stored into cVector
*/
extern void volk_gnsssdr_32f_x2_add_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);

static inline void volk_gnsssdr_32f_x2_add_32f_u_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points)
{
    /* Thin forwarding wrapper: every argument is handed over as-is. */
    volk_gnsssdr_32f_x2_add_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
|
||||
#endif /* LV_HAVE_ORC */
|
||||
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H */
|
@ -0,0 +1,127 @@
|
||||
#ifndef INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H
|
||||
#define INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Takes the conjugate of a complex vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of complex values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
|
||||
unsigned int number = 0;
|
||||
const unsigned int halfPoints = num_points / 2;
|
||||
|
||||
__m128 x;
|
||||
lv_32fc_t* c = cVector;
|
||||
const lv_32fc_t* a = aVector;
|
||||
|
||||
__m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
|
||||
|
||||
for(;number < halfPoints; number++){
|
||||
|
||||
x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
|
||||
|
||||
x = _mm_xor_ps(x, conjugator); // conjugate register
|
||||
|
||||
_mm_storeu_ps((float*)c,x); // Store the results back into the C container
|
||||
|
||||
a += 2;
|
||||
c += 2;
|
||||
}
|
||||
|
||||
if((num_points % 2) != 0) {
|
||||
*c = lv_conj(*a);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Takes the conjugate of a complex vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of complex values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_conjugate_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
|
||||
lv_32fc_t* cPtr = cVector;
|
||||
const lv_32fc_t* aPtr = aVector;
|
||||
unsigned int number = 0;
|
||||
|
||||
for(number = 0; number < num_points; number++){
|
||||
*cPtr++ = lv_conj(*aPtr++);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H */
|
||||
#ifndef INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H
|
||||
#define INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Takes the conjugate of a complex vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of complex values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
|
||||
unsigned int number = 0;
|
||||
const unsigned int halfPoints = num_points / 2;
|
||||
|
||||
__m128 x;
|
||||
lv_32fc_t* c = cVector;
|
||||
const lv_32fc_t* a = aVector;
|
||||
|
||||
__m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
|
||||
|
||||
for(;number < halfPoints; number++){
|
||||
|
||||
x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
|
||||
|
||||
x = _mm_xor_ps(x, conjugator); // conjugate register
|
||||
|
||||
_mm_store_ps((float*)c,x); // Store the results back into the C container
|
||||
|
||||
a += 2;
|
||||
c += 2;
|
||||
}
|
||||
|
||||
if((num_points % 2) != 0) {
|
||||
*c = lv_conj(*a);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Takes the conjugate of a complex vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of complex values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
|
||||
lv_32fc_t* cPtr = cVector;
|
||||
const lv_32fc_t* aPtr = aVector;
|
||||
unsigned int number = 0;
|
||||
|
||||
for(number = 0; number < num_points; number++){
|
||||
*cPtr++ = lv_conj(*aPtr++);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H */
|
@ -0,0 +1,228 @@
|
||||
#ifndef INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_u_H
|
||||
#define INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
|
||||
unsigned int number = 0;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
|
||||
const float* complexVectorPtr = (float*)complexVector;
|
||||
float* magnitudeVectorPtr = magnitudeVector;
|
||||
|
||||
__m128 cplxValue1, cplxValue2, result;
|
||||
for(;number < quarterPoints; number++){
|
||||
cplxValue1 = _mm_loadu_ps(complexVectorPtr);
|
||||
complexVectorPtr += 4;
|
||||
|
||||
cplxValue2 = _mm_loadu_ps(complexVectorPtr);
|
||||
complexVectorPtr += 4;
|
||||
|
||||
cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
|
||||
cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
|
||||
|
||||
result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
|
||||
|
||||
_mm_storeu_ps(magnitudeVectorPtr, result);
|
||||
magnitudeVectorPtr += 4;
|
||||
}
|
||||
|
||||
number = quarterPoints * 4;
|
||||
for(; number < num_points; number++){
|
||||
float val1Real = *complexVectorPtr++;
|
||||
float val1Imag = *complexVectorPtr++;
|
||||
*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
|
||||
unsigned int number = 0;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
|
||||
const float* complexVectorPtr = (float*)complexVector;
|
||||
float* magnitudeVectorPtr = magnitudeVector;
|
||||
|
||||
__m128 cplxValue1, cplxValue2, iValue, qValue, result;
|
||||
for(;number < quarterPoints; number++){
|
||||
cplxValue1 = _mm_loadu_ps(complexVectorPtr);
|
||||
complexVectorPtr += 4;
|
||||
|
||||
cplxValue2 = _mm_loadu_ps(complexVectorPtr);
|
||||
complexVectorPtr += 4;
|
||||
|
||||
// Arrange in i1i2i3i4 format
|
||||
iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
|
||||
// Arrange in q1q2q3q4 format
|
||||
qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
|
||||
|
||||
iValue = _mm_mul_ps(iValue, iValue); // Square the I values
|
||||
qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
|
||||
|
||||
result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
|
||||
|
||||
_mm_storeu_ps(magnitudeVectorPtr, result);
|
||||
magnitudeVectorPtr += 4;
|
||||
}
|
||||
|
||||
number = quarterPoints * 4;
|
||||
for(; number < num_points; number++){
|
||||
float val1Real = *complexVectorPtr++;
|
||||
float val1Imag = *complexVectorPtr++;
|
||||
*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_magnitude_squared_32f_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
|
||||
const float* complexVectorPtr = (float*)complexVector;
|
||||
float* magnitudeVectorPtr = magnitudeVector;
|
||||
unsigned int number = 0;
|
||||
for(number = 0; number < num_points; number++){
|
||||
const float real = *complexVectorPtr++;
|
||||
const float imag = *complexVectorPtr++;
|
||||
*magnitudeVectorPtr++ = (real*real) + (imag*imag);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_u_H */
|
||||
#ifndef INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_a_H
|
||||
#define INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
|
||||
unsigned int number = 0;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
|
||||
const float* complexVectorPtr = (float*)complexVector;
|
||||
float* magnitudeVectorPtr = magnitudeVector;
|
||||
|
||||
__m128 cplxValue1, cplxValue2, result;
|
||||
for(;number < quarterPoints; number++){
|
||||
cplxValue1 = _mm_load_ps(complexVectorPtr);
|
||||
complexVectorPtr += 4;
|
||||
|
||||
cplxValue2 = _mm_load_ps(complexVectorPtr);
|
||||
complexVectorPtr += 4;
|
||||
|
||||
cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
|
||||
cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
|
||||
|
||||
result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
|
||||
|
||||
_mm_store_ps(magnitudeVectorPtr, result);
|
||||
magnitudeVectorPtr += 4;
|
||||
}
|
||||
|
||||
number = quarterPoints * 4;
|
||||
for(; number < num_points; number++){
|
||||
float val1Real = *complexVectorPtr++;
|
||||
float val1Imag = *complexVectorPtr++;
|
||||
*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
|
||||
unsigned int number = 0;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
|
||||
const float* complexVectorPtr = (float*)complexVector;
|
||||
float* magnitudeVectorPtr = magnitudeVector;
|
||||
|
||||
__m128 cplxValue1, cplxValue2, iValue, qValue, result;
|
||||
for(;number < quarterPoints; number++){
|
||||
cplxValue1 = _mm_load_ps(complexVectorPtr);
|
||||
complexVectorPtr += 4;
|
||||
|
||||
cplxValue2 = _mm_load_ps(complexVectorPtr);
|
||||
complexVectorPtr += 4;
|
||||
|
||||
// Arrange in i1i2i3i4 format
|
||||
iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
|
||||
// Arrange in q1q2q3q4 format
|
||||
qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
|
||||
|
||||
iValue = _mm_mul_ps(iValue, iValue); // Square the I values
|
||||
qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
|
||||
|
||||
result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
|
||||
|
||||
_mm_store_ps(magnitudeVectorPtr, result);
|
||||
magnitudeVectorPtr += 4;
|
||||
}
|
||||
|
||||
number = quarterPoints * 4;
|
||||
for(; number < num_points; number++){
|
||||
float val1Real = *complexVectorPtr++;
|
||||
float val1Imag = *complexVectorPtr++;
|
||||
*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
|
||||
const float* complexVectorPtr = (float*)complexVector;
|
||||
float* magnitudeVectorPtr = magnitudeVector;
|
||||
unsigned int number = 0;
|
||||
for(number = 0; number < num_points; number++){
|
||||
const float real = *complexVectorPtr++;
|
||||
const float imag = *complexVectorPtr++;
|
||||
*magnitudeVectorPtr++ = (real*real) + (imag*imag);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_a_H */
|
@ -0,0 +1,178 @@
|
||||
#ifndef INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_u_H
|
||||
#define INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Multiplies the input vector by a scalar and stores the results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector The vector to be multiplied
|
||||
\param scalar The complex scalar to multiply aVector
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
|
||||
unsigned int number = 0;
|
||||
const unsigned int halfPoints = num_points / 2;
|
||||
|
||||
__m128 x, yl, yh, z, tmp1, tmp2;
|
||||
lv_32fc_t* c = cVector;
|
||||
const lv_32fc_t* a = aVector;
|
||||
|
||||
// Set up constant scalar vector
|
||||
yl = _mm_set_ps1(lv_creal(scalar));
|
||||
yh = _mm_set_ps1(lv_cimag(scalar));
|
||||
|
||||
for(;number < halfPoints; number++){
|
||||
|
||||
x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
|
||||
|
||||
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
|
||||
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
|
||||
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
|
||||
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
_mm_storeu_ps((float*)c,z); // Store the results back into the C container
|
||||
|
||||
a += 2;
|
||||
c += 2;
|
||||
}
|
||||
|
||||
if((num_points % 2) != 0) {
|
||||
*c = (*a) * scalar;
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the input vector by a scalar and stores the results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector The vector to be multiplied
|
||||
\param scalar The complex scalar to multiply aVector
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
|
||||
lv_32fc_t* cPtr = cVector;
|
||||
const lv_32fc_t* aPtr = aVector;
|
||||
unsigned int number = num_points;
|
||||
|
||||
// unwrap loop
|
||||
while (number >= 8){
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
number -= 8;
|
||||
}
|
||||
|
||||
// clean up any remaining
|
||||
while (number-- > 0)
|
||||
*cPtr++ = *aPtr++ * scalar;
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_u_H */
|
||||
#ifndef INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_a_H
|
||||
#define INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
|
||||
unsigned int number = 0;
|
||||
const unsigned int halfPoints = num_points / 2;
|
||||
|
||||
__m128 x, yl, yh, z, tmp1, tmp2;
|
||||
lv_32fc_t* c = cVector;
|
||||
const lv_32fc_t* a = aVector;
|
||||
|
||||
// Set up constant scalar vector
|
||||
yl = _mm_set_ps1(lv_creal(scalar));
|
||||
yh = _mm_set_ps1(lv_cimag(scalar));
|
||||
|
||||
for(;number < halfPoints; number++){
|
||||
|
||||
x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
|
||||
|
||||
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
|
||||
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
|
||||
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
|
||||
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
_mm_store_ps((float*)c,z); // Store the results back into the C container
|
||||
|
||||
a += 2;
|
||||
c += 2;
|
||||
}
|
||||
|
||||
if((num_points % 2) != 0) {
|
||||
*c = (*a) * scalar;
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
|
||||
lv_32fc_t* cPtr = cVector;
|
||||
const lv_32fc_t* aPtr = aVector;
|
||||
unsigned int number = num_points;
|
||||
|
||||
// unwrap loop
|
||||
while (number >= 8){
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
number -= 8;
|
||||
}
|
||||
|
||||
// clean up any remaining
|
||||
while (number-- > 0)
|
||||
*cPtr++ = *aPtr++ * scalar;
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_a_H */
|
@ -0,0 +1,763 @@
|
||||
#ifndef INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H
|
||||
#define INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
|
||||
/*!
  \brief Sums input[i] * taps[i] over num_points complex values (plain C implementation)
  \param result Receives the accumulated complex sum
  \param input First complex vector
  \param taps Second complex vector, multiplied element-wise with input
  \param num_points Number of complex values read from input and taps
*/
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
    /* The complex buffers are walked as interleaved (re, im) floats. */
    const float* in = (const float*)input;
    const float* tp = (const float*)taps;
    float* out = (float*)result;

    /* Two independent accumulators: one for even-indexed, one for odd-indexed products. */
    float evenRe = 0, evenIm = 0;
    float oddRe = 0, oddIm = 0;
    unsigned int blocksLeft;

    /* Each pass consumes two complex values from both vectors. */
    for(blocksLeft = num_points / 2; blocksLeft > 0; blocksLeft--) {
        evenRe += in[0] * tp[0] - in[1] * tp[1];
        evenIm += in[0] * tp[1] + in[1] * tp[0];
        oddRe += in[2] * tp[2] - in[3] * tp[3];
        oddIm += in[2] * tp[3] + in[3] * tp[2];

        in += 4;
        tp += 4;
    }

    out[0] = evenRe + oddRe;
    out[1] = evenIm + oddIm;

    /* Odd point count: fold in the last product with ordinary complex arithmetic. */
    if(num_points & 1) {
        *result += input[num_points - 1] * taps[num_points - 1];
    }
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
|
||||
|
||||
#if LV_HAVE_SSE && LV_HAVE_64
|
||||
|
||||
/*!
  \brief Sums input[i] * taps[i] over num_points complex values (x86-64 SSE inline assembly, unaligned loads)
  \param result Receives the accumulated complex sum
  \param input First complex vector
  \param taps Second complex vector, multiplied element-wise with input
  \param num_points Number of complex values read from input and taps

  The assembly processes the data in 16-byte steps (two complex values each);
  a trailing odd element is added in C afterwards.

  NOTE(review): the assembly loads the first 16 bytes of input/taps before
  testing the count and preloads the next 16 bytes at the end of every loop
  pass, so it can read up to 16 bytes past the end of each buffer. Callers
  presumably provide readable padding -- confirm.
*/
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {

    /* 64-bit byte count: the assembly reads the whole of %rcx, and a 32-bit
       product could overflow for very large num_points. */
    const unsigned long long num_bytes = (unsigned long long)num_points * 8;
    unsigned int isodd = num_points & 1;

    asm
    (
     "# ccomplex_dotprod_generic (float* result, const float *input,\n\t"
     "# const float *taps, unsigned num_bytes)\n\t"
     "# float sum0 = 0;\n\t"
     "# float sum1 = 0;\n\t"
     "# float sum2 = 0;\n\t"
     "# float sum3 = 0;\n\t"
     "# do {\n\t"
     "# sum0 += input[0] * taps[0] - input[1] * taps[1];\n\t"
     "# sum1 += input[0] * taps[1] + input[1] * taps[0];\n\t"
     "# sum2 += input[2] * taps[2] - input[3] * taps[3];\n\t"
     "# sum3 += input[2] * taps[3] + input[3] * taps[2];\n\t"
     "# input += 4;\n\t"
     "# taps += 4; \n\t"
     "# } while (--n_2_ccomplex_blocks != 0);\n\t"
     "# result[0] = sum0 + sum2;\n\t"
     "# result[1] = sum1 + sum3;\n\t"
     "# TODO: prefetch and better scheduling\n\t"
     " xor %%r9, %%r9\n\t"
     " xor %%r10, %%r10\n\t"
     " movq %%rcx, %%rax\n\t"
     " movq %%rcx, %%r8\n\t"
     " movq %[rsi], %%r9\n\t"
     " movq %[rdx], %%r10\n\t"
     " xorps %%xmm6, %%xmm6 # zero accumulators\n\t"
     " movups 0(%%r9), %%xmm0\n\t"
     " xorps %%xmm7, %%xmm7 # zero accumulators\n\t"
     " movups 0(%%r10), %%xmm2\n\t"
     " shr $5, %%rax # rax = n_2_ccomplex_blocks / 2\n\t"
     " shr $4, %%r8\n\t"
     " jmp .%=L1_test\n\t"
     " # 4 taps / loop\n\t"
     " # something like ?? cycles / loop\n\t"
     ".%=Loop1: \n\t"
     "# complex prod: C += A * B, w/ temp Z & Y (or B), xmmPN=$0x8000000080000000\n\t"
     "# movups (%%r9), %%xmmA\n\t"
     "# movups (%%r10), %%xmmB\n\t"
     "# movups %%xmmA, %%xmmZ\n\t"
     "# shufps $0xb1, %%xmmZ, %%xmmZ # swap internals\n\t"
     "# mulps %%xmmB, %%xmmA\n\t"
     "# mulps %%xmmZ, %%xmmB\n\t"
     "# # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
     "# xorps %%xmmPN, %%xmmA\n\t"
     "# movups %%xmmA, %%xmmZ\n\t"
     "# unpcklps %%xmmB, %%xmmA\n\t"
     "# unpckhps %%xmmB, %%xmmZ\n\t"
     "# movups %%xmmZ, %%xmmY\n\t"
     "# shufps $0x44, %%xmmA, %%xmmZ # b01000100\n\t"
     "# shufps $0xee, %%xmmY, %%xmmA # b11101110\n\t"
     "# addps %%xmmZ, %%xmmA\n\t"
     "# addps %%xmmA, %%xmmC\n\t"
     "# A=xmm0, B=xmm2, Z=xmm4\n\t"
     "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
     " movups 16(%%r9), %%xmm1\n\t"
     " movups %%xmm0, %%xmm4\n\t"
     " mulps %%xmm2, %%xmm0\n\t"
     " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
     " movups 16(%%r10), %%xmm3\n\t"
     " movups %%xmm1, %%xmm5\n\t"
     " addps %%xmm0, %%xmm6\n\t"
     " mulps %%xmm3, %%xmm1\n\t"
     " shufps $0xb1, %%xmm5, %%xmm5 # swap internals\n\t"
     " addps %%xmm1, %%xmm6\n\t"
     " mulps %%xmm4, %%xmm2\n\t"
     " movups 32(%%r9), %%xmm0\n\t"
     " addps %%xmm2, %%xmm7\n\t"
     " mulps %%xmm5, %%xmm3\n\t"
     " add $32, %%r9\n\t"
     " movups 32(%%r10), %%xmm2\n\t"
     " addps %%xmm3, %%xmm7\n\t"
     " add $32, %%r10\n\t"
     ".%=L1_test:\n\t"
     " dec %%rax\n\t"
     " jge .%=Loop1\n\t"
     " # We've handled the bulk of multiplies up to here.\n\t"
     " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
     " # If so, we've got 2 more taps to do.\n\t"
     " and $1, %%r8\n\t"
     " je .%=Leven\n\t"
     " # The count was odd, do 2 more taps.\n\t"
     " # Note that we've already got mm0/mm2 preloaded\n\t"
     " # from the main loop.\n\t"
     " movups %%xmm0, %%xmm4\n\t"
     " mulps %%xmm2, %%xmm0\n\t"
     " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
     " addps %%xmm0, %%xmm6\n\t"
     " mulps %%xmm4, %%xmm2\n\t"
     " addps %%xmm2, %%xmm7\n\t"
     ".%=Leven:\n\t"
     " # neg inversor\n\t"
     " xorps %%xmm1, %%xmm1\n\t"
     " mov $0x80000000, %%r9\n\t"
     " movd %%r9, %%xmm1\n\t"
     " shufps $0x11, %%xmm1, %%xmm1 # b00010001 # 0 -0 0 -0\n\t"
     " # pfpnacc\n\t"
     " xorps %%xmm1, %%xmm6\n\t"
     " movups %%xmm6, %%xmm2\n\t"
     " unpcklps %%xmm7, %%xmm6\n\t"
     " unpckhps %%xmm7, %%xmm2\n\t"
     " movups %%xmm2, %%xmm3\n\t"
     " shufps $0x44, %%xmm6, %%xmm2 # b01000100\n\t"
     " shufps $0xee, %%xmm3, %%xmm6 # b11101110\n\t"
     " addps %%xmm2, %%xmm6\n\t"
     " # xmm6 = r1 i2 r3 i4\n\t"
     " movhlps %%xmm6, %%xmm4 # xmm4 = r3 i4 ?? ??\n\t"
     " addps %%xmm4, %%xmm6 # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
     " movlps %%xmm6, (%[rdi]) # store low 2x32 bits (complex) to memory\n\t"
     :
     :[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" (result)
     /* Declare everything the template touches: the scratch integer registers,
        all eight SSE registers, the flags (xor/shr/dec/and/add), and memory
        (the buffers are read and *result is written through a pointer). */
     :"rax", "r8", "r9", "r10",
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
      "cc", "memory"
    );

    /* Odd point count: add the last product, which the assembly did not cover. */
    if(isodd) {
        *result += input[num_points - 1] * taps[num_points - 1];
    }

    return;
}
|
||||
|
||||
#endif /* LV_HAVE_SSE && LV_HAVE_64 */
|
||||
|
||||
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
|
||||
#include <pmmintrin.h>
|
||||
|
||||
/*!
  \brief Sums input[i] * taps[i] over num_points complex values (SSE3, unaligned loads)
  \param result Receives the accumulated complex sum
  \param input First complex vector
  \param taps Second complex vector, multiplied element-wise with input
  \param num_points Number of complex values read from input and taps
*/
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
    /* Scratch buffer used to spill the two SIMD partial sums. */
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t partialSums[2];
    lv_32fc_t total;
    __m128 accumulator = _mm_setzero_ps();
    /* Largest even count not exceeding num_points: two complex values per register. */
    const unsigned int pairedPoints = num_points & ~1u;
    unsigned int idx;

    memset(&total, 0x0, 2*sizeof(float));

    for(idx = 0; idx < pairedPoints; idx += 2) {
        const __m128 in = _mm_loadu_ps((const float*)(input + idx)); /* ar,ai,br,bi */
        const __m128 tp = _mm_loadu_ps((const float*)(taps + idx));  /* cr,ci,dr,di */

        const __m128 tpRe = _mm_moveldup_ps(tp);                /* cr,cr,dr,dr */
        const __m128 tpIm = _mm_movehdup_ps(tp);                /* ci,ci,di,di */
        const __m128 inSwapped = _mm_shuffle_ps(in, in, 0xB1);  /* ai,ar,bi,br */

        /* ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di */
        const __m128 product = _mm_addsub_ps(_mm_mul_ps(in, tpRe), _mm_mul_ps(inSwapped, tpIm));

        accumulator = _mm_add_ps(accumulator, product);
    }

    /* Combine the two complex partial sums held in the register. */
    _mm_storeu_ps((float*)partialSums, accumulator);
    total += ( partialSums[0] + partialSums[1] );

    /* Odd point count: add the last product with ordinary complex arithmetic. */
    if(num_points & 1) {
        total += input[num_points - 1] * taps[num_points - 1];
    }

    *result = total;
}
|
||||
|
||||
#endif /*LV_HAVE_SSE3*/
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
/*!
  \brief Computes the complex dot product of two vectors with SSE4.1 (dpps), using unaligned loads
  \param result Where the sum of input[i] * taps[i] over all points is written
  \param input First complex vector
  \param taps Second complex vector
  \param num_points The number of complex values in input and taps
*/
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {

    // Sign bit of lane 0 only: used to negate the accumulated imag*imag term
    static const __m128i sign_flip = {0x000000000000000080000000};

    const unsigned int quad_count = num_points / 4;
    const unsigned int first_leftover = quad_count * 4;

    float* in_f = (float*)input;
    float* tap_f = (float*)taps;
    unsigned int k;

    __m128 in_lo, in_hi, tap_lo, tap_hi;
    __m128 in_mix_lo, in_mix_hi, tap_mix_lo, tap_mix_hi;
    __m128 in_re, in_im, tap_re, tap_im;

    __m128 sum_rr = _mm_setzero_ps(); // lane 0: sum of re(in)*re(taps)
    __m128 sum_ii = _mm_setzero_ps(); // lane 0: sum of im(in)*im(taps)
    __m128 sum_ri = _mm_setzero_ps(); // lane 1: sum of re(in)*im(taps)
    __m128 sum_ir = _mm_setzero_ps(); // lane 1: sum of im(in)*re(taps)

    // Four complex points per iteration
    for (k = 0; k < quad_count; ++k) {
        in_lo  = _mm_loadu_ps(in_f);
        tap_lo = _mm_loadu_ps(tap_f);
        in_hi  = _mm_loadu_ps(in_f + 4);
        tap_hi = _mm_loadu_ps(tap_f + 4);

        in_f += 8;
        tap_f += 8;

        // De-interleave into separate real and imaginary vectors
        in_mix_hi  = _mm_unpackhi_ps(in_lo, in_hi);
        tap_mix_hi = _mm_unpackhi_ps(tap_lo, tap_hi);
        in_mix_lo  = _mm_unpacklo_ps(in_lo, in_hi);
        tap_mix_lo = _mm_unpacklo_ps(tap_lo, tap_hi);

        in_im  = _mm_unpackhi_ps(in_mix_lo, in_mix_hi);
        in_re  = _mm_unpacklo_ps(in_mix_lo, in_mix_hi);
        tap_im = _mm_unpackhi_ps(tap_mix_lo, tap_mix_hi);
        tap_re = _mm_unpacklo_ps(tap_mix_lo, tap_mix_hi);

        // 0xf1 writes the 4-element dot product to lane 0, 0xf2 to lane 1
        sum_rr = _mm_add_ps(_mm_dp_ps(in_re, tap_re, 0xf1), sum_rr);
        sum_ii = _mm_add_ps(_mm_dp_ps(in_im, tap_im, 0xf1), sum_ii);
        sum_ri = _mm_add_ps(_mm_dp_ps(in_re, tap_im, 0xf2), sum_ri);
        sum_ir = _mm_add_ps(_mm_dp_ps(in_im, tap_re, 0xf2), sum_ir);
    }

    // real = rr - ii, imag = ri + ir
    sum_ii = _mm_xor_ps(sum_ii, bit128_p(&sign_flip)->float_vec);

    sum_ri = _mm_add_ps(sum_ri, sum_ir);
    sum_rr = _mm_add_ps(sum_rr, sum_ii);

    sum_ri = _mm_add_ps(sum_ri, sum_rr);

    // The low two floats now hold (real, imag)
    _mm_storel_pi((__m64*)result, sum_ri);

    // Up to three trailing points are accumulated in scalar code
    for (k = first_leftover; k < num_points; k++) {
        *result += input[k] * taps[k];
    }
}
|
||||
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
|
||||
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H*/
|
||||
#ifndef INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H
|
||||
#define INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
|
||||
/*!
  \brief Computes the complex dot product of two vectors (portable C implementation)
  \param result Where the sum of input[i] * taps[i] over all points is written
  \param input First complex vector
  \param taps Second complex vector
  \param num_points The number of complex values in input and taps
*/
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {

    float * res = (float*) result;
    float * in = (float*) input;
    float * tp = (float*) taps;

    // Number of two-sample blocks. Computed directly from num_points:
    // deriving it from a byte count (num_points*8 >> 4) wraps around in
    // 32-bit arithmetic for very large vectors.
    const unsigned int n_2_ccomplex_blocks = num_points / 2;
    const unsigned int isodd = num_points & 1;

    // Two independent accumulators (real, imag), one per sample of a block
    float sum0[2] = {0,0};
    float sum1[2] = {0,0};
    unsigned int i = 0;

    for(i = 0; i < n_2_ccomplex_blocks; ++i) {
        sum0[0] += in[0] * tp[0] - in[1] * tp[1];
        sum0[1] += in[0] * tp[1] + in[1] * tp[0];
        sum1[0] += in[2] * tp[2] - in[3] * tp[3];
        sum1[1] += in[2] * tp[3] + in[3] * tp[2];

        in += 4;
        tp += 4;
    }

    res[0] = sum0[0] + sum1[0];
    res[1] = sum0[1] + sum1[1];

    // An odd point count leaves one trailing sample outside the blocks
    if(isodd) {
        *result += input[num_points - 1] * taps[num_points - 1];
    }
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
|
||||
#if LV_HAVE_SSE && LV_HAVE_64
|
||||
|
||||
|
||||
/*!
  \brief Computes the complex dot product of two 16-byte aligned vectors with hand-written x86-64 SSE assembly
  \param result Where the sum of input[i] * taps[i] over all points is written
  \param input First complex vector (read with movaps)
  \param taps Second complex vector (read with movaps)
  \param num_points The number of complex values in input and taps

  NOTE(review): the assembly preloads 16 bytes from both vectors before testing
  the loop count, and preloads the next 16 bytes at the end of every loop
  iteration, so it can read past the last complete pair of samples.
*/
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {

    // The asm reads the byte count through the full 64-bit %rcx, so hand it a
    // 64-bit value: the upper half of the register is otherwise unspecified.
    const size_t num_bytes = (size_t)num_points * 8;
    unsigned int isodd = num_points & 1;

    asm volatile
    (
        "#	ccomplex_dotprod_generic (float* result, const float *input,\n\t"
        "#	                        const float *taps, unsigned num_bytes)\n\t"
        "#	  float sum0 = 0;\n\t"
        "#	  float sum1 = 0;\n\t"
        "#	  float sum2 = 0;\n\t"
        "#	  float sum3 = 0;\n\t"
        "#	  do {\n\t"
        "#	    sum0 += input[0] * taps[0] - input[1] * taps[1];\n\t"
        "#	    sum1 += input[0] * taps[1] + input[1] * taps[0];\n\t"
        "#	    sum2 += input[2] * taps[2] - input[3] * taps[3];\n\t"
        "#	    sum3 += input[2] * taps[3] + input[3] * taps[2];\n\t"
        "#	    input += 4;\n\t"
        "#	    taps += 4;  \n\t"
        "#	  } while (--n_2_ccomplex_blocks != 0);\n\t"
        "#	  result[0] = sum0 + sum2;\n\t"
        "#	  result[1] = sum1 + sum3;\n\t"
        "#	TODO: prefetch and better scheduling\n\t"
        "	xor    %%r9,  %%r9\n\t"
        "	xor    %%r10, %%r10\n\t"
        "	movq   %%rcx, %%rax\n\t"
        "	movq   %%rcx, %%r8\n\t"
        "	movq   %[rsi],  %%r9\n\t"
        "	movq   %[rdx], %%r10\n\t"
        "	xorps	%%xmm6, %%xmm6		# zero accumulators\n\t"
        "	movaps	0(%%r9), %%xmm0\n\t"
        "	xorps	%%xmm7, %%xmm7		# zero accumulators\n\t"
        "	movaps	0(%%r10), %%xmm2\n\t"
        "	shr	$5, %%rax		# rax = n_2_ccomplex_blocks / 2\n\t"
        "	shr     $4, %%r8\n\t"
        "	jmp	.%=L1_test\n\t"
        "	# 4 taps / loop\n\t"
        "	# something like ?? cycles / loop\n\t"
        ".%=Loop1:	\n\t"
        "# complex prod: C += A * B,  w/ temp Z & Y (or B), xmmPN=$0x8000000080000000\n\t"
        "#	movaps	(%%r9), %%xmmA\n\t"
        "#	movaps	(%%r10), %%xmmB\n\t"
        "#	movaps	%%xmmA, %%xmmZ\n\t"
        "#	shufps	$0xb1, %%xmmZ, %%xmmZ	# swap internals\n\t"
        "#	mulps	%%xmmB, %%xmmA\n\t"
        "#	mulps	%%xmmZ, %%xmmB\n\t"
        "#	# SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
        "#	xorps	%%xmmPN, %%xmmA\n\t"
        "#	movaps	%%xmmA, %%xmmZ\n\t"
        "#	unpcklps %%xmmB, %%xmmA\n\t"
        "#	unpckhps %%xmmB, %%xmmZ\n\t"
        "#	movaps	%%xmmZ, %%xmmY\n\t"
        "#	shufps	$0x44, %%xmmA, %%xmmZ	# b01000100\n\t"
        "#	shufps	$0xee, %%xmmY, %%xmmA	# b11101110\n\t"
        "#	addps	%%xmmZ, %%xmmA\n\t"
        "#	addps	%%xmmA, %%xmmC\n\t"
        "# A=xmm0, B=xmm2, Z=xmm4\n\t"
        "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
        "	movaps	16(%%r9), %%xmm1\n\t"
        "	movaps	%%xmm0, %%xmm4\n\t"
        "	mulps	%%xmm2, %%xmm0\n\t"
        "	shufps	$0xb1, %%xmm4, %%xmm4	# swap internals\n\t"
        "	movaps	16(%%r10), %%xmm3\n\t"
        "	movaps	%%xmm1, %%xmm5\n\t"
        "	addps	%%xmm0, %%xmm6\n\t"
        "	mulps	%%xmm3, %%xmm1\n\t"
        "	shufps	$0xb1, %%xmm5, %%xmm5	# swap internals\n\t"
        "	addps	%%xmm1, %%xmm6\n\t"
        "	mulps	%%xmm4, %%xmm2\n\t"
        "	movaps	32(%%r9), %%xmm0\n\t"
        "	addps	%%xmm2, %%xmm7\n\t"
        "	mulps	%%xmm5, %%xmm3\n\t"
        "	add	$32, %%r9\n\t"
        "	movaps	32(%%r10), %%xmm2\n\t"
        "	addps	%%xmm3, %%xmm7\n\t"
        "	add	$32, %%r10\n\t"
        ".%=L1_test:\n\t"
        "	dec	%%rax\n\t"
        "	jge	.%=Loop1\n\t"
        "	# We've handled the bulk of multiplies up to here.\n\t"
        "	# Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
        "	# If so, we've got 2 more taps to do.\n\t"
        "	and	$1, %%r8\n\t"
        "	je	.%=Leven\n\t"
        "	# The count was odd, do 2 more taps.\n\t"
        "	# Note that we've already got mm0/mm2 preloaded\n\t"
        "	# from the main loop.\n\t"
        "	movaps	%%xmm0, %%xmm4\n\t"
        "	mulps	%%xmm2, %%xmm0\n\t"
        "	shufps	$0xb1, %%xmm4, %%xmm4	# swap internals\n\t"
        "	addps	%%xmm0, %%xmm6\n\t"
        "	mulps	%%xmm4, %%xmm2\n\t"
        "	addps	%%xmm2, %%xmm7\n\t"
        ".%=Leven:\n\t"
        "	# neg inversor\n\t"
        "	xorps	%%xmm1, %%xmm1\n\t"
        "	mov	$0x80000000, %%r9\n\t"
        "	movd	%%r9, %%xmm1\n\t"
        "	shufps	$0x11, %%xmm1, %%xmm1	# b00010001 # 0 -0 0 -0\n\t"
        "	# pfpnacc\n\t"
        "	xorps	%%xmm1, %%xmm6\n\t"
        "	movaps	%%xmm6, %%xmm2\n\t"
        "	unpcklps %%xmm7, %%xmm6\n\t"
        "	unpckhps %%xmm7, %%xmm2\n\t"
        "	movaps	%%xmm2, %%xmm3\n\t"
        "	shufps	$0x44, %%xmm6, %%xmm2	# b01000100\n\t"
        "	shufps	$0xee, %%xmm3, %%xmm6	# b11101110\n\t"
        "	addps	%%xmm2, %%xmm6\n\t"
        "					# xmm6 = r1 i2 r3 i4\n\t"
        "	movhlps	%%xmm6, %%xmm4		# xmm4 = r3 i4 ?? ??\n\t"
        "	addps	%%xmm4, %%xmm6		# xmm6 = r1+r3 i2+i4 ?? ??\n\t"
        "	movlps	%%xmm6, (%[rdi])	# store low 2x32 bits (complex) to memory\n\t"
        :
        :[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" (result)
        // Besides the scratch integer registers, the code overwrites xmm0-xmm7
        // and the flags, and stores through `result`. "memory" keeps the
        // compiler from caching or reordering accesses to *result around it.
        :"rax", "r8", "r9", "r10",
         "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
         "cc", "memory"
    );

    // The assembly only processes complete pairs; add the trailing sample here
    if(isodd) {
        *result += input[num_points - 1] * taps[num_points - 1];
    }

    return;
}
|
||||
|
||||
#endif /* LV_HAVE_SSE && LV_HAVE_64 */
|
||||
|
||||
#if LV_HAVE_SSE && LV_HAVE_32
|
||||
|
||||
/*!
  \brief Computes the complex dot product of two vectors on 32-bit SSE targets
  \param result Where the sum of input[i] * taps[i] over all points is written
  \param input First complex vector
  \param taps Second complex vector
  \param num_points The number of complex values in input and taps

  This variant forwards to the generic C kernel. The hand-written 32-bit
  assembly that used to sit here under "#if 0" was never compiled: it had no
  asm operands and fetched its arguments from fixed offsets relative to %ebp,
  which is not a layout an inline function can rely on.
*/
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {

    volk_gnsssdr_32fc_x2_dot_prod_32fc_a_generic(result, input, taps, num_points);
}
|
||||
|
||||
#endif /* LV_HAVE_SSE && LV_HAVE_32 */
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
|
||||
#include <pmmintrin.h>
|
||||
|
||||
/*!
  \brief Computes the complex dot product of two 16-byte aligned vectors with SSE3
  \param result Where the sum of input[i] * taps[i] over all points is written
  \param input First complex vector (read with aligned loads)
  \param taps Second complex vector (read with aligned loads)
  \param num_points The number of complex values in input and taps
*/
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {

    // Number of two-sample blocks. Computed directly from num_points:
    // deriving it from a byte count (num_points*8 >> 4) wraps around in
    // 32-bit arithmetic for very large vectors.
    const unsigned int halfPoints = num_points / 2;
    const unsigned int isodd = num_points & 1;

    lv_32fc_t dotProduct;
    memset(&dotProduct, 0x0, 2*sizeof(float));

    unsigned int number = 0;

    __m128 x, y, yl, yh, z, tmp1, tmp2, dotProdVal;

    const lv_32fc_t* a = input;
    const lv_32fc_t* b = taps;

    dotProdVal = _mm_setzero_ps();

    for(;number < halfPoints; number++){

        x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
        y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di

        yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
        yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di

        tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr

        x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br

        tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di

        z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di

        dotProdVal = _mm_add_ps(dotProdVal, z); // Add the complex multiplication results together

        a += 2;
        b += 2;
    }

    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];

    _mm_store_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector

    // Fold the two complex lanes of the accumulator into one value
    dotProduct += ( dotProductVector[0] + dotProductVector[1] );

    // An odd point count leaves one trailing sample for scalar code
    if(isodd) {
        dotProduct += input[num_points - 1] * taps[num_points - 1];
    }

    *result = dotProduct;
}
|
||||
|
||||
#endif /*LV_HAVE_SSE3*/
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
|
||||
#include <smmintrin.h>
|
||||
|
||||
/*!
  \brief Computes the complex dot product of two 16-byte aligned vectors with SSE4.1 (dpps)
  \param result Where the sum of input[i] * taps[i] over all points is written
  \param input First complex vector (read with aligned loads)
  \param taps Second complex vector (read with aligned loads)
  \param num_points The number of complex values in input and taps
*/
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {

    // Sign bit of lane 0 only: used to negate the accumulated imag*imag term
    static const __m128i sign_flip = {0x000000000000000080000000};

    const unsigned int quad_count = num_points / 4;
    const unsigned int first_leftover = quad_count * 4;

    float* in_f = (float*)input;
    float* tap_f = (float*)taps;
    unsigned int k;

    __m128 in_lo, in_hi, tap_lo, tap_hi;
    __m128 in_mix_lo, in_mix_hi, tap_mix_lo, tap_mix_hi;
    __m128 in_re, in_im, tap_re, tap_im;

    __m128 sum_rr = _mm_setzero_ps(); // lane 0: sum of re(in)*re(taps)
    __m128 sum_ii = _mm_setzero_ps(); // lane 0: sum of im(in)*im(taps)
    __m128 sum_ri = _mm_setzero_ps(); // lane 1: sum of re(in)*im(taps)
    __m128 sum_ir = _mm_setzero_ps(); // lane 1: sum of im(in)*re(taps)

    // Four complex points per iteration
    for (k = 0; k < quad_count; ++k) {
        in_lo  = _mm_load_ps(in_f);
        tap_lo = _mm_load_ps(tap_f);
        in_hi  = _mm_load_ps(in_f + 4);
        tap_hi = _mm_load_ps(tap_f + 4);

        in_f += 8;
        tap_f += 8;

        // De-interleave into separate real and imaginary vectors
        in_mix_hi  = _mm_unpackhi_ps(in_lo, in_hi);
        tap_mix_hi = _mm_unpackhi_ps(tap_lo, tap_hi);
        in_mix_lo  = _mm_unpacklo_ps(in_lo, in_hi);
        tap_mix_lo = _mm_unpacklo_ps(tap_lo, tap_hi);

        in_im  = _mm_unpackhi_ps(in_mix_lo, in_mix_hi);
        in_re  = _mm_unpacklo_ps(in_mix_lo, in_mix_hi);
        tap_im = _mm_unpackhi_ps(tap_mix_lo, tap_mix_hi);
        tap_re = _mm_unpacklo_ps(tap_mix_lo, tap_mix_hi);

        // 0xf1 writes the 4-element dot product to lane 0, 0xf2 to lane 1
        sum_rr = _mm_add_ps(_mm_dp_ps(in_re, tap_re, 0xf1), sum_rr);
        sum_ii = _mm_add_ps(_mm_dp_ps(in_im, tap_im, 0xf1), sum_ii);
        sum_ri = _mm_add_ps(_mm_dp_ps(in_re, tap_im, 0xf2), sum_ri);
        sum_ir = _mm_add_ps(_mm_dp_ps(in_im, tap_re, 0xf2), sum_ir);
    }

    // real = rr - ii, imag = ri + ir
    sum_ii = _mm_xor_ps(sum_ii, bit128_p(&sign_flip)->float_vec);

    sum_ri = _mm_add_ps(sum_ri, sum_ir);
    sum_rr = _mm_add_ps(sum_rr, sum_ii);

    sum_ri = _mm_add_ps(sum_ri, sum_rr);

    // The low two floats now hold (real, imag)
    _mm_storel_pi((__m64*)result, sum_ri);

    // Up to three trailing points are accumulated in scalar code
    for (k = first_leftover; k < num_points; k++) {
        *result += input[k] * taps[k];
    }
}
|
||||
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H*/
|
@ -0,0 +1,170 @@
|
||||
#ifndef INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H
|
||||
#define INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_x2_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
    const unsigned int pair_count = num_points / 2;
    const int has_leftover = (num_points & 1) != 0;

    lv_32fc_t* out_ptr = cVector;
    const lv_32fc_t* lhs_ptr = aVector;
    const lv_32fc_t* rhs_ptr = bVector;
    unsigned int pair;

    __m128 lhs, rhs, rhs_re, rhs_im, part_re, part_im, prod;

    // Two complex products per iteration, unaligned loads and stores
    for (pair = 0; pair < pair_count; ++pair) {
        lhs = _mm_loadu_ps((float*)lhs_ptr);   // ar, ai, br, bi
        rhs = _mm_loadu_ps((float*)rhs_ptr);   // cr, ci, dr, di

        rhs_re = _mm_moveldup_ps(rhs);         // cr, cr, dr, dr
        rhs_im = _mm_movehdup_ps(rhs);         // ci, ci, di, di

        part_re = _mm_mul_ps(lhs, rhs_re);     // ar*cr, ai*cr, br*dr, bi*dr

        lhs = _mm_shuffle_ps(lhs, lhs, 0xB1);  // ai, ar, bi, br

        part_im = _mm_mul_ps(lhs, rhs_im);     // ai*ci, ar*ci, bi*di, br*di

        prod = _mm_addsub_ps(part_re, part_im); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di

        _mm_storeu_ps((float*)out_ptr, prod);

        lhs_ptr += 2;
        rhs_ptr += 2;
        out_ptr += 2;
    }

    // The pointers now sit on the trailing sample, if there is one
    if (has_leftover) {
        *out_ptr = (*lhs_ptr) * (*rhs_ptr);
    }
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_x2_multiply_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
    unsigned int idx;

    // Element-wise complex product, one point at a time
    for (idx = 0; idx < num_points; ++idx) {
        cVector[idx] = aVector[idx] * bVector[idx];
    }
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H */
|
||||
#ifndef INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H
|
||||
#define INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
    const unsigned int pair_count = num_points / 2;
    const int has_leftover = (num_points & 1) != 0;

    lv_32fc_t* out_ptr = cVector;
    const lv_32fc_t* lhs_ptr = aVector;
    const lv_32fc_t* rhs_ptr = bVector;
    unsigned int pair;

    __m128 lhs, rhs, rhs_re, rhs_im, part_re, part_im, prod;

    // Two complex products per iteration, aligned loads and stores
    for (pair = 0; pair < pair_count; ++pair) {
        lhs = _mm_load_ps((float*)lhs_ptr);    // ar, ai, br, bi
        rhs = _mm_load_ps((float*)rhs_ptr);    // cr, ci, dr, di

        rhs_re = _mm_moveldup_ps(rhs);         // cr, cr, dr, dr
        rhs_im = _mm_movehdup_ps(rhs);         // ci, ci, di, di

        part_re = _mm_mul_ps(lhs, rhs_re);     // ar*cr, ai*cr, br*dr, bi*dr

        lhs = _mm_shuffle_ps(lhs, lhs, 0xB1);  // ai, ar, bi, br

        part_im = _mm_mul_ps(lhs, rhs_im);     // ai*ci, ar*ci, bi*di, br*di

        prod = _mm_addsub_ps(part_re, part_im); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di

        _mm_store_ps((float*)out_ptr, prod);

        lhs_ptr += 2;
        rhs_ptr += 2;
        out_ptr += 2;
    }

    // The pointers now sit on the trailing sample, if there is one
    if (has_leftover) {
        *out_ptr = (*lhs_ptr) * (*rhs_ptr);
    }
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_x2_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
    unsigned int idx;

    // Element-wise complex product, one point at a time
    for (idx = 0; idx < num_points; ++idx) {
        cVector[idx] = aVector[idx] * bVector[idx];
    }
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
/* Implementation of the multiply kernel that is defined outside this header. */
extern void volk_gnsssdr_32fc_x2_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points);
|
||||
/* Thin wrapper: passes all four arguments unchanged to the _a_orc_impl routine. */
/* NOTE(review): this wrapper is named _u_orc but forwards to the _a_ implementation - confirm that is intended. */
static inline void volk_gnsssdr_32fc_x2_multiply_32fc_u_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
|
||||
volk_gnsssdr_32fc_x2_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points);
|
||||
}
|
||||
#endif /* LV_HAVE_ORC */
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H */
|
@ -0,0 +1,409 @@
|
||||
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H
|
||||
#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
|
||||
/*!
|
||||
* TODO: Code the SSE4 version and benchmark it
|
||||
*/
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Early PRN code replica input
|
||||
\param L_code Early PRN code replica input
|
||||
\param E_out Early correlation output
|
||||
\param P_out Early correlation output
|
||||
\param L_out Early correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_sse3(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
|
||||
{
|
||||
unsigned int number = 0;
|
||||
const unsigned int halfPoints = num_points / 2;
|
||||
|
||||
lv_32fc_t dotProduct_E;
|
||||
memset(&dotProduct_E, 0x0, 2*sizeof(float));
|
||||
lv_32fc_t dotProduct_P;
|
||||
memset(&dotProduct_P, 0x0, 2*sizeof(float));
|
||||
lv_32fc_t dotProduct_L;
|
||||
memset(&dotProduct_L, 0x0, 2*sizeof(float));
|
||||
|
||||
// Aux vars
|
||||
__m128 x, y, yl, yh, z, tmp1, tmp2, z_E, z_P, z_L;
|
||||
|
||||
z_E = _mm_setzero_ps();
|
||||
z_P = _mm_setzero_ps();
|
||||
z_L = _mm_setzero_ps();
|
||||
|
||||
//input and output vectors
|
||||
//lv_32fc_t* _input_BB = input_BB;
|
||||
const lv_32fc_t* _input = input;
|
||||
const lv_32fc_t* _carrier = carrier;
|
||||
const lv_32fc_t* _E_code = E_code;
|
||||
const lv_32fc_t* _P_code = P_code;
|
||||
const lv_32fc_t* _L_code = L_code;
|
||||
|
||||
for(;number < halfPoints; number++)
|
||||
{
|
||||
// carrier wipe-off (vector point-to-point product)
|
||||
x = _mm_loadu_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
|
||||
y = _mm_loadu_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
|
||||
|
||||
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
|
||||
|
||||
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
|
||||
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
|
||||
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
|
||||
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
//_mm_storeu_ps((float*)_input_BB,z); // Store the results back into the _input_BB container
|
||||
|
||||
// correlation E,P,L (3x vector scalar product)
|
||||
// Early
|
||||
//x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
|
||||
x = z;
|
||||
|
||||
y = _mm_load_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
|
||||
|
||||
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
|
||||
|
||||
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
|
||||
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
|
||||
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
|
||||
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together
|
||||
|
||||
// Prompt
|
||||
//x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
|
||||
y = _mm_load_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
|
||||
|
||||
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
|
||||
|
||||
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
|
||||
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
|
||||
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
|
||||
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
|
||||
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together
|
||||
|
||||
// Late
|
||||
//x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
|
||||
y = _mm_load_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
|
||||
|
||||
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
|
||||
|
||||
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
|
||||
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
|
||||
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
|
||||
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
|
||||
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together
|
||||
|
||||
/*pointer increment*/
|
||||
_carrier += 2;
|
||||
_input += 2;
|
||||
//_input_BB += 2;
|
||||
_E_code += 2;
|
||||
_P_code += 2;
|
||||
_L_code +=2;
|
||||
}
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
|
||||
//__VOLK_ATTR_ALIGNED(16) lv_32fc_t _input_BB;
|
||||
|
||||
_mm_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
|
||||
_mm_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
|
||||
_mm_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
|
||||
|
||||
dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
|
||||
dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
|
||||
dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );
|
||||
|
||||
if((num_points % 2) != 0)
|
||||
{
|
||||
//_input_BB = (*_input) * (*_carrier);
|
||||
dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
|
||||
dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
|
||||
dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
|
||||
}
|
||||
|
||||
*E_out = dotProduct_E;
|
||||
*P_out = dotProduct_P;
|
||||
*L_out = dotProduct_L;
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Prompt PRN code replica input
|
||||
\param L_code Late PRN code replica input
|
||||
\param E_out Early correlation output
|
||||
\param P_out Prompt correlation output
|
||||
\param L_out Late correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
{
    /*
     * Portable reference implementation.
     * For every sample the input is multiplied by the carrier (carrier
     * wipe-off) and the resulting baseband sample is multiplied by the
     * Early, Prompt and Late code replicas; the three products are
     * accumulated into *E_out, *P_out and *L_out respectively.
     * With num_points == 0 the three outputs are simply set to zero.
     */
    lv_32fc_t bb_signal_sample;
    unsigned int i; /* same type as num_points: avoids a signed/unsigned comparison */

    *E_out = 0;
    *P_out = 0;
    *L_out = 0;

    // perform Early, Prompt and Late correlation
    for(i = 0; i < num_points; ++i)
    {
        // Perform the carrier wipe-off
        bb_signal_sample = input[i] * carrier[i];

        // Accumulate the Early, Prompt and Late correlator arms
        *E_out += bb_signal_sample * E_code[i];
        *P_out += bb_signal_sample * P_code[i];
        *L_out += bb_signal_sample * L_code[i];
    }
}
|
||||
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H */
|
||||
|
||||
|
||||
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H
|
||||
#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Prompt PRN code replica input
|
||||
\param L_code Late PRN code replica input
|
||||
\param E_out Early correlation output
|
||||
\param P_out Prompt correlation output
|
||||
\param L_out Late correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_sse3(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
{
    /*
     * SSE3 implementation using aligned loads (_mm_load_ps): input, carrier
     * and the three code replicas must therefore be 16-byte aligned.
     *
     * Two complex samples are processed per iteration:
     *   1. carrier wipe-off: bb = input * carrier
     *   2. bb is multiplied by the Early, Prompt and Late replicas and the
     *      products are accumulated in z_E, z_P and z_L.
     * After the loop the two complex lanes of each accumulator are summed,
     * and a trailing odd sample (if any) is handled in scalar code.
     */
    unsigned int number = 0;
    const unsigned int halfPoints = num_points / 2;

    lv_32fc_t dotProduct_E = lv_cmake(0, 0);
    lv_32fc_t dotProduct_P = lv_cmake(0, 0);
    lv_32fc_t dotProduct_L = lv_cmake(0, 0);

    // Aux vars
    __m128 x, y, yl, yh, z, tmp1, tmp2, z_E, z_P, z_L;
    __m128 bb_signal_sample, bb_signal_sample_shuffled;

    // Lane-sum scratch buffers (aligned so that _mm_store_ps can be used)
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];

    // Running pointers over the input vectors
    const lv_32fc_t* _input = input;
    const lv_32fc_t* _carrier = carrier;
    const lv_32fc_t* _E_code = E_code;
    const lv_32fc_t* _P_code = P_code;
    const lv_32fc_t* _L_code = L_code;

    z_E = _mm_setzero_ps();
    z_P = _mm_setzero_ps();
    z_L = _mm_setzero_ps();

    for(; number < halfPoints; number++)
    {
        // carrier wipe-off (vector point-to-point product)
        x = _mm_load_ps((const float*)_input);   // Load the ar + ai, br + bi as ar,ai,br,bi
        y = _mm_load_ps((const float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di

        yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
        yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di

        tmp1 = _mm_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr

        x = _mm_shuffle_ps(x, x, 0xB1); // Re-arrange x to be ai,ar,bi,br

        tmp2 = _mm_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di

        // Baseband samples: ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
        bb_signal_sample = _mm_addsub_ps(tmp1, tmp2);
        // Real/imag swapped copy, computed once and shared by the three arms
        bb_signal_sample_shuffled = _mm_shuffle_ps(bb_signal_sample, bb_signal_sample, 0xB1);

        // correlation E,P,L (3x vector scalar product)
        // Early
        y = _mm_load_ps((const float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di

        yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
        yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di

        tmp1 = _mm_mul_ps(bb_signal_sample, yl);          // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
        tmp2 = _mm_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di

        z = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
        z_E = _mm_add_ps(z_E, z);      // Add the complex multiplication results together

        // Prompt
        y = _mm_load_ps((const float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di

        yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
        yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di

        tmp1 = _mm_mul_ps(bb_signal_sample, yl);          // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
        tmp2 = _mm_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di

        z = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
        z_P = _mm_add_ps(z_P, z);      // Add the complex multiplication results together

        // Late
        y = _mm_load_ps((const float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di

        yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
        yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di

        tmp1 = _mm_mul_ps(bb_signal_sample, yl);          // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
        tmp2 = _mm_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di

        z = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
        z_L = _mm_add_ps(z_L, z);      // Add the complex multiplication results together

        /*pointer increment*/
        _carrier += 2;
        _input += 2;
        _E_code += 2;
        _P_code += 2;
        _L_code += 2;
    }

    _mm_store_ps((float*)dotProductVector_E, z_E); // Store the results back into the dot product vector
    _mm_store_ps((float*)dotProductVector_P, z_P); // Store the results back into the dot product vector
    _mm_store_ps((float*)dotProductVector_L, z_L); // Store the results back into the dot product vector

    // Horizontal sum of the two complex lanes of each accumulator
    dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
    dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
    dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );

    if((num_points % 2) != 0)
    {
        // Trailing odd sample: wipe off the carrier once, then correlate,
        // in the same order as the vector loop and the generic kernel.
        const lv_32fc_t bb_tail_sample = (*_input) * (*_carrier);
        dotProduct_E += bb_tail_sample * (*_E_code);
        dotProduct_P += bb_tail_sample * (*_P_code);
        dotProduct_L += bb_tail_sample * (*_L_code);
    }

    *E_out = dotProduct_E;
    *P_out = dotProduct_P;
    *L_out = dotProduct_L;
}
|
||||
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Prompt PRN code replica input
|
||||
\param L_code Late PRN code replica input
|
||||
\param E_out Early correlation output
|
||||
\param P_out Prompt correlation output
|
||||
\param L_out Late correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
{
    /*
     * Portable reference implementation (registered under the aligned
     * section of this header; the code itself has no alignment requirement).
     * Each input sample is multiplied by the carrier (carrier wipe-off) and
     * the baseband result is correlated against the Early, Prompt and Late
     * code replicas, accumulating into *E_out, *P_out and *L_out.
     * With num_points == 0 the three outputs are simply set to zero.
     */
    lv_32fc_t bb_signal_sample;
    unsigned int i; /* same type as num_points: avoids a signed/unsigned comparison */

    *E_out = 0;
    *P_out = 0;
    *L_out = 0;

    // perform Early, Prompt and Late correlation
    for(i = 0; i < num_points; ++i)
    {
        // Perform the carrier wipe-off
        bb_signal_sample = input[i] * carrier[i];

        // Accumulate the Early, Prompt and Late correlator arms
        *E_out += bb_signal_sample * E_code[i];
        *P_out += bb_signal_sample * P_code[i];
        *L_out += bb_signal_sample * L_code[i];
    }
}
|
||||
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H */
|
@ -0,0 +1,524 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5
|
||||
* \brief Volk protokernel: performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation with 64 bits vectors
|
||||
* \authors <ul>
|
||||
* <li>Javier Arribas, 2011. jarribas(at)cttc.es
|
||||
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that performs the carrier wipe-off mixing and the
|
||||
* VE, Early, Prompt, Late and VL correlation with 64 bits vectors (32 bits the
|
||||
* real part and 32 bits the imaginary part):
|
||||
* - The carrier wipe-off is done by multiplying the input signal by the
|
||||
* carrier (multiplication of 64 bits vectors) It returns the input
|
||||
* signal in base band (BB)
|
||||
* - VE values are calculated by multiplying the input signal in BB by the
|
||||
* VE code (multiplication of 64 bits vectors), accumulating the results
|
||||
* - Early values are calculated by multiplying the input signal in BB by the
|
||||
* early code (multiplication of 64 bits vectors), accumulating the results
|
||||
* - Prompt values are calculated by multiplying the input signal in BB by the
|
||||
* prompt code (multiplication of 64 bits vectors), accumulating the results
|
||||
* - Late values are calculated by multiplying the input signal in BB by the
|
||||
* late code (multiplication of 64 bits vectors), accumulating the results
|
||||
* - VL values are calculated by multiplying the input signal in BB by the
|
||||
* VL code (multiplication of 64 bits vectors), accumulating the results
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H
|
||||
#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
|
||||
/*!
|
||||
* TODO: Code the SSE4 version and benchmark it
|
||||
*/
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param VE_code VE PRN code replica input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Prompt PRN code replica input
|
||||
\param L_code Late PRN code replica input
|
||||
\param VL_code VL PRN code replica input
|
||||
\param VE_out VE correlation output
|
||||
\param E_out Early correlation output
|
||||
\param P_out Prompt correlation output
|
||||
\param L_out Late correlation output
|
||||
\param VL_out VL correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_sse3(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
{
    /*
     * SSE3 implementation using unaligned loads (_mm_loadu_ps), so the
     * input vectors need no particular alignment.
     *
     * Two complex samples are processed per iteration:
     *   1. carrier wipe-off: bb = input * carrier
     *   2. bb is multiplied by the VE, Early, Prompt, Late and VL replicas
     *      and the products are accumulated in z_VE, z_E, z_P, z_L, z_VL.
     * After the loop the two complex lanes of each accumulator are summed,
     * and a trailing odd sample (if any) is handled in scalar code.
     */
    unsigned int number = 0;
    const unsigned int halfPoints = num_points / 2;

    lv_32fc_t dotProduct_VE = lv_cmake(0, 0);
    lv_32fc_t dotProduct_E = lv_cmake(0, 0);
    lv_32fc_t dotProduct_P = lv_cmake(0, 0);
    lv_32fc_t dotProduct_L = lv_cmake(0, 0);
    lv_32fc_t dotProduct_VL = lv_cmake(0, 0);

    // Aux vars
    __m128 x, y, yl, yh, z, tmp1, tmp2, z_VE, z_E, z_P, z_L, z_VL;
    __m128 bb_signal_sample, bb_signal_sample_shuffled;

    // Lane-sum scratch buffers
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VE[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VL[2];

    // Running pointers over the input vectors
    const lv_32fc_t* _input = input;
    const lv_32fc_t* _carrier = carrier;
    const lv_32fc_t* _VE_code = VE_code;
    const lv_32fc_t* _E_code = E_code;
    const lv_32fc_t* _P_code = P_code;
    const lv_32fc_t* _L_code = L_code;
    const lv_32fc_t* _VL_code = VL_code;

    z_VE = _mm_setzero_ps();
    z_E = _mm_setzero_ps();
    z_P = _mm_setzero_ps();
    z_L = _mm_setzero_ps();
    z_VL = _mm_setzero_ps();

    for(; number < halfPoints; number++)
    {
        // carrier wipe-off (vector point-to-point product)
        x = _mm_loadu_ps((const float*)_input);   // Load the ar + ai, br + bi as ar,ai,br,bi
        y = _mm_loadu_ps((const float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di

        yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
        yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di

        tmp1 = _mm_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr

        x = _mm_shuffle_ps(x, x, 0xB1); // Re-arrange x to be ai,ar,bi,br

        tmp2 = _mm_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di

        // Baseband samples: ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
        bb_signal_sample = _mm_addsub_ps(tmp1, tmp2);
        // Real/imag swapped copy, computed once and shared by the five arms
        bb_signal_sample_shuffled = _mm_shuffle_ps(bb_signal_sample, bb_signal_sample, 0xB1);

        // correlation VE,E,P,L,VL (5x vector scalar product)
        // VE
        y = _mm_loadu_ps((const float*)_VE_code); // Load the cr + ci, dr + di as cr,ci,dr,di

        yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
        yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di

        tmp1 = _mm_mul_ps(bb_signal_sample, yl);          // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
        tmp2 = _mm_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di

        z = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
        z_VE = _mm_add_ps(z_VE, z);    // Add the complex multiplication results together

        // Early
        y = _mm_loadu_ps((const float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di

        yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
        yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di

        tmp1 = _mm_mul_ps(bb_signal_sample, yl);          // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
        tmp2 = _mm_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di

        z = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
        z_E = _mm_add_ps(z_E, z);      // Add the complex multiplication results together

        // Prompt
        y = _mm_loadu_ps((const float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di

        yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
        yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di

        tmp1 = _mm_mul_ps(bb_signal_sample, yl);          // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
        tmp2 = _mm_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di

        z = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
        z_P = _mm_add_ps(z_P, z);      // Add the complex multiplication results together

        // Late
        y = _mm_loadu_ps((const float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di

        yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
        yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di

        tmp1 = _mm_mul_ps(bb_signal_sample, yl);          // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
        tmp2 = _mm_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di

        z = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
        z_L = _mm_add_ps(z_L, z);      // Add the complex multiplication results together

        // VL
        y = _mm_loadu_ps((const float*)_VL_code); // Load the cr + ci, dr + di as cr,ci,dr,di

        yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
        yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di

        tmp1 = _mm_mul_ps(bb_signal_sample, yl);          // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
        tmp2 = _mm_mul_ps(bb_signal_sample_shuffled, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di

        z = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
        z_VL = _mm_add_ps(z_VL, z);    // Add the complex multiplication results together

        /*pointer increment*/
        _carrier += 2;
        _input += 2;
        _VE_code += 2;
        _E_code += 2;
        _P_code += 2;
        _L_code += 2;
        _VL_code += 2;
    }

    _mm_storeu_ps((float*)dotProductVector_VE, z_VE); // Store the results back into the dot product vector
    _mm_storeu_ps((float*)dotProductVector_E, z_E);   // Store the results back into the dot product vector
    _mm_storeu_ps((float*)dotProductVector_P, z_P);   // Store the results back into the dot product vector
    _mm_storeu_ps((float*)dotProductVector_L, z_L);   // Store the results back into the dot product vector
    _mm_storeu_ps((float*)dotProductVector_VL, z_VL); // Store the results back into the dot product vector

    // Horizontal sum of the two complex lanes of each accumulator
    dotProduct_VE += ( dotProductVector_VE[0] + dotProductVector_VE[1] );
    dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
    dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
    dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );
    dotProduct_VL += ( dotProductVector_VL[0] + dotProductVector_VL[1] );

    if((num_points % 2) != 0)
    {
        // Trailing odd sample: wipe off the carrier once, then correlate,
        // in the same order as the vector loop and the generic kernel.
        const lv_32fc_t bb_tail_sample = (*_input) * (*_carrier);
        dotProduct_VE += bb_tail_sample * (*_VE_code);
        dotProduct_E += bb_tail_sample * (*_E_code);
        dotProduct_P += bb_tail_sample * (*_P_code);
        dotProduct_L += bb_tail_sample * (*_L_code);
        dotProduct_VL += bb_tail_sample * (*_VL_code);
    }

    *VE_out = dotProduct_VE;
    *E_out = dotProduct_E;
    *P_out = dotProduct_P;
    *L_out = dotProduct_L;
    *VL_out = dotProduct_VL;
}
|
||||
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param VE_code VE PRN code replica input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Prompt PRN code replica input
|
||||
\param L_code Late PRN code replica input
|
||||
\param VL_code VL PRN code replica input
|
||||
\param VE_out VE correlation output
|
||||
\param E_out Early correlation output
|
||||
\param P_out Prompt correlation output
|
||||
\param L_out Late correlation output
|
||||
\param VL_out VL correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
{
    /*
     * Portable reference implementation.
     * Each input sample is multiplied by the carrier (carrier wipe-off) and
     * the baseband result is correlated against the VE, Early, Prompt, Late
     * and VL code replicas, accumulating into the five outputs.
     * With num_points == 0 all five outputs are simply set to zero.
     */
    lv_32fc_t bb_signal_sample;
    unsigned int i; /* same type as num_points: avoids a signed/unsigned comparison */

    *VE_out = 0;
    *E_out = 0;
    *P_out = 0;
    *L_out = 0;
    *VL_out = 0;

    // perform VE, Early, Prompt, Late and VL correlation
    for(i = 0; i < num_points; ++i)
    {
        // Perform the carrier wipe-off
        bb_signal_sample = input[i] * carrier[i];

        // Accumulate the five correlator arms
        *VE_out += bb_signal_sample * VE_code[i];
        *E_out += bb_signal_sample * E_code[i];
        *P_out += bb_signal_sample * P_code[i];
        *L_out += bb_signal_sample * L_code[i];
        *VL_out += bb_signal_sample * VL_code[i];
    }
}
|
||||
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H */
|
||||
|
||||
|
||||
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H
|
||||
#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param VE_code VE PRN code replica input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Prompt PRN code replica input
|
||||
\param L_code Late PRN code replica input
|
||||
\param VL_code VL PRN code replica input
|
||||
\param VE_out VE correlation output
|
||||
\param E_out Early correlation output
|
||||
\param P_out Prompt correlation output
|
||||
\param L_out Late correlation output
|
||||
\param VL_out VL correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_sse3(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
|
||||
{
|
||||
unsigned int number = 0;
|
||||
const unsigned int halfPoints = num_points / 2;
|
||||
|
||||
lv_32fc_t dotProduct_VE;
|
||||
memset(&dotProduct_VE, 0x0, 2*sizeof(float));
|
||||
lv_32fc_t dotProduct_E;
|
||||
memset(&dotProduct_E, 0x0, 2*sizeof(float));
|
||||
lv_32fc_t dotProduct_P;
|
||||
memset(&dotProduct_P, 0x0, 2*sizeof(float));
|
||||
lv_32fc_t dotProduct_L;
|
||||
memset(&dotProduct_L, 0x0, 2*sizeof(float));
|
||||
lv_32fc_t dotProduct_VL;
|
||||
memset(&dotProduct_VL, 0x0, 2*sizeof(float));
|
||||
|
||||
// Aux vars
|
||||
__m128 x, y, yl, yh, z, tmp1, tmp2, z_VE, z_E, z_P, z_L, z_VL;
|
||||
__m128 bb_signal_sample, bb_signal_sample_shuffled;
|
||||
|
||||
z_VE = _mm_setzero_ps();
|
||||
z_E = _mm_setzero_ps();
|
||||
z_P = _mm_setzero_ps();
|
||||
z_L = _mm_setzero_ps();
|
||||
z_VL = _mm_setzero_ps();
|
||||
|
||||
//input and output vectors
|
||||
const lv_32fc_t* _input = input;
|
||||
const lv_32fc_t* _carrier = carrier;
|
||||
const lv_32fc_t* _VE_code = VE_code;
|
||||
const lv_32fc_t* _E_code = E_code;
|
||||
const lv_32fc_t* _P_code = P_code;
|
||||
const lv_32fc_t* _L_code = L_code;
|
||||
const lv_32fc_t* _VL_code = VL_code;
|
||||
|
||||
for(;number < halfPoints; number++)
|
||||
{
|
||||
// carrier wipe-off (vector point-to-point product)
|
||||
x = _mm_load_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
|
||||
y = _mm_load_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
|
||||
|
||||
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
|
||||
|
||||
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
|
||||
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
|
||||
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
|
||||
bb_signal_sample = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
bb_signal_sample_shuffled = _mm_shuffle_ps(bb_signal_sample,bb_signal_sample,0xB1); // Re-arrange bb_signal_sample to be ai,ar,bi,br
|
||||
|
||||
// correlation VE,E,P,L,VL (5x vector scalar product)
|
||||
// VE
|
||||
y = _mm_load_ps((float*)_VE_code); // Load the cr + ci, dr + di as cr,ci,dr,di
|
||||
|
||||
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
|
||||
|
||||
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
|
||||
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
z_VE = _mm_add_ps(z_VE, z); // Add the complex multiplication results together
|
||||
|
||||
// Early
|
||||
y = _mm_load_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
|
||||
|
||||
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
|
||||
|
||||
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
|
||||
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together
|
||||
|
||||
// Prompt
|
||||
y = _mm_load_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
|
||||
|
||||
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
|
||||
|
||||
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
|
||||
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together
|
||||
|
||||
// Late
|
||||
y = _mm_load_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
|
||||
|
||||
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
|
||||
|
||||
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
|
||||
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together
|
||||
|
||||
// VL
|
||||
//x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
|
||||
y = _mm_load_ps((float*)_VL_code); // Load the cr + ci, dr + di as cr,ci,dr,di
|
||||
|
||||
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
|
||||
|
||||
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
|
||||
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
z_VL = _mm_add_ps(z_VL, z); // Add the complex multiplication results together
|
||||
|
||||
/*pointer increment*/
|
||||
_carrier += 2;
|
||||
_input += 2;
|
||||
_VE_code += 2;
|
||||
_E_code += 2;
|
||||
_P_code += 2;
|
||||
_L_code +=2;
|
||||
_VL_code +=2;
|
||||
}
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VE[2];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VL[2];
|
||||
|
||||
_mm_store_ps((float*)dotProductVector_VE,z_VE); // Store the results back into the dot product vector
|
||||
_mm_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
|
||||
_mm_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
|
||||
_mm_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
|
||||
_mm_store_ps((float*)dotProductVector_VL,z_VL); // Store the results back into the dot product vector
|
||||
|
||||
dotProduct_VE += ( dotProductVector_VE[0] + dotProductVector_VE[1] );
|
||||
dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
|
||||
dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
|
||||
dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );
|
||||
dotProduct_VL += ( dotProductVector_VL[0] + dotProductVector_VL[1] );
|
||||
|
||||
if((num_points % 2) != 0)
|
||||
{
|
||||
dotProduct_VE += (*_input) * (*_VE_code)*(*_carrier);
|
||||
dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
|
||||
dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
|
||||
dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
|
||||
dotProduct_VL += (*_input) * (*_VL_code)*(*_carrier);
|
||||
}
|
||||
|
||||
*VE_out = dotProduct_VE;
|
||||
*E_out = dotProduct_E;
|
||||
*P_out = dotProduct_P;
|
||||
*L_out = dotProduct_L;
|
||||
*VL_out = dotProduct_VL;
|
||||
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param VE_code VE PRN code replica input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Early PRN code replica input
|
||||
\param L_code Early PRN code replica input
|
||||
\param VL_code VL PRN code replica input
|
||||
\param VE_out VE correlation output
|
||||
\param E_out Early correlation output
|
||||
\param P_out Early correlation output
|
||||
\param L_out Early correlation output
|
||||
\param VL_out VL correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
|
||||
{
|
||||
lv_32fc_t bb_signal_sample;
|
||||
|
||||
bb_signal_sample = lv_cmake(0, 0);
|
||||
|
||||
*VE_out = 0;
|
||||
*E_out = 0;
|
||||
*P_out = 0;
|
||||
*L_out = 0;
|
||||
*VL_out = 0;
|
||||
// perform Early, Prompt and Late correlation
|
||||
for(int i=0; i < num_points; ++i)
|
||||
{
|
||||
//Perform the carrier wipe-off
|
||||
bb_signal_sample = input[i] * carrier[i];
|
||||
// Now get early, late, and prompt values for each
|
||||
*VE_out += bb_signal_sample * VE_code[i];
|
||||
*E_out += bb_signal_sample * E_code[i];
|
||||
*P_out += bb_signal_sample * P_code[i];
|
||||
*L_out += bb_signal_sample * L_code[i];
|
||||
*VL_out += bb_signal_sample * VL_code[i];
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H */
|
@ -0,0 +1,183 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8i_accumulator_s8i.h
|
||||
* \brief Volk protokernel: 8 bits (char) scalar accumulator
|
||||
* \authors <ul>
|
||||
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that implements an accumulator of char values
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H
|
||||
#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <xmmintrin.h>
|
||||
/*!
|
||||
\brief Accumulates the values in the input buffer
|
||||
\param result The accumulated result
|
||||
\param inputBuffer The buffer of data to be accumulated
|
||||
\param num_points The number of values in inputBuffer to be accumulated
|
||||
*/
|
||||
static inline void volk_gnsssdr_8i_accumulator_s8i_u_sse3(char* result, const char* inputBuffer, unsigned int num_points){
|
||||
char returnValue = 0;
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
const char* aPtr = inputBuffer;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) char tempBuffer[16];
|
||||
__m128i accumulator = _mm_setzero_si128();
|
||||
__m128i aVal = _mm_setzero_si128();
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++){
|
||||
aVal = _mm_lddqu_si128((__m128i*)aPtr);
|
||||
accumulator = _mm_add_epi8(accumulator, aVal);
|
||||
aPtr += 16;
|
||||
}
|
||||
_mm_storeu_si128((__m128i*)tempBuffer,accumulator);
|
||||
|
||||
for(int i = 0; i<16; ++i){
|
||||
returnValue += tempBuffer[i];
|
||||
}
|
||||
|
||||
for(int i = 0; i<(num_points % 16); ++i){
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
|
||||
*result = returnValue;
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
    \brief Accumulates the values in the input buffer
    \param result The accumulated result; the sum is kept in a char, so only its low 8 bits are retained
    \param inputBuffer The buffer of data to be accumulated
    \param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_8i_accumulator_s8i_generic(char* result, const char* inputBuffer, unsigned int num_points)
{
    char returnValue = 0;

    for(unsigned int number = 0; number < num_points; number++)
        {
            returnValue += inputBuffer[number];
        }

    // With num_points == 0 the result is 0
    *result = returnValue;
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H */
|
||||
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H
|
||||
#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <xmmintrin.h>
|
||||
/*!
|
||||
\brief Accumulates the values in the input buffer
|
||||
\param result The accumulated result
|
||||
\param inputBuffer The buffer of data to be accumulated
|
||||
\param num_points The number of values in inputBuffer to be accumulated
|
||||
*/
|
||||
static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const char* inputBuffer, unsigned int num_points){
|
||||
char returnValue = 0;
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
const char* aPtr = inputBuffer;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) char tempBuffer[16];
|
||||
__m128i accumulator = _mm_setzero_si128();
|
||||
__m128i aVal = _mm_setzero_si128();
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++){
|
||||
aVal = _mm_load_si128((__m128i*)aPtr);
|
||||
accumulator = _mm_add_epi8(accumulator, aVal);
|
||||
aPtr += 16;
|
||||
}
|
||||
_mm_store_si128((__m128i*)tempBuffer,accumulator);
|
||||
|
||||
for(int i = 0; i<16; ++i){
|
||||
returnValue += tempBuffer[i];
|
||||
}
|
||||
|
||||
for(int i = 0; i<(num_points % 16); ++i){
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
|
||||
*result = returnValue;
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
    \brief Accumulates the values in the input buffer
    \param result The accumulated result; the sum is kept in a char, so only its low 8 bits are retained
    \param inputBuffer The buffer of data to be accumulated
    \param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_8i_accumulator_s8i_a_generic(char* result, const char* inputBuffer, unsigned int num_points)
{
    char returnValue = 0;

    for(unsigned int number = 0; number < num_points; number++)
        {
            returnValue += inputBuffer[number];
        }

    // With num_points == 0 the result is 0
    *result = returnValue;
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
    \brief Accumulates the values in the input buffer by delegating to the ORC implementation
    \param result The accumulated result
    \param inputBuffer The buffer of data to be accumulated
    \param num_points The number of values in inputBuffer to be accumulated
*/
extern void volk_gnsssdr_8i_accumulator_s8i_a_orc_impl(short* result, const char* inputBuffer, unsigned int num_points);
static inline void volk_gnsssdr_8i_accumulator_s8i_u_orc(char* result, const char* inputBuffer, unsigned int num_points)
{
    // The ORC routine writes a 16-bit result
    short res = 0;

    volk_gnsssdr_8i_accumulator_s8i_a_orc_impl(&res, inputBuffer, num_points);

    // Only the second byte (in memory order) of the 16-bit result is returned.
    // NOTE(review): which half of the value this is depends on the target's
    // endianness - confirm against the ORC kernel that this is the intended byte.
    *result = ((const char*)&res)[1];
}
|
||||
#endif /* LV_HAVE_ORC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H */
|
||||
|
@ -0,0 +1,493 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8i_index_max_16u.h
|
||||
* \brief Volk protokernel: calculates the index of the maximum value in a group of 8 bits (char) scalars
|
||||
* \authors <ul>
|
||||
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that returns the index of the maximum value of a group of 8 bits (char) scalars
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H
|
||||
#define INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include "immintrin.h"
|
||||
/*!
|
||||
\brief Returns the index of the max value in src0
|
||||
\param target The index of the max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, const char* src0, unsigned int num_points) {
|
||||
if(num_points > 0){
|
||||
const unsigned int sse_iters = num_points / 32;
|
||||
|
||||
char* basePtr = (char*)src0;
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
unsigned int index = 0;
|
||||
__VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
|
||||
__m256i ones, compareResults, currentValues;
|
||||
__m128i compareResultslo, compareResultshi, maxValues, lo, hi;
|
||||
|
||||
ones = _mm256_set1_epi8(0xFF);
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
currentValues = _mm256_lddqu_si256((__m256i*)inputPtr);
|
||||
|
||||
lo = _mm256_castsi256_si128(currentValues);
|
||||
hi = _mm256_extractf128_si256(currentValues,1);
|
||||
|
||||
compareResultslo = _mm_cmpgt_epi8(maxValues, lo);
|
||||
compareResultshi = _mm_cmpgt_epi8(maxValues, hi);
|
||||
|
||||
//compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h
|
||||
compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1);
|
||||
|
||||
if (!_mm256_testc_si256(compareResults, ones))
|
||||
{
|
||||
_mm256_storeu_si256((__m256i*)¤tValuesBuffer, currentValues);
|
||||
|
||||
for(int i = 0; i < 32; i++)
|
||||
{
|
||||
if(currentValuesBuffer[i] > max)
|
||||
{
|
||||
index = inputPtr - basePtr + i;
|
||||
max = currentValuesBuffer[i];
|
||||
}
|
||||
}
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
}
|
||||
|
||||
inputPtr += 32;
|
||||
}
|
||||
|
||||
for(int i = 0; i<(num_points % 32); ++i)
|
||||
{
|
||||
if(src0[i] > max)
|
||||
{
|
||||
index = i;
|
||||
max = src0[i];
|
||||
}
|
||||
}
|
||||
target[0] = index;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_AVX*/
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include<smmintrin.h>
|
||||
/*!
|
||||
\brief Returns the index of the max value in src0
|
||||
\param target The index of the max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, const char* src0, unsigned int num_points) {
|
||||
if(num_points > 0){
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
char* basePtr = (char*)src0;
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
unsigned int index = 0;
|
||||
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
currentValues = _mm_lddqu_si128((__m128i*)inputPtr);
|
||||
|
||||
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
|
||||
|
||||
if (!_mm_test_all_ones(compareResults))
|
||||
{
|
||||
_mm_storeu_si128((__m128i*)¤tValuesBuffer, currentValues);
|
||||
|
||||
for(int i = 0; i < 16; i++)
|
||||
{
|
||||
if(currentValuesBuffer[i] > max)
|
||||
{
|
||||
index = inputPtr - basePtr + i;
|
||||
max = currentValuesBuffer[i];
|
||||
}
|
||||
}
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
}
|
||||
|
||||
inputPtr += 16;
|
||||
}
|
||||
|
||||
for(int i = 0; i<(num_points % 16); ++i)
|
||||
{
|
||||
if(src0[i] > max)
|
||||
{
|
||||
index = i;
|
||||
max = src0[i];
|
||||
}
|
||||
}
|
||||
target[0] = index;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include<xmmintrin.h>
|
||||
/*!
|
||||
\brief Returns the index of the max value in src0
|
||||
\param target The index of the max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, const char* src0, unsigned int num_points) {
|
||||
if(num_points > 0){
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
char* basePtr = (char*)src0;
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
unsigned int index = 0;
|
||||
unsigned short mask;
|
||||
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
currentValues = _mm_loadu_si128((__m128i*)inputPtr);
|
||||
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
|
||||
mask = _mm_movemask_epi8(compareResults);
|
||||
|
||||
if (mask != 0xFFFF)
|
||||
{
|
||||
_mm_storeu_si128((__m128i*)¤tValuesBuffer, currentValues);
|
||||
mask = ~mask;
|
||||
int i = 0;
|
||||
while (mask > 0)
|
||||
{
|
||||
if ((mask & 1) == 1)
|
||||
{
|
||||
if(currentValuesBuffer[i] > max)
|
||||
{
|
||||
index = inputPtr - basePtr + i;
|
||||
max = currentValuesBuffer[i];
|
||||
}
|
||||
}
|
||||
i++;
|
||||
mask >>= 1;
|
||||
}
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
}
|
||||
inputPtr += 16;
|
||||
}
|
||||
|
||||
for(int i = 0; i<(num_points % 16); ++i)
|
||||
{
|
||||
if(src0[i] > max)
|
||||
{
|
||||
index = i;
|
||||
max = src0[i];
|
||||
}
|
||||
}
|
||||
target[0] = index;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE2*/
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
    \brief Returns the index of the max value in src0
    \param target The index of the max value in src0; on ties the first occurrence is kept. Not written when num_points is 0
    \param src0 The buffer of data to be analysed
    \param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, const char* src0, unsigned int num_points)
{
    if(num_points > 0)
        {
            // Element 0 seeds the search, so the scan starts at 1
            char max = src0[0];
            unsigned int index = 0;

            for(unsigned int i = 1; i < num_points; ++i)
                {
                    // Strict comparison keeps the earliest index among equal maxima
                    if(src0[i] > max)
                        {
                            index = i;
                            max = src0[i];
                        }
                }
            target[0] = index;
        }
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H*/
|
||||
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H
|
||||
#define INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include "immintrin.h"
|
||||
/*!
|
||||
\brief Returns the index of the max value in src0
|
||||
\param target The index of the max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, const char* src0, unsigned int num_points) {
|
||||
if(num_points > 0){
|
||||
const unsigned int sse_iters = num_points / 32;
|
||||
|
||||
char* basePtr = (char*)src0;
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
unsigned int index = 0;
|
||||
__VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
|
||||
__m256i ones, compareResults, currentValues;
|
||||
__m128i compareResultslo, compareResultshi, maxValues, lo, hi;
|
||||
|
||||
ones = _mm256_set1_epi8(0xFF);
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
currentValues = _mm256_load_si256((__m256i*)inputPtr);
|
||||
|
||||
lo = _mm256_castsi256_si128(currentValues);
|
||||
hi = _mm256_extractf128_si256(currentValues,1);
|
||||
|
||||
compareResultslo = _mm_cmpgt_epi8(maxValues, lo);
|
||||
compareResultshi = _mm_cmpgt_epi8(maxValues, hi);
|
||||
|
||||
//compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h
|
||||
compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1);
|
||||
|
||||
if (!_mm256_testc_si256(compareResults, ones))
|
||||
{
|
||||
_mm256_store_si256((__m256i*)¤tValuesBuffer, currentValues);
|
||||
|
||||
for(int i = 0; i < 32; i++)
|
||||
{
|
||||
if(currentValuesBuffer[i] > max)
|
||||
{
|
||||
index = inputPtr - basePtr + i;
|
||||
max = currentValuesBuffer[i];
|
||||
}
|
||||
}
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
}
|
||||
|
||||
inputPtr += 32;
|
||||
}
|
||||
|
||||
for(int i = 0; i<(num_points % 32); ++i)
|
||||
{
|
||||
if(src0[i] > max)
|
||||
{
|
||||
index = i;
|
||||
max = src0[i];
|
||||
}
|
||||
}
|
||||
target[0] = index;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_AVX*/
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include "smmintrin.h"
|
||||
#include "emmintrin.h"
|
||||
/*!
|
||||
\brief Returns the index of the max value in src0
|
||||
\param target The index of the max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, const char* src0, unsigned int num_points) {
|
||||
if(num_points > 0){
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
char* basePtr = (char*)src0;
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
unsigned int index = 0;
|
||||
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
currentValues = _mm_load_si128((__m128i*)inputPtr);
|
||||
|
||||
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
|
||||
|
||||
if (!_mm_test_all_ones(compareResults))
|
||||
{
|
||||
_mm_store_si128((__m128i*)¤tValuesBuffer, currentValues);
|
||||
|
||||
for(int i = 0; i < 16; i++)
|
||||
{
|
||||
if(currentValuesBuffer[i] > max)
|
||||
{
|
||||
index = inputPtr - basePtr + i;
|
||||
max = currentValuesBuffer[i];
|
||||
}
|
||||
}
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
}
|
||||
|
||||
inputPtr += 16;
|
||||
}
|
||||
|
||||
for(int i = 0; i<(num_points % 16); ++i)
|
||||
{
|
||||
if(src0[i] > max)
|
||||
{
|
||||
index = i;
|
||||
max = src0[i];
|
||||
}
|
||||
}
|
||||
target[0] = index;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include "emmintrin.h"
|
||||
/*!
|
||||
\brief Returns the index of the max value in src0
|
||||
\param target The index of the max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, const char* src0, unsigned int num_points) {
|
||||
if(num_points > 0){
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
char* basePtr = (char*)src0;
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
unsigned int index = 0;
|
||||
unsigned short mask;
|
||||
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
currentValues = _mm_load_si128((__m128i*)inputPtr);
|
||||
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
|
||||
mask = _mm_movemask_epi8(compareResults);
|
||||
|
||||
if (mask != 0xFFFF)
|
||||
{
|
||||
_mm_store_si128((__m128i*)¤tValuesBuffer, currentValues);
|
||||
mask = ~mask;
|
||||
int i = 0;
|
||||
while (mask > 0)
|
||||
{
|
||||
if ((mask & 1) == 1)
|
||||
{
|
||||
if(currentValuesBuffer[i] > max)
|
||||
{
|
||||
index = inputPtr - basePtr + i;
|
||||
max = currentValuesBuffer[i];
|
||||
}
|
||||
}
|
||||
i++;
|
||||
mask >>= 1;
|
||||
}
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
}
|
||||
inputPtr += 16;
|
||||
}
|
||||
|
||||
for(int i = 0; i<(num_points % 16); ++i)
|
||||
{
|
||||
if(src0[i] > max)
|
||||
{
|
||||
index = i;
|
||||
max = src0[i];
|
||||
}
|
||||
}
|
||||
target[0] = index;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE2*/
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
    \brief Returns the index of the max value in src0
    \param target The index of the max value in src0; on ties the first occurrence is kept. Not written when num_points is 0
    \param src0 The buffer of data to be analysed
    \param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_a_generic(unsigned int* target, const char* src0, unsigned int num_points)
{
    if(num_points > 0)
        {
            // Element 0 seeds the search, so the scan starts at 1
            char max = src0[0];
            unsigned int index = 0;

            for(unsigned int i = 1; i < num_points; ++i)
                {
                    // Strict comparison keeps the earliest index among equal maxima
                    if(src0[i] > max)
                        {
                            index = i;
                            max = src0[i];
                        }
                }
            target[0] = index;
        }
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H*/
|
@ -0,0 +1,327 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8i_max_s8i.h
|
||||
* \brief Volk protokernel: calculates the maximum value in a group of 8 bits (char) scalars
|
||||
* \authors <ul>
|
||||
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that returns the maximum value of a group of 8 bits (char) scalars
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_u_H
|
||||
#define INCLUDED_volk_gnsssdr_8i_max_s8i_u_H
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include<smmintrin.h>
|
||||
/*!
|
||||
\brief Returns the max value in src0
|
||||
\param target The max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
static inline void volk_gnsssdr_8i_max_s8i_u_sse4_1(char target, const char* src0, unsigned int num_points) {
|
||||
if(num_points > 0){
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
__VOLK_ATTR_ALIGNED(16) char maxValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
currentValues = _mm_loadu_si128((__m128i*)inputPtr);
|
||||
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
|
||||
maxValues = _mm_blendv_epi8(currentValues, maxValues, compareResults);
|
||||
inputPtr += 16;
|
||||
}
|
||||
|
||||
_mm_storeu_si128((__m128i*)maxValuesBuffer, maxValues);
|
||||
|
||||
for(int i = 0; i<16; ++i)
|
||||
{
|
||||
if(maxValuesBuffer[i] > max)
|
||||
{
|
||||
max = maxValuesBuffer[i];
|
||||
}
|
||||
}
|
||||
|
||||
for(int i = 0; i<(num_points % 16); ++i)
|
||||
{
|
||||
if(src0[i] > max)
|
||||
{
|
||||
max = src0[i];
|
||||
}
|
||||
}
|
||||
target = max;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include<xmmintrin.h>
|
||||
/*!
|
||||
\brief Returns the max value in src0
|
||||
\param target The max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
static inline void volk_gnsssdr_8i_max_s8i_u_sse2(char target, const char* src0, unsigned int num_points) {
|
||||
if(num_points > 0){
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
unsigned short mask;
|
||||
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
currentValues = _mm_loadu_si128((__m128i*)inputPtr);
|
||||
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
|
||||
mask = _mm_movemask_epi8(compareResults);
|
||||
|
||||
if (mask != 0xFFFF)
|
||||
{
|
||||
_mm_storeu_si128((__m128i*)¤tValuesBuffer, currentValues);
|
||||
mask = ~mask;
|
||||
int i = 0;
|
||||
while (mask > 0)
|
||||
{
|
||||
if ((mask & 1) == 1)
|
||||
{
|
||||
if(currentValuesBuffer[i] > max)
|
||||
{
|
||||
max = currentValuesBuffer[i];
|
||||
}
|
||||
}
|
||||
i++;
|
||||
mask >>= 1;
|
||||
}
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
}
|
||||
inputPtr += 16;
|
||||
}
|
||||
|
||||
for(int i = 0; i<(num_points % 16); ++i)
|
||||
{
|
||||
if(src0[i] > max)
|
||||
{
|
||||
max = src0[i];
|
||||
}
|
||||
}
|
||||
target = max;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE2*/
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
    \brief Computes the max value in src0
    \param target Receives the max value in src0. NOTE(review): this parameter is passed by value, so the computed maximum is not visible to the caller
    \param src0 The buffer of data to be analysed
    \param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_max_s8i_generic(char target, const char* src0, unsigned int num_points)
{
    if(num_points > 0)
        {
            // Element 0 seeds the search, so the scan starts at 1
            char max = src0[0];

            for(unsigned int i = 1; i < num_points; ++i)
                {
                    if(src0[i] > max)
                        {
                            max = src0[i];
                        }
                }

            // NOTE(review): `target` is a by-value parameter, so this store is lost on
            // return. Delivering the result needs a signature change (e.g. `char*`),
            // which would have to be made across every implementation of this kernel.
            target = max;
        }
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_u_H*/
|
||||
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_a_H
|
||||
#define INCLUDED_volk_gnsssdr_8i_max_s8i_a_H
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include "smmintrin.h"
|
||||
/*!
|
||||
\brief Returns the max value in src0
|
||||
\param target The max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
static inline void volk_gnsssdr_8i_max_s8i_a_sse4_1(char target, const char* src0, unsigned int num_points) {
|
||||
if(num_points > 0){
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
__VOLK_ATTR_ALIGNED(16) char maxValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
currentValues = _mm_load_si128((__m128i*)inputPtr);
|
||||
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
|
||||
maxValues = _mm_blendv_epi8(currentValues, maxValues, compareResults);
|
||||
inputPtr += 16;
|
||||
}
|
||||
|
||||
_mm_store_si128((__m128i*)maxValuesBuffer, maxValues);
|
||||
|
||||
for(int i = 0; i<16; ++i)
|
||||
{
|
||||
if(maxValuesBuffer[i] > max)
|
||||
{
|
||||
max = maxValuesBuffer[i];
|
||||
}
|
||||
}
|
||||
|
||||
for(int i = 0; i<(num_points % 16); ++i)
|
||||
{
|
||||
if(src0[i] > max)
|
||||
{
|
||||
max = src0[i];
|
||||
}
|
||||
}
|
||||
target = max;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include "emmintrin.h"
|
||||
/*!
|
||||
\brief Returns the max value in src0
|
||||
\param target The max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
static inline void volk_gnsssdr_8i_max_s8i_a_sse2(char target, const char* src0, unsigned int num_points) {
|
||||
if(num_points > 0){
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
unsigned short mask;
|
||||
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
currentValues = _mm_load_si128((__m128i*)inputPtr);
|
||||
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
|
||||
mask = _mm_movemask_epi8(compareResults);
|
||||
|
||||
if (mask != 0xFFFF)
|
||||
{
|
||||
_mm_store_si128((__m128i*)¤tValuesBuffer, currentValues);
|
||||
mask = ~mask;
|
||||
int i = 0;
|
||||
while (mask > 0)
|
||||
{
|
||||
if ((mask & 1) == 1)
|
||||
{
|
||||
if(currentValuesBuffer[i] > max)
|
||||
{
|
||||
max = currentValuesBuffer[i];
|
||||
}
|
||||
}
|
||||
i++;
|
||||
mask >>= 1;
|
||||
}
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
}
|
||||
inputPtr += 16;
|
||||
}
|
||||
|
||||
for(int i = 0; i<(num_points % 16); ++i)
|
||||
{
|
||||
if(src0[i] > max)
|
||||
{
|
||||
max = src0[i];
|
||||
}
|
||||
}
|
||||
target = max;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE2*/
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
  \brief Scans src0 for its largest signed 8-bit value (portable C version)
  \param target Meant to receive the max value in src0.
                NOTE(review): target is passed by value, so the final assignment is not
                visible to the caller. It presumably should be a char*; the signature is
                kept as-is so existing callers still compile - confirm.
  \param src0 The buffer of data to be analysed
  \param num_points The number of values in src0 to be analysed; nothing is read when it is 0
*/
static inline void volk_gnsssdr_8i_max_s8i_a_generic(char target, const char* src0, unsigned int num_points)
{
    if(num_points > 0)
    {
        char max = src0[0];

        for(unsigned int i = 1; i < num_points; ++i)
        {
            if(src0[i] > max)
            {
                max = src0[i];
            }
        }
        target = max;
    }
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_a_H*/
|
@ -0,0 +1,184 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8i_x2_add_8i.h
|
||||
* \brief Volk protokernel: adds pairs of 8 bits (char) scalars
|
||||
* \authors <ul>
|
||||
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that adds pairs of 8 bits (char) scalars
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H
|
||||
#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
/*!
  \brief Adds the two input vectors element by element and stores the sums in a third vector (SSE2, unaligned)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
{
    const unsigned int sse_iters = num_points / 16;
    const unsigned int tail_points = num_points % 16;

    char* cPtr = cVector;
    const char* aPtr = aVector;
    const char* bPtr = bVector;

    __m128i aVal, bVal, cVal;

    for(unsigned int number = 0; number < sse_iters; number++)
    {
        /* _mm_loadu_si128 is the SSE2 unaligned load, matching this kernel's LV_HAVE_SSE2 guard */
        aVal = _mm_loadu_si128((const __m128i*)aPtr);
        bVal = _mm_loadu_si128((const __m128i*)bPtr);

        cVal = _mm_add_epi8(aVal, bVal);

        _mm_storeu_si128((__m128i*)cPtr, cVal);  /* Store the results back into the C container */

        aPtr += 16;
        bPtr += 16;
        cPtr += 16;
    }

    /* Scalar clean-up for the samples that do not fill a 16-byte group */
    for(unsigned int i = 0; i < tail_points; ++i)
    {
        *cPtr++ = (*aPtr++) + (*bPtr++);
    }
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
  \brief Adds the two input vectors element by element and stores the sums in a third vector (portable C version)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_8i_x2_add_8i_generic(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
{
    char* cPtr = cVector;
    const char* aPtr = aVector;
    const char* bPtr = bVector;
    unsigned int number = 0;

    for(number = 0; number < num_points; number++)
    {
        *cPtr++ = (*aPtr++) + (*bPtr++);
    }
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H */
|
||||
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H
|
||||
#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
/*!
  \brief Adds the two input vectors element by element and stores the sums in a third vector (SSE2, aligned)
  \param cVector The vector where the results will be stored; written with aligned 16-byte stores
  \param aVector One of the vectors to be added; read with aligned 16-byte loads
  \param bVector One of the vectors to be added; read with aligned 16-byte loads
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_8i_x2_add_8i_a_sse2(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
{
    const unsigned int sse_iters = num_points / 16;
    const unsigned int tail_points = num_points % 16;

    char* cPtr = cVector;
    const char* aPtr = aVector;
    const char* bPtr = bVector;

    __m128i aVal, bVal, cVal;

    for(unsigned int number = 0; number < sse_iters; number++)
    {
        aVal = _mm_load_si128((const __m128i*)aPtr);
        bVal = _mm_load_si128((const __m128i*)bPtr);

        cVal = _mm_add_epi8(aVal, bVal);

        _mm_store_si128((__m128i*)cPtr, cVal);  /* Store the results back into the C container */

        aPtr += 16;
        bPtr += 16;
        cPtr += 16;
    }

    /* Scalar clean-up for the samples that do not fill a 16-byte group */
    for(unsigned int i = 0; i < tail_points; ++i)
    {
        *cPtr++ = (*aPtr++) + (*bPtr++);
    }
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
  \brief Adds the two input vectors element by element and stores the sums in a third vector (portable C version)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_8i_x2_add_8i_a_generic(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
{
    char* cPtr = cVector;
    const char* aPtr = aVector;
    const char* bPtr = bVector;
    unsigned int number = 0;

    for(number = 0; number < num_points; number++)
    {
        *cPtr++ = (*aPtr++) + (*bPtr++);
    }
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
  \brief Adds the two input vectors and stores their results in the third vector (ORC version)

  Thin wrapper that forwards all arguments unchanged to the externally defined
  volk_gnsssdr_8i_x2_add_8i_a_orc_impl.

  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
extern void volk_gnsssdr_8i_x2_add_8i_a_orc_impl(char* cVector, const char* aVector, const char* bVector, unsigned int num_points);
static inline void volk_gnsssdr_8i_x2_add_8i_u_orc(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
{
    volk_gnsssdr_8i_x2_add_8i_a_orc_impl(cVector, aVector, bVector, num_points);
}
|
||||
#endif /* LV_HAVE_ORC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H */
|
@ -0,0 +1,326 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8ic_conjugate_8ic.h
|
||||
* \brief Volk protokernel: calculates the conjugate of a 16 bits vector
|
||||
* \authors <ul>
|
||||
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that calculates the conjugate of a
|
||||
* 16 bits vector (8 bits the real part and 8 bits the imaginary part)
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include "immintrin.h"
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_avx(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
lv_8sc_t* c = cVector;
|
||||
const lv_8sc_t* a = aVector;
|
||||
|
||||
__m256 tmp;
|
||||
__m128i tmp128lo, tmp128hi;
|
||||
__m256 conjugator1 = _mm256_castsi256_ps(_mm256_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255));
|
||||
__m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1);
|
||||
|
||||
for (int i = 0; i < sse_iters; ++i)
|
||||
{
|
||||
tmp = _mm256_loadu_ps((float*)a);
|
||||
tmp = _mm256_xor_ps(tmp, conjugator1);
|
||||
tmp128lo = _mm256_castsi256_si128(_mm256_castps_si256(tmp));
|
||||
tmp128lo = _mm_add_epi8(tmp128lo, conjugator2);
|
||||
tmp128hi = _mm256_extractf128_si256(_mm256_castps_si256(tmp),1);
|
||||
tmp128hi = _mm_add_epi8(tmp128hi, conjugator2);
|
||||
//tmp = _mm256_set_m128i(tmp128hi , tmp128lo); //not defined in some versions of immintrin.h
|
||||
tmp = _mm256_insertf128_si256(_mm256_castsi128_si256(tmp128lo),(tmp128hi),1);
|
||||
_mm256_storeu_ps((float*)c, tmp);
|
||||
|
||||
a += 16;
|
||||
c += 16;
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 16); ++i)
|
||||
{
|
||||
*c++ = lv_conj(*a++);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_AVX */
|
||||
|
||||
#ifdef LV_HAVE_SSSE3
|
||||
#include "tmmintrin.h"
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_ssse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
lv_8sc_t* c = cVector;
|
||||
const lv_8sc_t* a = aVector;
|
||||
__m128i tmp;
|
||||
|
||||
__m128i conjugator = _mm_setr_epi8(1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1);
|
||||
|
||||
for (int i = 0; i < sse_iters; ++i)
|
||||
{
|
||||
tmp = _mm_lddqu_si128((__m128i*)a);
|
||||
tmp = _mm_sign_epi8(tmp, conjugator);
|
||||
_mm_storeu_si128((__m128i*)c, tmp);
|
||||
a += 8;
|
||||
c += 8;
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 8); ++i)
|
||||
{
|
||||
*c++ = lv_conj(*a++);
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_SSSE3 */
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
lv_8sc_t* c = cVector;
|
||||
const lv_8sc_t* a = aVector;
|
||||
__m128i tmp;
|
||||
|
||||
__m128i conjugator1 = _mm_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
__m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1);
|
||||
|
||||
for (int i = 0; i < sse_iters; ++i)
|
||||
{
|
||||
tmp = _mm_lddqu_si128((__m128i*)a);
|
||||
tmp = _mm_xor_si128(tmp, conjugator1);
|
||||
tmp = _mm_add_epi8(tmp, conjugator2);
|
||||
_mm_storeu_si128((__m128i*)c, tmp);
|
||||
a += 8;
|
||||
c += 8;
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 8); ++i)
|
||||
{
|
||||
*c++ = lv_conj(*a++);
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
|
||||
lv_8sc_t* cPtr = cVector;
|
||||
const lv_8sc_t* aPtr = aVector;
|
||||
unsigned int number = 0;
|
||||
|
||||
for(number = 0; number < num_points; number++){
|
||||
*cPtr++ = lv_conj(*aPtr++);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H */
|
||||
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include "immintrin.h"
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_avx(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
lv_8sc_t* c = cVector;
|
||||
const lv_8sc_t* a = aVector;
|
||||
|
||||
__m256 tmp;
|
||||
__m128i tmp128lo, tmp128hi;
|
||||
__m256 conjugator1 = _mm256_castsi256_ps(_mm256_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255));
|
||||
__m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1);
|
||||
|
||||
for (int i = 0; i < sse_iters; ++i)
|
||||
{
|
||||
tmp = _mm256_load_ps((float*)a);
|
||||
tmp = _mm256_xor_ps(tmp, conjugator1);
|
||||
tmp128lo = _mm256_castsi256_si128(_mm256_castps_si256(tmp));
|
||||
tmp128lo = _mm_add_epi8(tmp128lo, conjugator2);
|
||||
tmp128hi = _mm256_extractf128_si256(_mm256_castps_si256(tmp),1);
|
||||
tmp128hi = _mm_add_epi8(tmp128hi, conjugator2);
|
||||
//tmp = _mm256_set_m128i(tmp128hi , tmp128lo); //not defined in some versions of immintrin.h
|
||||
tmp = _mm256_insertf128_si256(_mm256_castsi128_si256(tmp128lo),(tmp128hi),1);
|
||||
_mm256_store_ps((float*)c, tmp);
|
||||
|
||||
a += 16;
|
||||
c += 16;
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 16); ++i)
|
||||
{
|
||||
*c++ = lv_conj(*a++);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_AVX */
|
||||
|
||||
#ifdef LV_HAVE_SSSE3
|
||||
#include "tmmintrin.h"
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_ssse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
lv_8sc_t* c = cVector;
|
||||
const lv_8sc_t* a = aVector;
|
||||
__m128i tmp;
|
||||
|
||||
__m128i conjugator = _mm_setr_epi8(1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1);
|
||||
|
||||
for (int i = 0; i < sse_iters; ++i)
|
||||
{
|
||||
tmp = _mm_load_si128((__m128i*)a);
|
||||
tmp = _mm_sign_epi8(tmp, conjugator);
|
||||
_mm_store_si128((__m128i*)c, tmp);
|
||||
a += 8;
|
||||
c += 8;
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 8); ++i)
|
||||
{
|
||||
*c++ = lv_conj(*a++);
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_SSSE3 */
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
lv_8sc_t* c = cVector;
|
||||
const lv_8sc_t* a = aVector;
|
||||
__m128i tmp;
|
||||
|
||||
__m128i conjugator1 = _mm_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
__m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1);
|
||||
|
||||
for (int i = 0; i < sse_iters; ++i)
|
||||
{
|
||||
tmp = _mm_load_si128((__m128i*)a);
|
||||
tmp = _mm_xor_si128(tmp, conjugator1);
|
||||
tmp = _mm_add_epi8(tmp, conjugator2);
|
||||
_mm_store_si128((__m128i*)c, tmp);
|
||||
a += 8;
|
||||
c += 8;
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 8); ++i)
|
||||
{
|
||||
*c++ = lv_conj(*a++);
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
|
||||
lv_8sc_t* cPtr = cVector;
|
||||
const lv_8sc_t* aPtr = aVector;
|
||||
unsigned int number = 0;
|
||||
|
||||
for(number = 0; number < num_points; number++){
|
||||
*cPtr++ = lv_conj(*aPtr++);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
extern void volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points);
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
|
||||
volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl(cVector, aVector, num_points);
|
||||
}
|
||||
#endif /* LV_HAVE_ORC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H */
|
@ -0,0 +1,320 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8ic_magnitude_squared_8i.h
|
||||
* \brief Volk protokernel: calculates the magnitude squared of a 16 bits vector
|
||||
* \authors <ul>
|
||||
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that calculates the magnitude squared of a
|
||||
* 16 bits vector (8 bits the real part and 8 bits the imaginary part)
|
||||
* result = (real*real) + (imag*imag)
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_u_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
#include "tmmintrin.h"
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse3(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
|
||||
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
const char* complexVectorPtr = (char*)complexVector;
|
||||
char* magnitudeVectorPtr = magnitudeVector;
|
||||
|
||||
__m128i zero, result8;
|
||||
__m128i avector, avectorhi, avectorlo, avectorlomult, avectorhimult, aadded, maska;
|
||||
__m128i bvector, bvectorhi, bvectorlo, bvectorlomult, bvectorhimult, badded, maskb;
|
||||
|
||||
zero = _mm_setzero_si128();
|
||||
maska = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
|
||||
maskb = _mm_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
|
||||
|
||||
for(int number = 0;number < sse_iters; number++)
|
||||
{
|
||||
avector = _mm_lddqu_si128((__m128i*)complexVectorPtr);
|
||||
avectorlo = _mm_unpacklo_epi8 (avector, zero);
|
||||
avectorhi = _mm_unpackhi_epi8 (avector, zero);
|
||||
avectorlomult = _mm_mullo_epi16 (avectorlo, avectorlo);
|
||||
avectorhimult = _mm_mullo_epi16 (avectorhi, avectorhi);
|
||||
aadded = _mm_hadd_epi16 (avectorlomult, avectorhimult);
|
||||
|
||||
complexVectorPtr += 16;
|
||||
|
||||
bvector = _mm_lddqu_si128((__m128i*)complexVectorPtr);
|
||||
bvectorlo = _mm_unpacklo_epi8 (bvector, zero);
|
||||
bvectorhi = _mm_unpackhi_epi8 (bvector, zero);
|
||||
bvectorlomult = _mm_mullo_epi16 (bvectorlo, bvectorlo);
|
||||
bvectorhimult = _mm_mullo_epi16 (bvectorhi, bvectorhi);
|
||||
badded = _mm_hadd_epi16 (bvectorlomult, bvectorhimult);
|
||||
|
||||
complexVectorPtr += 16;
|
||||
|
||||
result8 = _mm_or_si128(_mm_shuffle_epi8(aadded, maska), _mm_shuffle_epi8(badded, maskb));
|
||||
|
||||
_mm_storeu_si128((__m128i*)magnitudeVectorPtr, result8);
|
||||
|
||||
magnitudeVectorPtr += 16;
|
||||
|
||||
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 16); ++i)
|
||||
{
|
||||
const char valReal = *complexVectorPtr++;
|
||||
const char valImag = *complexVectorPtr++;
|
||||
*magnitudeVectorPtr++ = (valReal * valReal) + (valImag * valImag);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
//#ifdef LV_HAVE_SSE
|
||||
//#include <xmmintrin.h>
|
||||
///*!
|
||||
// \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
|
||||
// \param complexVector The vector containing the complex input values
|
||||
// \param magnitudeVector The vector containing the real output values
|
||||
// \param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
// */
|
||||
//static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
|
||||
// unsigned int number = 0;
|
||||
// const unsigned int quarterPoints = num_points / 4;
|
||||
//
|
||||
// const float* complexVectorPtr = (float*)complexVector;
|
||||
// float* magnitudeVectorPtr = magnitudeVector;
|
||||
//
|
||||
// __m128 cplxValue1, cplxValue2, iValue, qValue, result;
|
||||
// for(;number < quarterPoints; number++){
|
||||
// cplxValue1 = _mm_loadu_ps(complexVectorPtr);
|
||||
// complexVectorPtr += 4;
|
||||
//
|
||||
// cplxValue2 = _mm_loadu_ps(complexVectorPtr);
|
||||
// complexVectorPtr += 4;
|
||||
//
|
||||
// // Arrange in i1i2i3i4 format
|
||||
// iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
|
||||
// // Arrange in q1q2q3q4 format
|
||||
// qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
|
||||
//
|
||||
// iValue = _mm_mul_ps(iValue, iValue); // Square the I values
|
||||
// qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
|
||||
//
|
||||
// result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
|
||||
//
|
||||
// _mm_storeu_ps(magnitudeVectorPtr, result);
|
||||
// magnitudeVectorPtr += 4;
|
||||
// }
|
||||
//
|
||||
// number = quarterPoints * 4;
|
||||
// for(; number < num_points; number++){
|
||||
// float val1Real = *complexVectorPtr++;
|
||||
// float val1Imag = *complexVectorPtr++;
|
||||
// *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
|
||||
// }
|
||||
//}
|
||||
//#endif /* LV_HAVE_SSE */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_generic(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
|
||||
const char* complexVectorPtr = (char*)complexVector;
|
||||
char* magnitudeVectorPtr = magnitudeVector;
|
||||
|
||||
for(int number = 0; number < num_points; number++){
|
||||
const char real = *complexVectorPtr++;
|
||||
const char imag = *complexVectorPtr++;
|
||||
*magnitudeVectorPtr++ = (real*real) + (imag*imag);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_u_H */
|
||||
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_a_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
|
||||
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
const char* complexVectorPtr = (char*)complexVector;
|
||||
char* magnitudeVectorPtr = magnitudeVector;
|
||||
|
||||
__m128i zero, result8;
|
||||
__m128i avector, avectorhi, avectorlo, avectorlomult, avectorhimult, aadded, maska;
|
||||
__m128i bvector, bvectorhi, bvectorlo, bvectorlomult, bvectorhimult, badded, maskb;
|
||||
|
||||
zero = _mm_setzero_si128();
|
||||
maska = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
|
||||
maskb = _mm_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
|
||||
|
||||
for(int number = 0;number < sse_iters; number++)
|
||||
{
|
||||
avector = _mm_load_si128((__m128i*)complexVectorPtr);
|
||||
avectorlo = _mm_unpacklo_epi8 (avector, zero);
|
||||
avectorhi = _mm_unpackhi_epi8 (avector, zero);
|
||||
avectorlomult = _mm_mullo_epi16 (avectorlo, avectorlo);
|
||||
avectorhimult = _mm_mullo_epi16 (avectorhi, avectorhi);
|
||||
aadded = _mm_hadd_epi16 (avectorlomult, avectorhimult);
|
||||
|
||||
complexVectorPtr += 16;
|
||||
|
||||
bvector = _mm_load_si128((__m128i*)complexVectorPtr);
|
||||
bvectorlo = _mm_unpacklo_epi8 (bvector, zero);
|
||||
bvectorhi = _mm_unpackhi_epi8 (bvector, zero);
|
||||
bvectorlomult = _mm_mullo_epi16 (bvectorlo, bvectorlo);
|
||||
bvectorhimult = _mm_mullo_epi16 (bvectorhi, bvectorhi);
|
||||
badded = _mm_hadd_epi16 (bvectorlomult, bvectorhimult);
|
||||
|
||||
complexVectorPtr += 16;
|
||||
|
||||
result8 = _mm_or_si128(_mm_shuffle_epi8(aadded, maska), _mm_shuffle_epi8(badded, maskb));
|
||||
|
||||
_mm_store_si128((__m128i*)magnitudeVectorPtr, result8);
|
||||
|
||||
magnitudeVectorPtr += 16;
|
||||
|
||||
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 16); ++i)
|
||||
{
|
||||
const char valReal = *complexVectorPtr++;
|
||||
const char valImag = *complexVectorPtr++;
|
||||
*magnitudeVectorPtr++ = (valReal * valReal) + (valImag * valImag);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
//#ifdef LV_HAVE_SSE
|
||||
//#include <xmmintrin.h>
|
||||
///*!
|
||||
// \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
|
||||
// \param complexVector The vector containing the complex input values
|
||||
// \param magnitudeVector The vector containing the real output values
|
||||
// \param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
// */
|
||||
//static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
|
||||
// unsigned int number = 0;
|
||||
// const unsigned int quarterPoints = num_points / 4;
|
||||
//
|
||||
// const float* complexVectorPtr = (float*)complexVector;
|
||||
// float* magnitudeVectorPtr = magnitudeVector;
|
||||
//
|
||||
// __m128 cplxValue1, cplxValue2, iValue, qValue, result;
|
||||
// for(;number < quarterPoints; number++){
|
||||
// cplxValue1 = _mm_load_ps(complexVectorPtr);
|
||||
// complexVectorPtr += 4;
|
||||
//
|
||||
// cplxValue2 = _mm_load_ps(complexVectorPtr);
|
||||
// complexVectorPtr += 4;
|
||||
//
|
||||
// // Arrange in i1i2i3i4 format
|
||||
// iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
|
||||
// // Arrange in q1q2q3q4 format
|
||||
// qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
|
||||
//
|
||||
// iValue = _mm_mul_ps(iValue, iValue); // Square the I values
|
||||
// qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
|
||||
//
|
||||
// result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
|
||||
//
|
||||
// _mm_store_ps(magnitudeVectorPtr, result);
|
||||
// magnitudeVectorPtr += 4;
|
||||
// }
|
||||
//
|
||||
// number = quarterPoints * 4;
|
||||
// for(; number < num_points; number++){
|
||||
// float val1Real = *complexVectorPtr++;
|
||||
// float val1Imag = *complexVectorPtr++;
|
||||
// *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
|
||||
// }
|
||||
//}
|
||||
//#endif /* LV_HAVE_SSE */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_generic(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
|
||||
const char* complexVectorPtr = (char*)complexVector;
|
||||
char* magnitudeVectorPtr = magnitudeVector;
|
||||
|
||||
for(int number = 0; number < num_points; number++){
|
||||
const char real = *complexVectorPtr++;
|
||||
const char imag = *complexVectorPtr++;
|
||||
*magnitudeVectorPtr++ = (real*real) + (imag*imag);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector by forwarding all arguments to the external ORC implementation
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
NOTE(review): the wrapper is named _u_orc but calls the _a_orc_impl symbol; confirm this naming is intended.
|
||||
*/
|
||||
extern void volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points);
|
||||
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_orc(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
|
||||
volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl(magnitudeVector, complexVector, num_points);
|
||||
}
|
||||
#endif /* LV_HAVE_ORC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_a_H */
|
@ -0,0 +1,271 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8ic_s8ic_multiply_8ic.h
|
||||
* \brief Volk protokernel: multiplies a group of 16 bits vectors by one constant vector
|
||||
* \authors <ul>
|
||||
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that multiplies a group of 16 bits vectors
|
||||
* (8 bits the real part and 8 bits the imaginary part) by one constant vector
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_u_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Multiplies the input vector by a scalar and stores the results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector The vector to be multiplied
|
||||
\param scalar The complex scalar to multiply aVector
|
||||
\param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
|
||||
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
|
||||
|
||||
lv_8sc_t* c = cVector;
|
||||
const lv_8sc_t* a = aVector;
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
|
||||
y = _mm_set1_epi16 (*(short*)&scalar);
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
for(int number = 0;number < sse_iters; number++){
|
||||
|
||||
x = _mm_lddqu_si128((__m128i*)a);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
realc = _mm_and_si128 (realc, mult1);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_and_si128 (imagc, mult1);
|
||||
imagc = _mm_slli_si128 (imagc, 1);
|
||||
|
||||
totalc = _mm_or_si128 (realc, imagc);
|
||||
|
||||
_mm_storeu_si128((__m128i*)c, totalc);
|
||||
|
||||
a += 8;
|
||||
c += 8;
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 8); ++i)
|
||||
{
|
||||
*c++ = (*a++) * scalar;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the input vector by a scalar and stores the results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector The vector to be multiplied
|
||||
\param scalar The complex scalar to multiply aVector
|
||||
\param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
|
||||
|
||||
/*lv_8sc_t* cPtr = cVector;
|
||||
const lv_8sc_t* aPtr = aVector;
|
||||
|
||||
for (int i = 0; i<num_points; ++i)
|
||||
{
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
}*/
|
||||
|
||||
lv_8sc_t* cPtr = cVector;
|
||||
const lv_8sc_t* aPtr = aVector;
|
||||
unsigned int number = num_points;
|
||||
|
||||
// unwrap loop
|
||||
while (number >= 8){
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
number -= 8;
|
||||
}
|
||||
|
||||
// clean up any remaining
|
||||
while (number-- > 0)
|
||||
*cPtr++ = *aPtr++ * scalar;
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_u_H */
|
||||
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_a_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Multiplies the input vector by a scalar and stores the results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector The vector to be multiplied
|
||||
\param scalar The complex scalar to multiply aVector
|
||||
\param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
|
||||
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
|
||||
|
||||
lv_8sc_t* c = cVector;
|
||||
const lv_8sc_t* a = aVector;
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
|
||||
y = _mm_set1_epi16 (*(short*)&scalar);
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
for(int number = 0;number < sse_iters; number++){
|
||||
|
||||
x = _mm_load_si128((__m128i*)a);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
realc = _mm_and_si128 (realc, mult1);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_and_si128 (imagc, mult1);
|
||||
imagc = _mm_slli_si128 (imagc, 1);
|
||||
|
||||
totalc = _mm_or_si128 (realc, imagc);
|
||||
|
||||
_mm_store_si128((__m128i*)c, totalc);
|
||||
|
||||
a += 8;
|
||||
c += 8;
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 8); ++i)
|
||||
{
|
||||
*c++ = (*a++) * scalar;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the input vector by a scalar and stores the results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector The vector to be multiplied
|
||||
\param scalar The complex scalar to multiply aVector
|
||||
\param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
|
||||
|
||||
/*lv_8sc_t* cPtr = cVector;
|
||||
const lv_8sc_t* aPtr = aVector;
|
||||
|
||||
for (int i = 0; i<num_points; ++i)
|
||||
{
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
}*/
|
||||
|
||||
lv_8sc_t* cPtr = cVector;
|
||||
const lv_8sc_t* aPtr = aVector;
|
||||
unsigned int number = num_points;
|
||||
|
||||
// unwrap loop
|
||||
while (number >= 8){
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
*cPtr++ = (*aPtr++) * scalar;
|
||||
number -= 8;
|
||||
}
|
||||
|
||||
// clean up any remaining
|
||||
while (number-- > 0)
|
||||
*cPtr++ = *aPtr++ * scalar;
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Multiplies the input vector by a scalar and stores the results in the third vector by forwarding to the external ORC implementation
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector The vector to be multiplied
|
||||
\param scalar The complex scalar to multiply aVector; it is passed on as separate real and imaginary parts
|
||||
\param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
NOTE(review): the wrapper is named _u_orc but calls the _a_orc_impl symbol; confirm this naming is intended.
|
||||
*/
|
||||
extern void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const char scalarreal, const char scalarimag, unsigned int num_points);
|
||||
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
|
||||
volk_gnsssdr_8ic_s8ic_multiply_8ic_a_orc_impl(cVector, aVector, lv_creal(scalar), lv_cimag(scalar), num_points);
|
||||
}
|
||||
#endif /* LV_HAVE_ORC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_a_H */
|
@ -0,0 +1,499 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8ic_x2_dot_prod_8ic.h
|
||||
* \brief Volk protokernel: multiplies two 16 bits vectors and accumulates them
|
||||
* \authors <ul>
|
||||
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
|
||||
* and 8 bits the imaginary part) and accumulates them
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
|
||||
|
||||
/*lv_8sc_t* cPtr = result;
|
||||
const lv_8sc_t* aPtr = input;
|
||||
const lv_8sc_t* bPtr = taps;
|
||||
|
||||
for(int number = 0; number < num_points; number++){
|
||||
*cPtr += (*aPtr++) * (*bPtr++);
|
||||
}*/
|
||||
|
||||
char * res = (char*) result;
|
||||
char * in = (char*) input;
|
||||
char * tp = (char*) taps;
|
||||
unsigned int n_2_ccomplex_blocks = num_points/2;
|
||||
unsigned int isodd = num_points & 1;
|
||||
|
||||
char sum0[2] = {0,0};
|
||||
char sum1[2] = {0,0};
|
||||
unsigned int i = 0;
|
||||
|
||||
for(i = 0; i < n_2_ccomplex_blocks; ++i) {
|
||||
sum0[0] += in[0] * tp[0] - in[1] * tp[1];
|
||||
sum0[1] += in[0] * tp[1] + in[1] * tp[0];
|
||||
sum1[0] += in[2] * tp[2] - in[3] * tp[3];
|
||||
sum1[1] += in[2] * tp[3] + in[3] * tp[2];
|
||||
|
||||
in += 4;
|
||||
tp += 4;
|
||||
}
|
||||
|
||||
res[0] = sum0[0] + sum1[0];
|
||||
res[1] = sum0[1] + sum1[1];
|
||||
|
||||
// Cleanup if we had an odd number of points
|
||||
for(i = 0; i < isodd; ++i) {
|
||||
*result += input[num_points - 1] * taps[num_points - 1];
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include "emmintrin.h"
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
|
||||
|
||||
lv_8sc_t dotProduct;
|
||||
memset(&dotProduct, 0x0, 2*sizeof(char));
|
||||
|
||||
const lv_8sc_t* a = input;
|
||||
const lv_8sc_t* b = taps;
|
||||
|
||||
const unsigned int sse_iters = num_points/8;
|
||||
|
||||
if (sse_iters>0)
|
||||
{
|
||||
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc;
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
realcacc = _mm_setzero_si128();
|
||||
imagcacc = _mm_setzero_si128();
|
||||
|
||||
for(int number = 0; number < sse_iters; number++){
|
||||
|
||||
x = _mm_lddqu_si128((__m128i*)a);
|
||||
y = _mm_lddqu_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
realcacc = _mm_add_epi16 (realcacc, realc);
|
||||
imagcacc = _mm_add_epi16 (imagcacc, imagc);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
}
|
||||
|
||||
realcacc = _mm_and_si128 (realcacc, mult1);
|
||||
imagcacc = _mm_and_si128 (imagcacc, mult1);
|
||||
imagcacc = _mm_slli_si128 (imagcacc, 1);
|
||||
|
||||
totalc = _mm_or_si128 (realcacc, imagcacc);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
|
||||
|
||||
_mm_storeu_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
|
||||
|
||||
for (int i = 0; i<8; ++i)
|
||||
{
|
||||
dotProduct += dotProductVector[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 8); ++i)
|
||||
{
|
||||
dotProduct += (*a++) * (*b++);
|
||||
}
|
||||
|
||||
*result = dotProduct;
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE2*/
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include "smmintrin.h"
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
|
||||
|
||||
lv_8sc_t dotProduct;
|
||||
memset(&dotProduct, 0x0, 2*sizeof(char));
|
||||
|
||||
const lv_8sc_t* a = input;
|
||||
const lv_8sc_t* b = taps;
|
||||
|
||||
const unsigned int sse_iters = num_points/8;
|
||||
|
||||
if (sse_iters>0)
|
||||
{
|
||||
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc;
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
realcacc = _mm_setzero_si128();
|
||||
imagcacc = _mm_setzero_si128();
|
||||
|
||||
for(int number = 0; number < sse_iters; number++){
|
||||
|
||||
x = _mm_lddqu_si128((__m128i*)a);
|
||||
y = _mm_lddqu_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
realcacc = _mm_add_epi16 (realcacc, realc);
|
||||
imagcacc = _mm_add_epi16 (imagcacc, imagc);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
}
|
||||
|
||||
imagcacc = _mm_slli_si128 (imagcacc, 1);
|
||||
|
||||
totalc = _mm_blendv_epi8 (imagcacc, realcacc, mult1);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
|
||||
|
||||
_mm_storeu_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
|
||||
|
||||
for (int i = 0; i<8; ++i)
|
||||
{
|
||||
dotProduct += dotProductVector[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 8); ++i)
|
||||
{
|
||||
dotProduct += (*a++) * (*b++);
|
||||
}
|
||||
|
||||
*result = dotProduct;
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H*/
|
||||
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
|
||||
|
||||
/*lv_8sc_t* cPtr = result;
|
||||
const lv_8sc_t* aPtr = input;
|
||||
const lv_8sc_t* bPtr = taps;
|
||||
|
||||
for(int number = 0; number < num_points; number++){
|
||||
*cPtr += (*aPtr++) * (*bPtr++);
|
||||
}*/
|
||||
|
||||
char * res = (char*) result;
|
||||
char * in = (char*) input;
|
||||
char * tp = (char*) taps;
|
||||
unsigned int n_2_ccomplex_blocks = num_points/2;
|
||||
unsigned int isodd = num_points & 1;
|
||||
|
||||
char sum0[2] = {0,0};
|
||||
char sum1[2] = {0,0};
|
||||
unsigned int i = 0;
|
||||
|
||||
for(i = 0; i < n_2_ccomplex_blocks; ++i) {
|
||||
sum0[0] += in[0] * tp[0] - in[1] * tp[1];
|
||||
sum0[1] += in[0] * tp[1] + in[1] * tp[0];
|
||||
sum1[0] += in[2] * tp[2] - in[3] * tp[3];
|
||||
sum1[1] += in[2] * tp[3] + in[3] * tp[2];
|
||||
|
||||
in += 4;
|
||||
tp += 4;
|
||||
}
|
||||
|
||||
res[0] = sum0[0] + sum1[0];
|
||||
res[1] = sum0[1] + sum1[1];
|
||||
|
||||
// Cleanup if we had an odd number of points
|
||||
for(i = 0; i < isodd; ++i) {
|
||||
*result += input[num_points - 1] * taps[num_points - 1];
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include "emmintrin.h"
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
|
||||
|
||||
lv_8sc_t dotProduct;
|
||||
memset(&dotProduct, 0x0, 2*sizeof(char));
|
||||
|
||||
const lv_8sc_t* a = input;
|
||||
const lv_8sc_t* b = taps;
|
||||
|
||||
const unsigned int sse_iters = num_points/8;
|
||||
|
||||
if (sse_iters>0)
|
||||
{
|
||||
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc;
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
realcacc = _mm_setzero_si128();
|
||||
imagcacc = _mm_setzero_si128();
|
||||
|
||||
for(int number = 0; number < sse_iters; number++){
|
||||
|
||||
x = _mm_load_si128((__m128i*)a);
|
||||
y = _mm_load_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
realcacc = _mm_add_epi16 (realcacc, realc);
|
||||
imagcacc = _mm_add_epi16 (imagcacc, imagc);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
}
|
||||
|
||||
realcacc = _mm_and_si128 (realcacc, mult1);
|
||||
imagcacc = _mm_and_si128 (imagcacc, mult1);
|
||||
imagcacc = _mm_slli_si128 (imagcacc, 1);
|
||||
|
||||
totalc = _mm_or_si128 (realcacc, imagcacc);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
|
||||
|
||||
_mm_store_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
|
||||
|
||||
for (int i = 0; i<8; ++i)
|
||||
{
|
||||
dotProduct += dotProductVector[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 8); ++i)
|
||||
{
|
||||
dotProduct += (*a++) * (*b++);
|
||||
}
|
||||
|
||||
*result = dotProduct;
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE2*/
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include "smmintrin.h"
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
|
||||
|
||||
lv_8sc_t dotProduct;
|
||||
memset(&dotProduct, 0x0, 2*sizeof(char));
|
||||
|
||||
const lv_8sc_t* a = input;
|
||||
const lv_8sc_t* b = taps;
|
||||
|
||||
const unsigned int sse_iters = num_points/8;
|
||||
|
||||
if (sse_iters>0)
|
||||
{
|
||||
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc;
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
realcacc = _mm_setzero_si128();
|
||||
imagcacc = _mm_setzero_si128();
|
||||
|
||||
for(int number = 0; number < sse_iters; number++){
|
||||
|
||||
x = _mm_load_si128((__m128i*)a);
|
||||
y = _mm_load_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
realcacc = _mm_add_epi16 (realcacc, realc);
|
||||
imagcacc = _mm_add_epi16 (imagcacc, imagc);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
}
|
||||
|
||||
imagcacc = _mm_slli_si128 (imagcacc, 1);
|
||||
|
||||
totalc = _mm_blendv_epi8 (imagcacc, realcacc, mult1);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
|
||||
|
||||
_mm_store_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
|
||||
|
||||
for (int i = 0; i<8; ++i)
|
||||
{
|
||||
dotProduct += dotProductVector[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 8); ++i)
|
||||
{
|
||||
dotProduct += (*a++) * (*b++);
|
||||
}
|
||||
|
||||
*result = dotProduct;
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
*/
|
||||
extern void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(short* resRealShort, short* resImagShort, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points);
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points){
|
||||
|
||||
short resReal = 0;
|
||||
char* resRealChar = (char*)&resReal;
|
||||
resRealChar++;
|
||||
|
||||
short resImag = 0;
|
||||
char* resImagChar = (char*)&resImag;
|
||||
resImagChar++;
|
||||
|
||||
volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(&resReal, &resImag, input, taps, num_points);
|
||||
|
||||
*result = lv_cmake(*resRealChar, *resImagChar);
|
||||
}
|
||||
#endif /* LV_HAVE_ORC */
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H*/
|
@ -0,0 +1,346 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8ic_x2_multiply_8ic.h
|
||||
* \brief Volk protokernel: multiplies two 16 bits vectors
|
||||
* \authors <ul>
|
||||
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
|
||||
* and 8 bits the imaginary part)
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include "emmintrin.h"
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
|
||||
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
|
||||
lv_8sc_t* c = cVector;
|
||||
const lv_8sc_t* a = aVector;
|
||||
const lv_8sc_t* b = bVector;
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
|
||||
for(int number = 0;number < sse_iters; number++){
|
||||
|
||||
x = _mm_lddqu_si128((__m128i*)a);
|
||||
y = _mm_lddqu_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
realc = _mm_and_si128 (realc, mult1);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_and_si128 (imagc, mult1);
|
||||
imagc = _mm_slli_si128 (imagc, 1);
|
||||
|
||||
totalc = _mm_or_si128 (realc, imagc);
|
||||
|
||||
_mm_storeu_si128((__m128i*)c, totalc);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
c += 8;
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 8); ++i)
|
||||
{
|
||||
*c++ = (*a++) * (*b++);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include "smmintrin.h"
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
|
||||
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
__m128i x, y, zero;
|
||||
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
|
||||
lv_8sc_t* c = cVector;
|
||||
const lv_8sc_t* a = aVector;
|
||||
const lv_8sc_t* b = bVector;
|
||||
|
||||
zero = _mm_setzero_si128();
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
|
||||
for(int number = 0;number < sse_iters; number++){
|
||||
|
||||
x = _mm_lddqu_si128((__m128i*)a);
|
||||
y = _mm_lddqu_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_slli_si128 (imagc, 1);
|
||||
|
||||
totalc = _mm_blendv_epi8 (imagc, realc, mult1);
|
||||
|
||||
_mm_storeu_si128((__m128i*)c, totalc);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
c += 8;
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 8); ++i)
|
||||
{
|
||||
*c++ = (*a++) * (*b++);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE4_1 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
|
||||
lv_8sc_t* cPtr = cVector;
|
||||
const lv_8sc_t* aPtr = aVector;
|
||||
const lv_8sc_t* bPtr = bVector;
|
||||
|
||||
for(int number = 0; number < num_points; number++){
|
||||
*cPtr++ = (*aPtr++) * (*bPtr++);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H */
|
||||
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include "emmintrin.h"
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
|
||||
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
|
||||
lv_8sc_t* c = cVector;
|
||||
const lv_8sc_t* a = aVector;
|
||||
const lv_8sc_t* b = bVector;
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
|
||||
for(int number = 0;number < sse_iters; number++){
|
||||
|
||||
x = _mm_load_si128((__m128i*)a);
|
||||
y = _mm_load_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
realc = _mm_and_si128 (realc, mult1);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_and_si128 (imagc, mult1);
|
||||
imagc = _mm_slli_si128 (imagc, 1);
|
||||
|
||||
totalc = _mm_or_si128 (realc, imagc);
|
||||
|
||||
_mm_store_si128((__m128i*)c, totalc);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
c += 8;
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 8); ++i)
|
||||
{
|
||||
*c++ = (*a++) * (*b++);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include "smmintrin.h"
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
|
||||
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
__m128i x, y, zero;
|
||||
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
|
||||
lv_8sc_t* c = cVector;
|
||||
const lv_8sc_t* a = aVector;
|
||||
const lv_8sc_t* b = bVector;
|
||||
|
||||
zero = _mm_setzero_si128();
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
|
||||
for(int number = 0;number < sse_iters; number++){
|
||||
|
||||
x = _mm_load_si128((__m128i*)a);
|
||||
y = _mm_load_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_slli_si128 (imagc, 1);
|
||||
|
||||
totalc = _mm_blendv_epi8 (imagc, realc, mult1);
|
||||
|
||||
_mm_store_si128((__m128i*)c, totalc);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
c += 8;
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 8); ++i)
|
||||
{
|
||||
*c++ = (*a++) * (*b++);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE4_1 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
|
||||
lv_8sc_t* cPtr = cVector;
|
||||
const lv_8sc_t* aPtr = aVector;
|
||||
const lv_8sc_t* bPtr = bVector;
|
||||
|
||||
for(int number = 0; number < num_points; number++){
|
||||
*cPtr++ = (*aPtr++) * (*bPtr++);
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
extern void volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points);
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
|
||||
volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(cVector, aVector, bVector, num_points);
|
||||
}
|
||||
#endif /* LV_HAVE_ORC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H */
|
@ -0,0 +1,882 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3.h
|
||||
* \brief Volk protokernel: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors
|
||||
* \authors <ul>
|
||||
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that performs the carrier wipe-off mixing and the
|
||||
* Early, Prompt, and Late correlation with 16 bits vectors (8 bits the
|
||||
* real part and 8 bits the imaginary part):
|
||||
* - The carrier wipe-off is done by multiplying the input signal by the
|
||||
* carrier (multiplication of 16 bits vectors) It returns the input
|
||||
* signal in base band (BB)
|
||||
* - Early values are calculated by multiplying the input signal in BB by the
|
||||
* early code (multiplication of 16 bits vectors), accumulating the results
|
||||
* - Prompt values are calculated by multiplying the input signal in BB by the
|
||||
* prompt code (multiplication of 16 bits vectors), accumulating the results
|
||||
* - Late values are calculated by multiplying the input signal in BB by the
|
||||
* late code (multiplication of 16 bits vectors), accumulating the results
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_H
|
||||
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include "smmintrin.h"
|
||||
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Prompt PRN code replica input
\param L_code Late PRN code replica input
\param E_out Early correlation output
\param P_out Prompt correlation output
\param L_out Late correlation output
\param num_points The number of complex values in vectors

The input vectors are read with unaligned 128-bit loads, eight complex samples at a time;
the remaining num_points % 8 samples are processed one by one.

NOTE(review): the three outputs are declared lv_32fc_t* but the body accesses them through
lv_8sc_t* pointers, and the scratch arrays are lv_16sc_t while the packed data stored into
them is byte-interleaved -- the types look inconsistent; confirm the intended output precision
before relying on this kernel.
*/
|
||||
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse4_1(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
|
||||
{
|
||||
// Number of full 128-bit blocks (8 complex samples each)
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
__m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
|
||||
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
|
||||
|
||||
const lv_8sc_t* input_ptr = input;
|
||||
const lv_8sc_t* carrier_ptr = carrier;
|
||||
|
||||
const lv_8sc_t* E_code_ptr = E_code;
|
||||
// NOTE(review): E_out is lv_32fc_t* in the signature but is assigned to an lv_8sc_t* here (same for L_out and P_out below) -- pointer type mismatch, confirm intent
lv_8sc_t* E_out_ptr = E_out;
|
||||
const lv_8sc_t* L_code_ptr = L_code;
|
||||
lv_8sc_t* L_out_ptr = L_out;
|
||||
const lv_8sc_t* P_code_ptr = P_code;
|
||||
lv_8sc_t* P_out_ptr = P_out;
|
||||
|
||||
// Outputs are cleared before any accumulation (through the lv_8sc_t* aliases)
*E_out_ptr = 0;
|
||||
*P_out_ptr = 0;
|
||||
*L_out_ptr = 0;
|
||||
|
||||
// Byte mask: 255 in every even byte, 0 in every odd byte (arguments run from the most to the least significant byte)
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
|
||||
// Real and imaginary accumulators (16-bit lanes) for the Early, Late and Prompt correlations
real_E_code_acc = _mm_setzero_si128();
|
||||
imag_E_code_acc = _mm_setzero_si128();
|
||||
real_L_code_acc = _mm_setzero_si128();
|
||||
imag_L_code_acc = _mm_setzero_si128();
|
||||
real_P_code_acc = _mm_setzero_si128();
|
||||
imag_P_code_acc = _mm_setzero_si128();
|
||||
|
||||
if (sse_iters>0)
|
||||
{
|
||||
for(int number = 0;number < sse_iters; number++){
|
||||
|
||||
// Perform the carrier wipe-off: complex product of input and carrier
x = _mm_lddqu_si128((__m128i*)input_ptr);
|
||||
y = _mm_lddqu_si128((__m128i*)carrier_ptr);
|
||||
|
||||
// Odd bytes of x, shifted down one byte and masked, one per 16-bit lane; even bytes of x likewise
// (assumes lv_8sc_t stores the real byte first and the imaginary byte second -- TODO confirm)
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
// Baseband sample: real = xr*yr - xi*yi, imag = xr*yi + xi*yr (16-bit lanes, not masked back to 8 bits)
real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
// Get early values: baseband sample times the early code, accumulated
y = _mm_lddqu_si128((__m128i*)E_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
|
||||
imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
|
||||
|
||||
// Get late values: baseband sample times the late code, accumulated
y = _mm_lddqu_si128((__m128i*)L_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
|
||||
imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
|
||||
|
||||
// Get prompt values: baseband sample times the prompt code, accumulated
y = _mm_lddqu_si128((__m128i*)P_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
|
||||
imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
|
||||
|
||||
// Advance all five input pointers by the eight samples just consumed
input_ptr += 8;
|
||||
carrier_ptr += 8;
|
||||
E_code_ptr += 8;
|
||||
L_code_ptr += 8;
|
||||
P_code_ptr += 8;
|
||||
}
|
||||
|
||||
// NOTE(review): each array below receives a single 16-byte store; with 8 lv_16sc_t elements the arrays are
// presumably larger than 16 bytes, so the summation loop may read elements that were never written -- confirm element type
__VOLK_ATTR_ALIGNED(16) lv_16sc_t E_dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t L_dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t P_dotProductVector[8];
|
||||
|
||||
// Re-interleave each accumulator pair: even bytes from the real accumulator, odd bytes from the imaginary accumulator shifted up one byte
imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
|
||||
output = _mm_blendv_epi8 (imag_E_code_acc, real_E_code_acc, mult1);
|
||||
_mm_storeu_si128((__m128i*)E_dotProductVector, output);
|
||||
|
||||
imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
|
||||
output = _mm_blendv_epi8 (imag_L_code_acc, real_L_code_acc, mult1);
|
||||
_mm_storeu_si128((__m128i*)L_dotProductVector, output);
|
||||
|
||||
imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
|
||||
output = _mm_blendv_epi8 (imag_P_code_acc, real_P_code_acc, mult1);
|
||||
_mm_storeu_si128((__m128i*)P_dotProductVector, output);
|
||||
|
||||
// Fold the eight scratch elements of each correlator into its output
for (int i = 0; i<8; ++i)
|
||||
{
|
||||
*E_out_ptr += E_dotProductVector[i];
|
||||
*L_out_ptr += L_dotProductVector[i];
|
||||
*P_out_ptr += P_dotProductVector[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Scalar tail: the num_points % 8 samples that did not fill a whole 128-bit block
lv_8sc_t bb_signal_sample;
|
||||
for(int i=0; i < num_points%8; ++i)
|
||||
{
|
||||
// Perform the carrier wipe-off
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
|
||||
// Now get early, prompt, and late values for this sample
*E_out_ptr += bb_signal_sample * (*E_code_ptr++);
|
||||
*P_out_ptr += bb_signal_sample * (*P_code_ptr++);
|
||||
*L_out_ptr += bb_signal_sample * (*L_code_ptr++);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_SSE4_1 */
|
||||
|
||||
//#ifdef LV_HAVE_SSE2
|
||||
//#include "emmintrin.h"
|
||||
///*!
|
||||
// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
// \param input The input signal input
|
||||
// \param carrier The carrier signal input
|
||||
// \param E_code Early PRN code replica input
|
||||
// \param P_code Early PRN code replica input
|
||||
// \param L_code Early PRN code replica input
|
||||
// \param E_out Early correlation output
|
||||
// \param P_out Early correlation output
|
||||
// \param L_out Early correlation output
|
||||
// \param num_points The number of complex values in vectors
|
||||
// */
|
||||
//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
|
||||
//{
|
||||
// const unsigned int sse_iters = num_points / 8;
|
||||
//
|
||||
// __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
|
||||
// __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
|
||||
//
|
||||
// const lv_8sc_t* input_ptr = input;
|
||||
// const lv_8sc_t* carrier_ptr = carrier;
|
||||
//
|
||||
// const lv_8sc_t* E_code_ptr = E_code;
|
||||
// lv_8sc_t* E_out_ptr = E_out;
|
||||
// const lv_8sc_t* L_code_ptr = L_code;
|
||||
// lv_8sc_t* L_out_ptr = L_out;
|
||||
// const lv_8sc_t* P_code_ptr = P_code;
|
||||
// lv_8sc_t* P_out_ptr = P_out;
|
||||
//
|
||||
// *E_out_ptr = 0;
|
||||
// *P_out_ptr = 0;
|
||||
// *L_out_ptr = 0;
|
||||
//
|
||||
// mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
//
|
||||
// real_E_code_acc = _mm_setzero_si128();
|
||||
// imag_E_code_acc = _mm_setzero_si128();
|
||||
// real_L_code_acc = _mm_setzero_si128();
|
||||
// imag_L_code_acc = _mm_setzero_si128();
|
||||
// real_P_code_acc = _mm_setzero_si128();
|
||||
// imag_P_code_acc = _mm_setzero_si128();
|
||||
//
|
||||
// if (sse_iters>0)
|
||||
// {
|
||||
// for(int number = 0;number < sse_iters; number++){
|
||||
//
|
||||
// //Perform the carrier wipe-off
|
||||
// x = _mm_lddqu_si128((__m128i*)input_ptr);
|
||||
// y = _mm_lddqu_si128((__m128i*)carrier_ptr);
|
||||
//
|
||||
// imagx = _mm_srli_si128 (x, 1);
|
||||
// imagx = _mm_and_si128 (imagx, mult1);
|
||||
// realx = _mm_and_si128 (x, mult1);
|
||||
//
|
||||
// imagy = _mm_srli_si128 (y, 1);
|
||||
// imagy = _mm_and_si128 (imagy, mult1);
|
||||
// realy = _mm_and_si128 (y, mult1);
|
||||
//
|
||||
// realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
// imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
// realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
// imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
//
|
||||
// real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
// imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
//
|
||||
// //Get early values
|
||||
// y = _mm_lddqu_si128((__m128i*)E_code_ptr);
|
||||
//
|
||||
// imagy = _mm_srli_si128 (y, 1);
|
||||
// imagy = _mm_and_si128 (imagy, mult1);
|
||||
// realy = _mm_and_si128 (y, mult1);
|
||||
//
|
||||
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
//
|
||||
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
//
|
||||
// real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
|
||||
// imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
|
||||
//
|
||||
// //Get late values
|
||||
// y = _mm_lddqu_si128((__m128i*)L_code_ptr);
|
||||
//
|
||||
// imagy = _mm_srli_si128 (y, 1);
|
||||
// imagy = _mm_and_si128 (imagy, mult1);
|
||||
// realy = _mm_and_si128 (y, mult1);
|
||||
//
|
||||
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
//
|
||||
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
//
|
||||
// real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
|
||||
// imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
|
||||
//
|
||||
// //Get prompt values
|
||||
// y = _mm_lddqu_si128((__m128i*)P_code_ptr);
|
||||
//
|
||||
// imagy = _mm_srli_si128 (y, 1);
|
||||
// imagy = _mm_and_si128 (imagy, mult1);
|
||||
// realy = _mm_and_si128 (y, mult1);
|
||||
//
|
||||
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
//
|
||||
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
//
|
||||
// real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
|
||||
// imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
|
||||
//
|
||||
// input_ptr += 8;
|
||||
// carrier_ptr += 8;
|
||||
// E_code_ptr += 8;
|
||||
// L_code_ptr += 8;
|
||||
// P_code_ptr += 8;
|
||||
// }
|
||||
//
|
||||
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
|
||||
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
|
||||
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
|
||||
//
|
||||
// real_E_code_acc = _mm_and_si128 (real_E_code_acc, mult1);
|
||||
// imag_E_code_acc = _mm_and_si128 (imag_E_code_acc, mult1);
|
||||
// imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
|
||||
// output = _mm_or_si128 (real_E_code_acc, imag_E_code_acc);
|
||||
// _mm_storeu_si128((__m128i*)E_dotProductVector, output);
|
||||
//
|
||||
// real_L_code_acc = _mm_and_si128 (real_L_code_acc, mult1);
|
||||
// imag_L_code_acc = _mm_and_si128 (imag_L_code_acc, mult1);
|
||||
// imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
|
||||
// output = _mm_or_si128 (real_L_code_acc, imag_L_code_acc);
|
||||
// _mm_storeu_si128((__m128i*)L_dotProductVector, output);
|
||||
//
|
||||
// real_P_code_acc = _mm_and_si128 (real_P_code_acc, mult1);
|
||||
// imag_P_code_acc = _mm_and_si128 (imag_P_code_acc, mult1);
|
||||
// imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
|
||||
// output = _mm_or_si128 (real_P_code_acc, imag_P_code_acc);
|
||||
// _mm_storeu_si128((__m128i*)P_dotProductVector, output);
|
||||
//
|
||||
// for (int i = 0; i<8; ++i)
|
||||
// {
|
||||
// *E_out_ptr += E_dotProductVector[i];
|
||||
// *L_out_ptr += L_dotProductVector[i];
|
||||
// *P_out_ptr += P_dotProductVector[i];
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// lv_8sc_t bb_signal_sample;
|
||||
// for(int i=0; i < num_points%8; ++i)
|
||||
// {
|
||||
// //Perform the carrier wipe-off
|
||||
// bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
|
||||
// // Now get early, late, and prompt values for each
|
||||
// *E_out_ptr += bb_signal_sample * (*E_code_ptr++);
|
||||
// *P_out_ptr += bb_signal_sample * (*P_code_ptr++);
|
||||
// *L_out_ptr += bb_signal_sample * (*L_code_ptr++);
|
||||
// }
|
||||
//}
|
||||
//
|
||||
//#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Early PRN code replica input
|
||||
\param L_code Early PRN code replica input
|
||||
\param E_out Early correlation output
|
||||
\param P_out Early correlation output
|
||||
\param L_out Early correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
|
||||
{
|
||||
lv_8sc_t bb_signal_sample;
|
||||
lv_16sc_t tmp1;
|
||||
lv_16sc_t tmp2;
|
||||
lv_16sc_t tmp3;
|
||||
|
||||
bb_signal_sample = lv_cmake(0, 0);
|
||||
|
||||
*E_out = 0;
|
||||
*P_out = 0;
|
||||
*L_out = 0;
|
||||
// perform Early, Prompt and Late correlation
|
||||
for(int i=0; i < num_points; ++i)
|
||||
{
|
||||
//Perform the carrier wipe-off
|
||||
bb_signal_sample = input[i] * carrier[i];
|
||||
|
||||
tmp1 = bb_signal_sample * E_code[i];
|
||||
tmp2 = bb_signal_sample * P_code[i];
|
||||
tmp3 = bb_signal_sample * L_code[i];
|
||||
|
||||
// Now get early, late, and prompt values for each
|
||||
*E_out += tmp1;
|
||||
*P_out += tmp2;
|
||||
*L_out += tmp3;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_H */
|
||||
|
||||
|
||||
//#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_H
|
||||
//#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_H
|
||||
//
|
||||
//#include <inttypes.h>
|
||||
//#include <stdio.h>
|
||||
//#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
//#include <float.h>
|
||||
//#include <string.h>
|
||||
//
|
||||
//#ifdef LV_HAVE_SSE4_1
|
||||
//#include "smmintrin.h"
|
||||
///*!
|
||||
// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
// \param input The input signal input
|
||||
// \param carrier The carrier signal input
|
||||
// \param E_code Early PRN code replica input
|
||||
// \param P_code Early PRN code replica input
|
||||
// \param L_code Early PRN code replica input
|
||||
// \param E_out Early correlation output
|
||||
// \param P_out Early correlation output
|
||||
// \param L_out Early correlation output
|
||||
// \param num_points The number of complex values in vectors
|
||||
// */
|
||||
//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_sse4_1(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
|
||||
//{
|
||||
// const unsigned int sse_iters = num_points / 8;
|
||||
//
|
||||
// __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
|
||||
// __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
|
||||
//
|
||||
// const lv_8sc_t* input_ptr = input;
|
||||
// const lv_8sc_t* carrier_ptr = carrier;
|
||||
//
|
||||
// const lv_8sc_t* E_code_ptr = E_code;
|
||||
// lv_8sc_t* E_out_ptr = E_out;
|
||||
// const lv_8sc_t* L_code_ptr = L_code;
|
||||
// lv_8sc_t* L_out_ptr = L_out;
|
||||
// const lv_8sc_t* P_code_ptr = P_code;
|
||||
// lv_8sc_t* P_out_ptr = P_out;
|
||||
//
|
||||
// *E_out_ptr = 0;
|
||||
// *P_out_ptr = 0;
|
||||
// *L_out_ptr = 0;
|
||||
//
|
||||
// mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
//
|
||||
// real_E_code_acc = _mm_setzero_si128();
|
||||
// imag_E_code_acc = _mm_setzero_si128();
|
||||
// real_L_code_acc = _mm_setzero_si128();
|
||||
// imag_L_code_acc = _mm_setzero_si128();
|
||||
// real_P_code_acc = _mm_setzero_si128();
|
||||
// imag_P_code_acc = _mm_setzero_si128();
|
||||
//
|
||||
// if (sse_iters>0)
|
||||
// {
|
||||
// for(int number = 0;number < sse_iters; number++){
|
||||
//
|
||||
// //Perform the carrier wipe-off
|
||||
// x = _mm_load_si128((__m128i*)input_ptr);
|
||||
// y = _mm_load_si128((__m128i*)carrier_ptr);
|
||||
//
|
||||
// imagx = _mm_srli_si128 (x, 1);
|
||||
// imagx = _mm_and_si128 (imagx, mult1);
|
||||
// realx = _mm_and_si128 (x, mult1);
|
||||
//
|
||||
// imagy = _mm_srli_si128 (y, 1);
|
||||
// imagy = _mm_and_si128 (imagy, mult1);
|
||||
// realy = _mm_and_si128 (y, mult1);
|
||||
//
|
||||
// realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
// imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
// realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
// imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
//
|
||||
// real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
// imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
//
|
||||
// //Get early values
|
||||
// y = _mm_load_si128((__m128i*)E_code_ptr);
|
||||
//
|
||||
// imagy = _mm_srli_si128 (y, 1);
|
||||
// imagy = _mm_and_si128 (imagy, mult1);
|
||||
// realy = _mm_and_si128 (y, mult1);
|
||||
//
|
||||
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
//
|
||||
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
//
|
||||
// real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
|
||||
// imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
|
||||
//
|
||||
// //Get late values
|
||||
// y = _mm_load_si128((__m128i*)L_code_ptr);
|
||||
//
|
||||
// imagy = _mm_srli_si128 (y, 1);
|
||||
// imagy = _mm_and_si128 (imagy, mult1);
|
||||
// realy = _mm_and_si128 (y, mult1);
|
||||
//
|
||||
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
//
|
||||
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
//
|
||||
// real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
|
||||
// imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
|
||||
//
|
||||
// //Get prompt values
|
||||
// y = _mm_load_si128((__m128i*)P_code_ptr);
|
||||
//
|
||||
// imagy = _mm_srli_si128 (y, 1);
|
||||
// imagy = _mm_and_si128 (imagy, mult1);
|
||||
// realy = _mm_and_si128 (y, mult1);
|
||||
//
|
||||
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
//
|
||||
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
//
|
||||
// real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
|
||||
// imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
|
||||
//
|
||||
// input_ptr += 8;
|
||||
// carrier_ptr += 8;
|
||||
// E_code_ptr += 8;
|
||||
// L_code_ptr += 8;
|
||||
// P_code_ptr += 8;
|
||||
// }
|
||||
//
|
||||
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
|
||||
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
|
||||
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
|
||||
//
|
||||
// imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
|
||||
// output = _mm_blendv_epi8 (imag_E_code_acc, real_E_code_acc, mult1);
|
||||
// _mm_store_si128((__m128i*)E_dotProductVector, output);
|
||||
//
|
||||
// imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
|
||||
// output = _mm_blendv_epi8 (imag_L_code_acc, real_L_code_acc, mult1);
|
||||
// _mm_store_si128((__m128i*)L_dotProductVector, output);
|
||||
//
|
||||
// imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
|
||||
// output = _mm_blendv_epi8 (imag_P_code_acc, real_P_code_acc, mult1);
|
||||
// _mm_store_si128((__m128i*)P_dotProductVector, output);
|
||||
//
|
||||
// for (int i = 0; i<8; ++i)
|
||||
// {
|
||||
// *E_out_ptr += E_dotProductVector[i];
|
||||
// *L_out_ptr += L_dotProductVector[i];
|
||||
// *P_out_ptr += P_dotProductVector[i];
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// lv_8sc_t bb_signal_sample;
|
||||
// for(int i=0; i < num_points%8; ++i)
|
||||
// {
|
||||
// //Perform the carrier wipe-off
|
||||
// bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
|
||||
// // Now get early, late, and prompt values for each
|
||||
// *E_out_ptr += bb_signal_sample * (*E_code_ptr++);
|
||||
// *P_out_ptr += bb_signal_sample * (*P_code_ptr++);
|
||||
// *L_out_ptr += bb_signal_sample * (*L_code_ptr++);
|
||||
// }
|
||||
//}
|
||||
//
|
||||
//#endif /* LV_HAVE_SSE4_1 */
|
||||
//
|
||||
//#ifdef LV_HAVE_SSE2
|
||||
//#include "emmintrin.h"
|
||||
///*!
|
||||
// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
// \param input The input signal input
|
||||
// \param carrier The carrier signal input
|
||||
// \param E_code Early PRN code replica input
|
||||
// \param P_code Early PRN code replica input
|
||||
// \param L_code Early PRN code replica input
|
||||
// \param E_out Early correlation output
|
||||
// \param P_out Early correlation output
|
||||
// \param L_out Early correlation output
|
||||
// \param num_points The number of complex values in vectors
|
||||
// */
|
||||
//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_sse2(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
|
||||
//{
|
||||
// const unsigned int sse_iters = num_points / 8;
|
||||
//
|
||||
// __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
|
||||
// __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
|
||||
//
|
||||
// const lv_8sc_t* input_ptr = input;
|
||||
// const lv_8sc_t* carrier_ptr = carrier;
|
||||
//
|
||||
// const lv_8sc_t* E_code_ptr = E_code;
|
||||
// lv_8sc_t* E_out_ptr = E_out;
|
||||
// const lv_8sc_t* L_code_ptr = L_code;
|
||||
// lv_8sc_t* L_out_ptr = L_out;
|
||||
// const lv_8sc_t* P_code_ptr = P_code;
|
||||
// lv_8sc_t* P_out_ptr = P_out;
|
||||
//
|
||||
// *E_out_ptr = 0;
|
||||
// *P_out_ptr = 0;
|
||||
// *L_out_ptr = 0;
|
||||
//
|
||||
// mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
//
|
||||
// real_E_code_acc = _mm_setzero_si128();
|
||||
// imag_E_code_acc = _mm_setzero_si128();
|
||||
// real_L_code_acc = _mm_setzero_si128();
|
||||
// imag_L_code_acc = _mm_setzero_si128();
|
||||
// real_P_code_acc = _mm_setzero_si128();
|
||||
// imag_P_code_acc = _mm_setzero_si128();
|
||||
//
|
||||
// if (sse_iters>0)
|
||||
// {
|
||||
// for(int number = 0;number < sse_iters; number++){
|
||||
//
|
||||
// //Perform the carrier wipe-off
|
||||
// x = _mm_load_si128((__m128i*)input_ptr);
|
||||
// y = _mm_load_si128((__m128i*)carrier_ptr);
|
||||
//
|
||||
// imagx = _mm_srli_si128 (x, 1);
|
||||
// imagx = _mm_and_si128 (imagx, mult1);
|
||||
// realx = _mm_and_si128 (x, mult1);
|
||||
//
|
||||
// imagy = _mm_srli_si128 (y, 1);
|
||||
// imagy = _mm_and_si128 (imagy, mult1);
|
||||
// realy = _mm_and_si128 (y, mult1);
|
||||
//
|
||||
// realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
// imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
// realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
// imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
//
|
||||
// real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
// imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
//
|
||||
// //Get early values
|
||||
// y = _mm_load_si128((__m128i*)E_code_ptr);
|
||||
//
|
||||
// imagy = _mm_srli_si128 (y, 1);
|
||||
// imagy = _mm_and_si128 (imagy, mult1);
|
||||
// realy = _mm_and_si128 (y, mult1);
|
||||
//
|
||||
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
//
|
||||
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
//
|
||||
// real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
|
||||
// imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
|
||||
//
|
||||
// //Get late values
|
||||
// y = _mm_load_si128((__m128i*)L_code_ptr);
|
||||
//
|
||||
// imagy = _mm_srli_si128 (y, 1);
|
||||
// imagy = _mm_and_si128 (imagy, mult1);
|
||||
// realy = _mm_and_si128 (y, mult1);
|
||||
//
|
||||
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
//
|
||||
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
//
|
||||
// real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
|
||||
// imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
|
||||
//
|
||||
// //Get prompt values
|
||||
// y = _mm_load_si128((__m128i*)P_code_ptr);
|
||||
//
|
||||
// imagy = _mm_srli_si128 (y, 1);
|
||||
// imagy = _mm_and_si128 (imagy, mult1);
|
||||
// realy = _mm_and_si128 (y, mult1);
|
||||
//
|
||||
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
//
|
||||
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
//
|
||||
// real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
|
||||
// imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
|
||||
//
|
||||
// input_ptr += 8;
|
||||
// carrier_ptr += 8;
|
||||
// E_code_ptr += 8;
|
||||
// L_code_ptr += 8;
|
||||
// P_code_ptr += 8;
|
||||
// }
|
||||
//
|
||||
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
|
||||
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
|
||||
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
|
||||
//
|
||||
// real_E_code_acc = _mm_and_si128 (real_E_code_acc, mult1);
|
||||
// imag_E_code_acc = _mm_and_si128 (imag_E_code_acc, mult1);
|
||||
// imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
|
||||
// output = _mm_or_si128 (real_E_code_acc, imag_E_code_acc);
|
||||
// _mm_store_si128((__m128i*)E_dotProductVector, output);
|
||||
//
|
||||
// real_L_code_acc = _mm_and_si128 (real_L_code_acc, mult1);
|
||||
// imag_L_code_acc = _mm_and_si128 (imag_L_code_acc, mult1);
|
||||
// imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
|
||||
// output = _mm_or_si128 (real_L_code_acc, imag_L_code_acc);
|
||||
// _mm_store_si128((__m128i*)L_dotProductVector, output);
|
||||
//
|
||||
// real_P_code_acc = _mm_and_si128 (real_P_code_acc, mult1);
|
||||
// imag_P_code_acc = _mm_and_si128 (imag_P_code_acc, mult1);
|
||||
// imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
|
||||
// output = _mm_or_si128 (real_P_code_acc, imag_P_code_acc);
|
||||
// _mm_store_si128((__m128i*)P_dotProductVector, output);
|
||||
//
|
||||
// for (int i = 0; i<8; ++i)
|
||||
// {
|
||||
// *E_out_ptr += E_dotProductVector[i];
|
||||
// *L_out_ptr += L_dotProductVector[i];
|
||||
// *P_out_ptr += P_dotProductVector[i];
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// lv_8sc_t bb_signal_sample;
|
||||
// for(int i=0; i < num_points%8; ++i)
|
||||
// {
|
||||
// //Perform the carrier wipe-off
|
||||
// bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
|
||||
// // Now get early, late, and prompt values for each
|
||||
// *E_out_ptr += bb_signal_sample * (*E_code_ptr++);
|
||||
// *P_out_ptr += bb_signal_sample * (*P_code_ptr++);
|
||||
// *L_out_ptr += bb_signal_sample * (*L_code_ptr++);
|
||||
// }
|
||||
//}
|
||||
//
|
||||
//#endif /* LV_HAVE_SSE2 */
|
||||
//
|
||||
//#ifdef LV_HAVE_GENERIC
|
||||
///*!
|
||||
// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
// \param input The input signal input
|
||||
// \param carrier The carrier signal input
|
||||
// \param E_code Early PRN code replica input
|
||||
// \param P_code Early PRN code replica input
|
||||
// \param L_code Early PRN code replica input
|
||||
// \param E_out Early correlation output
|
||||
// \param P_out Early correlation output
|
||||
// \param L_out Early correlation output
|
||||
// \param num_points The number of complex values in vectors
|
||||
// */
|
||||
//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_generic(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
|
||||
//{
|
||||
// lv_8sc_t bb_signal_sample;
|
||||
//
|
||||
// bb_signal_sample = lv_cmake(0, 0);
|
||||
//
|
||||
// *E_out = 0;
|
||||
// *P_out = 0;
|
||||
// *L_out = 0;
|
||||
// // perform Early, Prompt and Late correlation
|
||||
// for(int i=0; i < num_points; ++i)
|
||||
// {
|
||||
// //Perform the carrier wipe-off
|
||||
// bb_signal_sample = input[i] * carrier[i];
|
||||
// // Now get early, late, and prompt values for each
|
||||
// *E_out += bb_signal_sample * E_code[i];
|
||||
// *P_out += bb_signal_sample * P_code[i];
|
||||
// *L_out += bb_signal_sample * L_code[i];
|
||||
// }
|
||||
//}
|
||||
//
|
||||
//#endif /* LV_HAVE_GENERIC */
|
||||
//
|
||||
//#ifdef LV_HAVE_ORC
|
||||
///*!
|
||||
// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
// \param input The input signal input
|
||||
// \param carrier The carrier signal input
|
||||
// \param E_code Early PRN code replica input
|
||||
// \param P_code Early PRN code replica input
|
||||
// \param L_code Early PRN code replica input
|
||||
// \param E_out Early correlation output
|
||||
// \param P_out Early correlation output
|
||||
// \param L_out Early correlation output
|
||||
// \param num_points The number of complex values in vectors
|
||||
// */
|
||||
//
|
||||
//extern void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_first_a_orc_impl(short* E_out_real, short* E_out_imag, short* P_out_real, short* P_out_imag, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, unsigned int num_points);
|
||||
//extern void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_second_a_orc_impl(short* L_out_real, short* L_out_imag, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* L_code, unsigned int num_points);
|
||||
//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_orc(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points){
|
||||
//
|
||||
// short E_out_real = 0;
|
||||
// short E_out_imag = 0;
|
||||
// char* E_out_real_c = (char*)&E_out_real;
|
||||
// E_out_real_c++;
|
||||
// char* E_out_imag_c = (char*)&E_out_imag;
|
||||
// E_out_imag_c++;
|
||||
//
|
||||
// short P_out_real = 0;
|
||||
// short P_out_imag = 0;
|
||||
// char* P_out_real_c = (char*)&P_out_real;
|
||||
// P_out_real_c++;
|
||||
// char* P_out_imag_c = (char*)&P_out_imag;
|
||||
// P_out_imag_c++;
|
||||
//
|
||||
// short L_out_real = 0;
|
||||
// short L_out_imag = 0;
|
||||
// char* L_out_real_c = (char*)&L_out_real;
|
||||
// L_out_real_c++;
|
||||
// char* L_out_imag_c = (char*)&L_out_imag;
|
||||
// L_out_imag_c++;
|
||||
//
|
||||
// volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_first_a_orc_impl( &E_out_real, &E_out_imag, &P_out_real, &P_out_imag, input, carrier, E_code, P_code, num_points);
|
||||
// volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_second_a_orc_impl( &L_out_real, &L_out_imag, input, carrier, L_code, num_points);
|
||||
//
|
||||
// //ORC implementation of 8ic_x5_cw_epl_corr_32fc_x3 is done in two different functions because it seems that
|
||||
// //in one function the length of the code gives memory problems (bad access, segmentation fault).
|
||||
// //Also, the maximum number of accumulators that can be used is 4 (and we need 6).
|
||||
// //The "carrier wipe-off" step is done two times: one in the first function and another one in the second.
|
||||
// //Joining all the ORC code in one function would be quicker because the "carrier wipe-off" step would be done just
|
||||
// //one time.
|
||||
//
|
||||
// *E_out = lv_cmake(*E_out_real_c, *E_out_imag_c);
|
||||
// *P_out = lv_cmake(*P_out_real_c, *P_out_imag_c);
|
||||
// *L_out = lv_cmake(*L_out_real_c, *L_out_imag_c);
|
||||
//}
|
||||
//#endif /* LV_HAVE_ORC */
|
||||
//
|
||||
//#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_H */
|
@ -0,0 +1,874 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.h
|
||||
* \brief Volk protokernel: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors
|
||||
* \authors <ul>
|
||||
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that performs the carrier wipe-off mixing and the
|
||||
* Early, Prompt, and Late correlation with 16 bits vectors (8 bits the
|
||||
* real part and 8 bits the imaginary part):
|
||||
* - The carrier wipe-off is done by multiplying the input signal by the
|
||||
* carrier (multiplication of 16 bits vectors) It returns the input
|
||||
* signal in base band (BB)
|
||||
* - Early values are calculated by multiplying the input signal in BB by the
|
||||
* early code (multiplication of 16 bits vectors), accumulating the results
|
||||
* - Prompt values are calculated by multiplying the input signal in BB by the
|
||||
* prompt code (multiplication of 16 bits vectors), accumulating the results
|
||||
* - Late values are calculated by multiplying the input signal in BB by the
|
||||
* late code (multiplication of 16 bits vectors), accumulating the results
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_H
|
||||
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include "smmintrin.h"
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Prompt PRN code replica input
|
||||
\param L_code Late PRN code replica input
|
||||
\param E_out Early correlation output
|
||||
\param P_out Prompt correlation output
|
||||
\param L_out Late correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse4_1(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
__m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
|
||||
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
|
||||
|
||||
const lv_8sc_t* input_ptr = input;
|
||||
const lv_8sc_t* carrier_ptr = carrier;
|
||||
|
||||
const lv_8sc_t* E_code_ptr = E_code;
|
||||
lv_8sc_t* E_out_ptr = E_out;
|
||||
const lv_8sc_t* L_code_ptr = L_code;
|
||||
lv_8sc_t* L_out_ptr = L_out;
|
||||
const lv_8sc_t* P_code_ptr = P_code;
|
||||
lv_8sc_t* P_out_ptr = P_out;
|
||||
|
||||
*E_out_ptr = 0;
|
||||
*P_out_ptr = 0;
|
||||
*L_out_ptr = 0;
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
|
||||
real_E_code_acc = _mm_setzero_si128();
|
||||
imag_E_code_acc = _mm_setzero_si128();
|
||||
real_L_code_acc = _mm_setzero_si128();
|
||||
imag_L_code_acc = _mm_setzero_si128();
|
||||
real_P_code_acc = _mm_setzero_si128();
|
||||
imag_P_code_acc = _mm_setzero_si128();
|
||||
|
||||
if (sse_iters>0)
|
||||
{
|
||||
for(int number = 0;number < sse_iters; number++){
|
||||
|
||||
//Perform the carrier wipe-off
|
||||
x = _mm_lddqu_si128((__m128i*)input_ptr);
|
||||
y = _mm_lddqu_si128((__m128i*)carrier_ptr);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
//Get early values
|
||||
y = _mm_lddqu_si128((__m128i*)E_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
|
||||
imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
|
||||
|
||||
//Get late values
|
||||
y = _mm_lddqu_si128((__m128i*)L_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
|
||||
imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
|
||||
|
||||
//Get prompt values
|
||||
y = _mm_lddqu_si128((__m128i*)P_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
|
||||
imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
|
||||
|
||||
input_ptr += 8;
|
||||
carrier_ptr += 8;
|
||||
E_code_ptr += 8;
|
||||
L_code_ptr += 8;
|
||||
P_code_ptr += 8;
|
||||
}
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
|
||||
|
||||
imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
|
||||
output = _mm_blendv_epi8 (imag_E_code_acc, real_E_code_acc, mult1);
|
||||
_mm_storeu_si128((__m128i*)E_dotProductVector, output);
|
||||
|
||||
imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
|
||||
output = _mm_blendv_epi8 (imag_L_code_acc, real_L_code_acc, mult1);
|
||||
_mm_storeu_si128((__m128i*)L_dotProductVector, output);
|
||||
|
||||
imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
|
||||
output = _mm_blendv_epi8 (imag_P_code_acc, real_P_code_acc, mult1);
|
||||
_mm_storeu_si128((__m128i*)P_dotProductVector, output);
|
||||
|
||||
for (int i = 0; i<8; ++i)
|
||||
{
|
||||
*E_out_ptr += E_dotProductVector[i];
|
||||
*L_out_ptr += L_dotProductVector[i];
|
||||
*P_out_ptr += P_dotProductVector[i];
|
||||
}
|
||||
}
|
||||
|
||||
lv_8sc_t bb_signal_sample;
|
||||
for(int i=0; i < num_points%8; ++i)
|
||||
{
|
||||
//Perform the carrier wipe-off
|
||||
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
|
||||
// Now get early, late, and prompt values for each
|
||||
*E_out_ptr += bb_signal_sample * (*E_code_ptr++);
|
||||
*P_out_ptr += bb_signal_sample * (*P_code_ptr++);
|
||||
*L_out_ptr += bb_signal_sample * (*L_code_ptr++);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_SSE4_1 */
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include "emmintrin.h"
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Early PRN code replica input
|
||||
\param L_code Early PRN code replica input
|
||||
\param E_out Early correlation output
|
||||
\param P_out Early correlation output
|
||||
\param L_out Early correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
__m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
|
||||
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
|
||||
|
||||
const lv_8sc_t* input_ptr = input;
|
||||
const lv_8sc_t* carrier_ptr = carrier;
|
||||
|
||||
const lv_8sc_t* E_code_ptr = E_code;
|
||||
lv_8sc_t* E_out_ptr = E_out;
|
||||
const lv_8sc_t* L_code_ptr = L_code;
|
||||
lv_8sc_t* L_out_ptr = L_out;
|
||||
const lv_8sc_t* P_code_ptr = P_code;
|
||||
lv_8sc_t* P_out_ptr = P_out;
|
||||
|
||||
*E_out_ptr = 0;
|
||||
*P_out_ptr = 0;
|
||||
*L_out_ptr = 0;
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
|
||||
real_E_code_acc = _mm_setzero_si128();
|
||||
imag_E_code_acc = _mm_setzero_si128();
|
||||
real_L_code_acc = _mm_setzero_si128();
|
||||
imag_L_code_acc = _mm_setzero_si128();
|
||||
real_P_code_acc = _mm_setzero_si128();
|
||||
imag_P_code_acc = _mm_setzero_si128();
|
||||
|
||||
if (sse_iters>0)
|
||||
{
|
||||
for(int number = 0;number < sse_iters; number++){
|
||||
|
||||
//Perform the carrier wipe-off
|
||||
x = _mm_lddqu_si128((__m128i*)input_ptr);
|
||||
y = _mm_lddqu_si128((__m128i*)carrier_ptr);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
//Get early values
|
||||
y = _mm_lddqu_si128((__m128i*)E_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
|
||||
imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
|
||||
|
||||
//Get late values
|
||||
y = _mm_lddqu_si128((__m128i*)L_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
|
||||
imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
|
||||
|
||||
//Get prompt values
|
||||
y = _mm_lddqu_si128((__m128i*)P_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
|
||||
imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
|
||||
|
||||
input_ptr += 8;
|
||||
carrier_ptr += 8;
|
||||
E_code_ptr += 8;
|
||||
L_code_ptr += 8;
|
||||
P_code_ptr += 8;
|
||||
}
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
|
||||
|
||||
real_E_code_acc = _mm_and_si128 (real_E_code_acc, mult1);
|
||||
imag_E_code_acc = _mm_and_si128 (imag_E_code_acc, mult1);
|
||||
imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
|
||||
output = _mm_or_si128 (real_E_code_acc, imag_E_code_acc);
|
||||
_mm_storeu_si128((__m128i*)E_dotProductVector, output);
|
||||
|
||||
real_L_code_acc = _mm_and_si128 (real_L_code_acc, mult1);
|
||||
imag_L_code_acc = _mm_and_si128 (imag_L_code_acc, mult1);
|
||||
imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
|
||||
output = _mm_or_si128 (real_L_code_acc, imag_L_code_acc);
|
||||
_mm_storeu_si128((__m128i*)L_dotProductVector, output);
|
||||
|
||||
real_P_code_acc = _mm_and_si128 (real_P_code_acc, mult1);
|
||||
imag_P_code_acc = _mm_and_si128 (imag_P_code_acc, mult1);
|
||||
imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
|
||||
output = _mm_or_si128 (real_P_code_acc, imag_P_code_acc);
|
||||
_mm_storeu_si128((__m128i*)P_dotProductVector, output);
|
||||
|
||||
for (int i = 0; i<8; ++i)
|
||||
{
|
||||
*E_out_ptr += E_dotProductVector[i];
|
||||
*L_out_ptr += L_dotProductVector[i];
|
||||
*P_out_ptr += P_dotProductVector[i];
|
||||
}
|
||||
}
|
||||
|
||||
lv_8sc_t bb_signal_sample;
|
||||
for(int i=0; i < num_points%8; ++i)
|
||||
{
|
||||
//Perform the carrier wipe-off
|
||||
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
|
||||
// Now get early, late, and prompt values for each
|
||||
*E_out_ptr += bb_signal_sample * (*E_code_ptr++);
|
||||
*P_out_ptr += bb_signal_sample * (*P_code_ptr++);
|
||||
*L_out_ptr += bb_signal_sample * (*L_code_ptr++);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Early PRN code replica input
|
||||
\param L_code Early PRN code replica input
|
||||
\param E_out Early correlation output
|
||||
\param P_out Early correlation output
|
||||
\param L_out Early correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_generic(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
|
||||
{
|
||||
lv_8sc_t bb_signal_sample;
|
||||
|
||||
bb_signal_sample = lv_cmake(0, 0);
|
||||
|
||||
*E_out = 0;
|
||||
*P_out = 0;
|
||||
*L_out = 0;
|
||||
// perform Early, Prompt and Late correlation
|
||||
for(int i=0; i < num_points; ++i)
|
||||
{
|
||||
//Perform the carrier wipe-off
|
||||
bb_signal_sample = input[i] * carrier[i];
|
||||
// Now get early, late, and prompt values for each
|
||||
*E_out += bb_signal_sample * E_code[i];
|
||||
*P_out += bb_signal_sample * P_code[i];
|
||||
*L_out += bb_signal_sample * L_code[i];
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_H */
|
||||
|
||||
|
||||
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_H
|
||||
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include "smmintrin.h"
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Early PRN code replica input
|
||||
\param L_code Early PRN code replica input
|
||||
\param E_out Early correlation output
|
||||
\param P_out Early correlation output
|
||||
\param L_out Early correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_sse4_1(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
__m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
|
||||
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
|
||||
|
||||
const lv_8sc_t* input_ptr = input;
|
||||
const lv_8sc_t* carrier_ptr = carrier;
|
||||
|
||||
const lv_8sc_t* E_code_ptr = E_code;
|
||||
lv_8sc_t* E_out_ptr = E_out;
|
||||
const lv_8sc_t* L_code_ptr = L_code;
|
||||
lv_8sc_t* L_out_ptr = L_out;
|
||||
const lv_8sc_t* P_code_ptr = P_code;
|
||||
lv_8sc_t* P_out_ptr = P_out;
|
||||
|
||||
*E_out_ptr = 0;
|
||||
*P_out_ptr = 0;
|
||||
*L_out_ptr = 0;
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
|
||||
real_E_code_acc = _mm_setzero_si128();
|
||||
imag_E_code_acc = _mm_setzero_si128();
|
||||
real_L_code_acc = _mm_setzero_si128();
|
||||
imag_L_code_acc = _mm_setzero_si128();
|
||||
real_P_code_acc = _mm_setzero_si128();
|
||||
imag_P_code_acc = _mm_setzero_si128();
|
||||
|
||||
if (sse_iters>0)
|
||||
{
|
||||
for(int number = 0;number < sse_iters; number++){
|
||||
|
||||
//Perform the carrier wipe-off
|
||||
x = _mm_load_si128((__m128i*)input_ptr);
|
||||
y = _mm_load_si128((__m128i*)carrier_ptr);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
//Get early values
|
||||
y = _mm_load_si128((__m128i*)E_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
|
||||
imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
|
||||
|
||||
//Get late values
|
||||
y = _mm_load_si128((__m128i*)L_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
|
||||
imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
|
||||
|
||||
//Get prompt values
|
||||
y = _mm_load_si128((__m128i*)P_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
|
||||
imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
|
||||
|
||||
input_ptr += 8;
|
||||
carrier_ptr += 8;
|
||||
E_code_ptr += 8;
|
||||
L_code_ptr += 8;
|
||||
P_code_ptr += 8;
|
||||
}
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
|
||||
|
||||
imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
|
||||
output = _mm_blendv_epi8 (imag_E_code_acc, real_E_code_acc, mult1);
|
||||
_mm_store_si128((__m128i*)E_dotProductVector, output);
|
||||
|
||||
imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
|
||||
output = _mm_blendv_epi8 (imag_L_code_acc, real_L_code_acc, mult1);
|
||||
_mm_store_si128((__m128i*)L_dotProductVector, output);
|
||||
|
||||
imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
|
||||
output = _mm_blendv_epi8 (imag_P_code_acc, real_P_code_acc, mult1);
|
||||
_mm_store_si128((__m128i*)P_dotProductVector, output);
|
||||
|
||||
for (int i = 0; i<8; ++i)
|
||||
{
|
||||
*E_out_ptr += E_dotProductVector[i];
|
||||
*L_out_ptr += L_dotProductVector[i];
|
||||
*P_out_ptr += P_dotProductVector[i];
|
||||
}
|
||||
}
|
||||
|
||||
lv_8sc_t bb_signal_sample;
|
||||
for(int i=0; i < num_points%8; ++i)
|
||||
{
|
||||
//Perform the carrier wipe-off
|
||||
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
|
||||
// Now get early, late, and prompt values for each
|
||||
*E_out_ptr += bb_signal_sample * (*E_code_ptr++);
|
||||
*P_out_ptr += bb_signal_sample * (*P_code_ptr++);
|
||||
*L_out_ptr += bb_signal_sample * (*L_code_ptr++);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_SSE4_1 */
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include "emmintrin.h"
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Early PRN code replica input
|
||||
\param L_code Early PRN code replica input
|
||||
\param E_out Early correlation output
|
||||
\param P_out Early correlation output
|
||||
\param L_out Early correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_sse2(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
__m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
|
||||
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
|
||||
|
||||
const lv_8sc_t* input_ptr = input;
|
||||
const lv_8sc_t* carrier_ptr = carrier;
|
||||
|
||||
const lv_8sc_t* E_code_ptr = E_code;
|
||||
lv_8sc_t* E_out_ptr = E_out;
|
||||
const lv_8sc_t* L_code_ptr = L_code;
|
||||
lv_8sc_t* L_out_ptr = L_out;
|
||||
const lv_8sc_t* P_code_ptr = P_code;
|
||||
lv_8sc_t* P_out_ptr = P_out;
|
||||
|
||||
*E_out_ptr = 0;
|
||||
*P_out_ptr = 0;
|
||||
*L_out_ptr = 0;
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
|
||||
real_E_code_acc = _mm_setzero_si128();
|
||||
imag_E_code_acc = _mm_setzero_si128();
|
||||
real_L_code_acc = _mm_setzero_si128();
|
||||
imag_L_code_acc = _mm_setzero_si128();
|
||||
real_P_code_acc = _mm_setzero_si128();
|
||||
imag_P_code_acc = _mm_setzero_si128();
|
||||
|
||||
if (sse_iters>0)
|
||||
{
|
||||
for(int number = 0;number < sse_iters; number++){
|
||||
|
||||
//Perform the carrier wipe-off
|
||||
x = _mm_load_si128((__m128i*)input_ptr);
|
||||
y = _mm_load_si128((__m128i*)carrier_ptr);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
|
||||
real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
//Get early values
|
||||
y = _mm_load_si128((__m128i*)E_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
|
||||
imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
|
||||
|
||||
//Get late values
|
||||
y = _mm_load_si128((__m128i*)L_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
|
||||
imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
|
||||
|
||||
//Get prompt values
|
||||
y = _mm_load_si128((__m128i*)P_code_ptr);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
|
||||
|
||||
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
|
||||
imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
|
||||
|
||||
input_ptr += 8;
|
||||
carrier_ptr += 8;
|
||||
E_code_ptr += 8;
|
||||
L_code_ptr += 8;
|
||||
P_code_ptr += 8;
|
||||
}
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
|
||||
|
||||
real_E_code_acc = _mm_and_si128 (real_E_code_acc, mult1);
|
||||
imag_E_code_acc = _mm_and_si128 (imag_E_code_acc, mult1);
|
||||
imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
|
||||
output = _mm_or_si128 (real_E_code_acc, imag_E_code_acc);
|
||||
_mm_store_si128((__m128i*)E_dotProductVector, output);
|
||||
|
||||
real_L_code_acc = _mm_and_si128 (real_L_code_acc, mult1);
|
||||
imag_L_code_acc = _mm_and_si128 (imag_L_code_acc, mult1);
|
||||
imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
|
||||
output = _mm_or_si128 (real_L_code_acc, imag_L_code_acc);
|
||||
_mm_store_si128((__m128i*)L_dotProductVector, output);
|
||||
|
||||
real_P_code_acc = _mm_and_si128 (real_P_code_acc, mult1);
|
||||
imag_P_code_acc = _mm_and_si128 (imag_P_code_acc, mult1);
|
||||
imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
|
||||
output = _mm_or_si128 (real_P_code_acc, imag_P_code_acc);
|
||||
_mm_store_si128((__m128i*)P_dotProductVector, output);
|
||||
|
||||
for (int i = 0; i<8; ++i)
|
||||
{
|
||||
*E_out_ptr += E_dotProductVector[i];
|
||||
*L_out_ptr += L_dotProductVector[i];
|
||||
*P_out_ptr += P_dotProductVector[i];
|
||||
}
|
||||
}
|
||||
|
||||
lv_8sc_t bb_signal_sample;
|
||||
for(int i=0; i < num_points%8; ++i)
|
||||
{
|
||||
//Perform the carrier wipe-off
|
||||
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
|
||||
// Now get early, late, and prompt values for each
|
||||
*E_out_ptr += bb_signal_sample * (*E_code_ptr++);
|
||||
*P_out_ptr += bb_signal_sample * (*P_code_ptr++);
|
||||
*L_out_ptr += bb_signal_sample * (*L_code_ptr++);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Early PRN code replica input
|
||||
\param L_code Early PRN code replica input
|
||||
\param E_out Early correlation output
|
||||
\param P_out Early correlation output
|
||||
\param L_out Early correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_generic(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
|
||||
{
|
||||
lv_8sc_t bb_signal_sample;
|
||||
|
||||
bb_signal_sample = lv_cmake(0, 0);
|
||||
|
||||
*E_out = 0;
|
||||
*P_out = 0;
|
||||
*L_out = 0;
|
||||
// perform Early, Prompt and Late correlation
|
||||
for(int i=0; i < num_points; ++i)
|
||||
{
|
||||
//Perform the carrier wipe-off
|
||||
bb_signal_sample = input[i] * carrier[i];
|
||||
// Now get early, late, and prompt values for each
|
||||
*E_out += bb_signal_sample * E_code[i];
|
||||
*P_out += bb_signal_sample * P_code[i];
|
||||
*L_out += bb_signal_sample * L_code[i];
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
|
||||
\param input The input signal input
|
||||
\param carrier The carrier signal input
|
||||
\param E_code Early PRN code replica input
|
||||
\param P_code Early PRN code replica input
|
||||
\param L_code Early PRN code replica input
|
||||
\param E_out Early correlation output
|
||||
\param P_out Early correlation output
|
||||
\param L_out Early correlation output
|
||||
\param num_points The number of complex values in vectors
|
||||
*/
|
||||
|
||||
extern void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_first_a_orc_impl(short* E_out_real, short* E_out_imag, short* P_out_real, short* P_out_imag, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, unsigned int num_points);
|
||||
extern void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_second_a_orc_impl(short* L_out_real, short* L_out_imag, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* L_code, unsigned int num_points);
|
||||
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_orc(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
    /* 16-bit accumulators written by the two ORC kernels (early, prompt, late). */
    short early_re = 0;
    short early_im = 0;
    short prompt_re = 0;
    short prompt_im = 0;
    short late_re = 0;
    short late_im = 0;

    /* Every output component is read from the byte at offset 1 of its
     * accumulator (the high-order byte on a little-endian host). */
    const char* early_re_byte = (const char*)&early_re + 1;
    const char* early_im_byte = (const char*)&early_im + 1;
    const char* prompt_re_byte = (const char*)&prompt_re + 1;
    const char* prompt_im_byte = (const char*)&prompt_im + 1;
    const char* late_re_byte = (const char*)&late_re + 1;
    const char* late_im_byte = (const char*)&late_im + 1;

    /* The ORC implementation is split over two kernels: a single kernel of that
     * length appeared to cause memory problems (bad access, segmentation
     * fault), and ORC offers at most 4 accumulators while 6 are needed here.
     * As a consequence the carrier wipe-off is performed once per kernel; a
     * single merged kernel would be quicker because it would do it only once. */
    volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_first_a_orc_impl(&early_re, &early_im, &prompt_re, &prompt_im, input, carrier, E_code, P_code, num_points);
    volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_second_a_orc_impl(&late_re, &late_im, input, carrier, L_code, num_points);

    *E_out = lv_cmake(*early_re_byte, *early_im_byte);
    *P_out = lv_cmake(*prompt_re_byte, *prompt_im_byte);
    *L_out = lv_cmake(*late_re_byte, *late_im_byte);
}
|
||||
#endif /* LV_HAVE_ORC */
|
||||
|
||||
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_H */
|
@ -0,0 +1,210 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8u_x2_multiply_8u.h
|
||||
* \brief Volk protokernel: multiplies unsigned char values
|
||||
* \authors <ul>
|
||||
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that multiplies unsigned char values (8 bits data)
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H
|
||||
#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
/*!
|
||||
\brief Multiplies the two input unsigned char values and stores their results in the third unsigned char
|
||||
\param cChar The unsigned char where the results will be stored
|
||||
\param aChar One of the unsigned char to be multiplied
|
||||
\param bChar One of the unsigned char to be multiplied
|
||||
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
|
||||
*/
|
||||
static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points){
|
||||
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
__m128i x, y, x1, x2, y1, y2, mult1, x1_mult_y1, x2_mult_y2, tmp, tmp1, tmp2, totalc;
|
||||
unsigned char* c = cChar;
|
||||
const unsigned char* a = aChar;
|
||||
const unsigned char* b = bChar;
|
||||
|
||||
for(int number = 0;number < sse_iters; number++){
|
||||
x = _mm_lddqu_si128((__m128i*)a);
|
||||
y = _mm_lddqu_si128((__m128i*)b);
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
x1 = _mm_srli_si128 (x, 1);
|
||||
x1 = _mm_and_si128 (x1, mult1);
|
||||
x2 = _mm_and_si128 (x, mult1);
|
||||
|
||||
y1 = _mm_srli_si128 (y, 1);
|
||||
y1 = _mm_and_si128 (y1, mult1);
|
||||
y2 = _mm_and_si128 (y, mult1);
|
||||
|
||||
x1_mult_y1 = _mm_mullo_epi16 (x1, y1);
|
||||
x2_mult_y2 = _mm_mullo_epi16 (x2, y2);
|
||||
|
||||
tmp = _mm_and_si128 (x1_mult_y1, mult1);
|
||||
tmp1 = _mm_slli_si128 (tmp, 1);
|
||||
tmp2 = _mm_and_si128 (x2_mult_y2, mult1);
|
||||
totalc = _mm_or_si128 (tmp1, tmp2);
|
||||
|
||||
_mm_storeu_si128((__m128i*)c, totalc);
|
||||
|
||||
a += 16;
|
||||
b += 16;
|
||||
c += 16;
|
||||
}
|
||||
|
||||
for (int i = 0; i<(num_points % 16); ++i)
|
||||
{
|
||||
*c++ = (*a++) * (*b++);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the two input unsigned char values and stores their results in the third unsigned char
|
||||
\param cChar The unsigned char where the results will be stored
|
||||
\param aChar One of the unsigned char to be multiplied
|
||||
\param bChar One of the unsigned char to be multiplied
|
||||
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
|
||||
*/
|
||||
static inline void volk_gnsssdr_8u_x2_multiply_8u_generic(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points)
{
    /* Portable element-wise product of two unsigned char vectors; each result
     * is the product truncated to 8 bits.
     *   cChar      output, num_points elements
     *   aChar      first factor, num_points elements
     *   bChar      second factor, num_points elements
     *   num_points number of elements to process
     *
     * The index is unsigned like num_points, which avoids a signed/unsigned
     * comparison and signed overflow when num_points exceeds INT_MAX. */
    for (unsigned int number = 0; number < num_points; number++)
        {
            cChar[number] = (unsigned char)(aChar[number] * bChar[number]);
        }
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H */
|
||||
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H
|
||||
#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
/*!
|
||||
\brief Multiplies the two input unsigned char values and stores their results in the third unsigned char
|
||||
\param cChar The unsigned char where the results will be stored
|
||||
\param aChar One of the unsigned char to be multiplied
|
||||
\param bChar One of the unsigned char to be multiplied
|
||||
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
|
||||
*/
|
||||
static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points)
{
    /* Element-wise product of two unsigned char vectors, keeping the low 8 bits
     * of every product. Uses aligned loads/stores, so all three buffers must be
     * 16-byte aligned.
     *   cChar      output, num_points elements
     *   aChar      first factor, num_points elements
     *   bChar      second factor, num_points elements
     *   num_points number of elements to process */
    const unsigned int sse_iters = num_points / 16;
    const unsigned int tail_points = num_points % 16;

    /* Selects the low byte of each 16-bit lane. Loop-invariant, so it is built
     * once instead of on every iteration. */
    const __m128i mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);

    unsigned char* c = cChar;
    const unsigned char* a = aChar;
    const unsigned char* b = bChar;

    /* Counters are unsigned like num_points: no signed/unsigned comparison and
     * no signed overflow for very large vectors. */
    for (unsigned int number = 0; number < sse_iters; number++)
        {
            const __m128i x = _mm_load_si128((const __m128i*)a);
            const __m128i y = _mm_load_si128((const __m128i*)b);

            /* Odd-indexed bytes, shifted down into the low byte of each lane. */
            const __m128i x1 = _mm_and_si128(_mm_srli_si128(x, 1), mult1);
            const __m128i y1 = _mm_and_si128(_mm_srli_si128(y, 1), mult1);
            /* Even-indexed bytes already sit in the low byte of each lane. */
            const __m128i x2 = _mm_and_si128(x, mult1);
            const __m128i y2 = _mm_and_si128(y, mult1);

            /* 16-bit products truncated to their low byte. */
            const __m128i odd_products = _mm_and_si128(_mm_mullo_epi16(x1, y1), mult1);
            const __m128i even_products = _mm_and_si128(_mm_mullo_epi16(x2, y2), mult1);

            /* Move the odd products back to the odd byte positions and merge. */
            const __m128i totalc = _mm_or_si128(_mm_slli_si128(odd_products, 1), even_products);

            _mm_store_si128((__m128i*)c, totalc);

            a += 16;
            b += 16;
            c += 16;
        }

    /* Scalar tail for the elements that do not fill a whole vector. */
    for (unsigned int i = 0; i < tail_points; ++i)
        {
            *c++ = (unsigned char)((*a++) * (*b++));
        }
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the two input unsigned char values and stores their results in the third unsigned char
|
||||
\param cChar The unsigned char where the results will be stored
|
||||
\param aChar One of the unsigned char to be multiplied
|
||||
\param bChar One of the unsigned char to be multiplied
|
||||
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
|
||||
*/
|
||||
static inline void volk_gnsssdr_8u_x2_multiply_8u_a_generic(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points)
{
    /* Portable element-wise product of two unsigned char vectors; each result
     * is the product truncated to 8 bits.
     *   cChar      output, num_points elements
     *   aChar      first factor, num_points elements
     *   bChar      second factor, num_points elements
     *   num_points number of elements to process
     *
     * The index is unsigned like num_points, which avoids a signed/unsigned
     * comparison and signed overflow when num_points exceeds INT_MAX. */
    for (unsigned int number = 0; number < num_points; number++)
        {
            cChar[number] = (unsigned char)(aChar[number] * bChar[number]);
        }
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Multiplies the two input unsigned char values and stores their results in the third unsigned char
|
||||
\param cChar The unsigned char where the results will be stored
|
||||
\param aChar One of the unsigned char to be multiplied
|
||||
\param bChar One of the unsigned char to be multiplied
|
||||
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
|
||||
*/
|
||||
extern void volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl(unsigned char* cVector, const unsigned char* aVector, const unsigned char* bVector, unsigned int num_points);
|
||||
/* Thin wrapper: forwards all four arguments unchanged to the externally
 * defined ORC implementation volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl.
 * NOTE(review): this wrapper carries the "_u_" suffix while it lives in the
 * "_a_H" section and calls the "_a_" implementation -- confirm the intended name. */
static inline void volk_gnsssdr_8u_x2_multiply_8u_u_orc(unsigned char* cVector, const unsigned char* aVector, const unsigned char* bVector, unsigned int num_points){
|
||||
volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl(cVector, aVector, bVector, num_points);
|
||||
}
|
||||
#endif /* LV_HAVE_ORC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H */
|
572
src/algorithms/libs/volk_gnsssdr/lib/CMakeLists.txt
Normal file
572
src/algorithms/libs/volk_gnsssdr/lib/CMakeLists.txt
Normal file
@ -0,0 +1,572 @@
|
||||
#
|
||||
# Copyright 2011-2012,2014 Free Software Foundation, Inc.
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
|
||||
########################################################################
|
||||
# header file detection
|
||||
########################################################################
|
||||
include(CheckIncludeFile)

# Probe each optional system header in turn; every header that is found gets
# a matching -DHAVE_<NAME>_H definition.
foreach(probe_header cpuid intrin fenv dlfcn)
    string(TOUPPER "HAVE_${probe_header}_H" probe_result)
    check_include_file(${probe_header}.h ${probe_result})
    if(${probe_result})
        add_definitions(-D${probe_result})
    endif()
endforeach()

# dlfcn.h additionally requires linking against the dynamic loader library.
if(HAVE_DLFCN_H)
    list(APPEND volk_gnsssdr_libraries ${CMAKE_DL_LIBS})
endif()
|
||||
|
||||
########################################################################
|
||||
# Setup the compiler name
|
||||
########################################################################
|
||||
# COMPILER_NAME is passed to the Python helper scripts via --compiler.
# MSVC is named explicitly; every other compiler uses its CMake compiler id.
if(MSVC)
    set(COMPILER_NAME MSVC)
else()
    set(COMPILER_NAME ${CMAKE_C_COMPILER_ID})
endif()

message(STATUS "Compiler name: ${COMPILER_NAME}")

# set() with an empty value leaves the variable undefined, so an unknown
# compiler id ends the configuration here.
if(NOT DEFINED COMPILER_NAME)
    message(FATAL_ERROR "COMPILER_NAME undefined. Volk build may not support this compiler.")
endif()
|
||||
|
||||
########################################################################
|
||||
# Special clang flag so flag checks can fail
|
||||
########################################################################
|
||||
# If the compiler accepts -Werror=unused-command-line-argument, remember it in
# VOLK_FLAG_CHECK_FLAGS so that it is applied during the later flag probes.
if(COMPILER_NAME MATCHES "GNU")
    include(CheckCXXCompilerFlag)
    check_cxx_compiler_flag("-Werror=unused-command-line-argument" HAVE_WERROR_UNUSED_CMD_LINE_ARG)
    if(HAVE_WERROR_UNUSED_CMD_LINE_ARG)
        set(VOLK_FLAG_CHECK_FLAGS "-Werror=unused-command-line-argument")
    endif()
endif()
|
||||
|
||||
########################################################################
|
||||
# check for posix_memalign, since some OSs do not internally define
|
||||
# _XOPEN_SOURCE or _POSIX_C_SOURCE; they leave this to the user.
|
||||
########################################################################
|
||||
|
||||
include(CheckFunctionExists)

# Expose HAVE_POSIX_MEMALIGN to the sources when the C library provides it.
check_function_exists(posix_memalign HAVE_POSIX_MEMALIGN)
if(HAVE_POSIX_MEMALIGN)
    add_definitions(-DHAVE_POSIX_MEMALIGN)
endif()
|
||||
|
||||
########################################################################
|
||||
# detect x86 flavor of CPU
|
||||
########################################################################
|
||||
# Mark the target as x86 when the processor name is one of the known x86
# spellings. The expansion is quoted so that an empty CMAKE_SYSTEM_PROCESSOR
# cannot break the if() syntax, and the upper-case "AMD64" spelling is
# accepted too because MATCHES is case-sensitive.
if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(i.86|x86|x86_64|amd64|AMD64)$")
    message(STATUS "x86* CPU detected")
    set(CPU_IS_x86 TRUE)
endif()
|
||||
|
||||
########################################################################
|
||||
# determine passing architectures based on compile flag tests
|
||||
########################################################################
|
||||
execute_process(
|
||||
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
|
||||
${CMAKE_SOURCE_DIR}/gen/volk_gnsssdr_compile_utils.py
|
||||
--mode "arch_flags" --compiler "${COMPILER_NAME}"
|
||||
OUTPUT_VARIABLE arch_flag_lines OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
|
||||
# check_arch(<arch_name> [<flag>...])
#
# Appends <arch_name> to available_archs (in the caller's scope) when the C++
# compiler accepts every listed flag. An architecture without flags is always
# accepted. VOLK_FLAG_CHECK_FLAGS, when set, is applied to each probe.
macro(check_arch arch_name)
    include(CheckCXXCompilerFlag)
    set(flags ${ARGN})
    set(have_${arch_name} TRUE)
    foreach(flag ${flags})
        # Derive the result-variable name from the flag, mapping every
        # character outside [A-Za-z0-9_] to "_". Done natively instead of
        # spawning a Python interpreter for each flag.
        string(REGEX REPLACE "[^A-Za-z0-9_]" "_" have_flag "have${flag}")
        if(VOLK_FLAG_CHECK_FLAGS)
            set(CMAKE_REQUIRED_FLAGS ${VOLK_FLAG_CHECK_FLAGS})
        endif()
        check_cxx_compiler_flag(${flag} ${have_flag})
        unset(CMAKE_REQUIRED_FLAGS)
        if(NOT ${have_flag})
            set(have_${arch_name} FALSE)
        endif()
    endforeach()
    if(have_${arch_name})
        list(APPEND available_archs ${arch_name})
    endif()
endmacro()
|
||||
|
||||
# Each entry has the form "<arch>,<flag>,<flag>..."; turn it into a CMake list
# and hand it to check_arch as <arch> followed by its flags.
foreach(arch_flag_line ${arch_flag_lines})
    string(REPLACE "," ";" arch_flags ${arch_flag_line})
    check_arch(${arch_flags})
endforeach()
|
||||
|
||||
# OVERRULE_ARCH(<arch> <reason>)
#
# Logs <reason> and removes <arch> from available_archs in the caller's scope.
macro(OVERRULE_ARCH arch reason)
    message(STATUS "${reason}, Overruled arch ${arch}")
    list(REMOVE_ITEM available_archs ${arch})
endmacro()
|
||||
|
||||
########################################################################
|
||||
# eliminate AVX on if not on x86, or if the compiler does not accept
|
||||
# the xgetbv instruction, or {if not cross-compiling and the xgetbv
|
||||
# executable does not function correctly}.
|
||||
########################################################################
|
||||
set(HAVE_XGETBV 0)
set(HAVE_AVX_CVTPI32_PS 0)
if(CPU_IS_x86)
    # Check whether the compiler/linker accepts the xgetbv instruction.
    file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv.c "unsigned long long _xgetbv(unsigned int index) { unsigned int eax, edx; __asm__ __volatile__(\"xgetbv\" : \"=a\"(eax), \"=d\"(edx) : \"c\"(index)); return ((unsigned long long)edx << 32) | eax; } int main (void) { (void) _xgetbv(0); return (0); }")
    execute_process(COMMAND ${CMAKE_C_COMPILER} -o
        ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv
        ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv.c
        OUTPUT_QUIET ERROR_QUIET
        RESULT_VARIABLE avx_compile_result)
    # Result variables are referenced by name rather than expanded: when the
    # process cannot be started the result is an error string, which would
    # otherwise be spliced into the if() expression and break it.
    if(NOT avx_compile_result EQUAL 0)
        OVERRULE_ARCH(avx "Compiler or linker missing xgetbv instruction")
    elseif(NOT CROSSCOMPILE_MULTILIB)
        # Native build: also require that the probe runs on this CPU.
        execute_process(COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv
            OUTPUT_QUIET ERROR_QUIET
            RESULT_VARIABLE avx_exe_result)
        if(NOT avx_exe_result EQUAL 0)
            OVERRULE_ARCH(avx "CPU missing xgetbv")
        else()
            set(HAVE_XGETBV 1)
        endif()
    else()
        # cross compiling and compiler/linker seems to work; assume working
        set(HAVE_XGETBV 1)
    endif()
    file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv
        ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv.c)

    #########################################################################
    # eliminate AVX if cvtpi32_ps intrinsic fails like some versions of clang
    #########################################################################

    # Check whether the compiler/linker accepts the cvtpi32_ps intrinsic when
    # building with -mavx.
    file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps.c "#include <immintrin.h>\nint main (void) {__m128 __a; __m64 __b; __m128 foo = _mm_cvtpi32_ps(__a, __b); return (0); }")
    execute_process(COMMAND ${CMAKE_C_COMPILER} -mavx -o
        ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps
        ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps.c
        OUTPUT_QUIET ERROR_QUIET
        RESULT_VARIABLE avx_compile_result)
    if(NOT avx_compile_result EQUAL 0)
        OVERRULE_ARCH(avx "Compiler missing cvtpi32_ps instrinsic")
    elseif(NOT CROSSCOMPILE_MULTILIB)
        execute_process(COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps
            OUTPUT_QUIET ERROR_QUIET
            RESULT_VARIABLE avx_exe_result)
        if(NOT avx_exe_result EQUAL 0)
            OVERRULE_ARCH(avx "CPU missing cvtpi32_ps")
        else()
            set(HAVE_AVX_CVTPI32_PS 1)
        endif()
    else()
        set(HAVE_AVX_CVTPI32_PS 1)
    endif()
    file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps
        ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps.c)

    # Disable SSE4a if Clang is less than version 3.2
    if("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
        # Figure out the version of Clang
        if(CMAKE_VERSION VERSION_LESS "2.8.10")
            # Extract the Clang version from the --version string.
            # In cmake 2.8.10, we can just use CMAKE_C_COMPILER_VERSION
            # without having to go through these string manipulations.
            execute_process(COMMAND ${CMAKE_C_COMPILER} --version
                OUTPUT_VARIABLE clang_version)
            # Match "<major>.<minor>" with a literal dot and multi-digit
            # components, so that e.g. "10.0.0" yields "10.0" and not "0.0".
            string(REGEX MATCH "[0-9]+\\.[0-9]+" CMAKE_C_COMPILER_VERSION "${clang_version}")
        endif()

        if(CMAKE_C_COMPILER_VERSION VERSION_LESS "3.2")
            OVERRULE_ARCH(sse4_a "Clang >= 3.2 required for SSE4a")
        endif()
    endif()

endif()

# Export the probe results to the sources as preprocessor definitions.
if(HAVE_XGETBV)
    add_definitions(-DHAVE_XGETBV)
endif()

if(HAVE_AVX_CVTPI32_PS)
    add_definitions(-DHAVE_AVX_CVTPI32_PS)
endif()
|
||||
|
||||
########################################################################
|
||||
# if the CPU is not x86, eliminate all Intel SIMD
|
||||
########################################################################
|
||||
|
||||
# None of the Intel SIMD extensions apply when the target is not x86.
if(NOT CPU_IS_x86)
    foreach(intel_simd_arch 3dnow mmx sse sse2 sse3 ssse3 sse4_a sse4_1 sse4_2 avx)
        OVERRULE_ARCH(${intel_simd_arch} "Architecture is not x86 or x86_64")
    endforeach()
endif()

########################################################################
# implement overruling in the ORC case,
# since ORC always passes flag detection
########################################################################
if(NOT ORC_FOUND)
    OVERRULE_ARCH(orc "ORC support not found")
endif()
|
||||
|
||||
########################################################################
|
||||
# implement overruling in the non-multilib case
|
||||
# this makes things work when both -m32 and -m64 pass
|
||||
########################################################################
|
||||
if(NOT CROSSCOMPILE_MULTILIB AND CPU_IS_x86)
    include(CheckTypeSize)
    # The size in bytes of an array of eight pointers is used as the CPU width
    # in bits (64 or 32).
    check_type_size("void*[8]" SIZEOF_CPU BUILTIN_TYPES_ONLY)

    # SIZEOF_CPU is referenced by name rather than expanded, so an empty
    # result from a failed size probe cannot break the if() syntax.
    if(SIZEOF_CPU EQUAL 64)
        OVERRULE_ARCH(32 "CPU width is 64 bits")
        # MSVC 64 bit does not have MMX, overrule it
        if(MSVC)
            OVERRULE_ARCH(mmx "No MMX for Win64")
        endif()
    elseif(SIZEOF_CPU EQUAL 32)
        OVERRULE_ARCH(64 "CPU width is 32 bits")
    endif()
endif()
|
||||
|
||||
########################################################################
|
||||
# done overrules! print the result
|
||||
########################################################################
|
||||
message(STATUS "Available architectures: ${available_archs}")
|
||||
|
||||
########################################################################
|
||||
# determine available machines given the available architectures
|
||||
########################################################################
|
||||
execute_process(
|
||||
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
|
||||
${CMAKE_SOURCE_DIR}/gen/volk_gnsssdr_compile_utils.py
|
||||
--mode "machines" --archs "${available_archs}"
|
||||
OUTPUT_VARIABLE available_machines OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
)
|
||||
|
||||
########################################################################
|
||||
# Implement machine overruling for redundant machines:
|
||||
# A machine is redundant when expansion rules occur,
|
||||
# and the arch superset passes configuration checks.
|
||||
# When this occurs, eliminate the redundant machines
|
||||
# to avoid unnecessary compilation of subset machines.
|
||||
########################################################################
|
||||
# For every machine whose name contains "_<arch>", drop the machine of the
# same name without that suffix from available_machines.
foreach(expanded_arch mmx orc 64 32)
    foreach(candidate ${available_machines})
        string(REPLACE "_${expanded_arch}" "" machine_name_no_arch ${candidate})
        if(NOT ${candidate} STREQUAL ${machine_name_no_arch})
            list(REMOVE_ITEM available_machines ${machine_name_no_arch})
        endif()
    endforeach()
endforeach()
|
||||
|
||||
########################################################################
|
||||
# done overrules! print the result
|
||||
########################################################################
|
||||
message(STATUS "Available machines: ${available_machines}")
|
||||
|
||||
########################################################################
|
||||
# Create rules to run the volk_gnsssdr generator
|
||||
########################################################################
|
||||
|
||||
#dependencies are all python, xml, and header implementation files
|
||||
file(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml)
|
||||
file(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py)
|
||||
file(GLOB h_files ${CMAKE_SOURCE_DIR}/kernels/volk_gnsssdr/*.h)
|
||||
|
||||
# gen_template(<tmpl> <output> [<extra args>...])
#
# Adds a build rule that runs volk_gnsssdr_tmpl_utils.py on <tmpl> to produce
# <output>, passing any extra arguments through to the script, and records
# <output> in volk_gnsssdr_gen_sources. The rule depends on the xml, python
# and kernel header files as well as on the template itself.
macro(gen_template tmpl output)
    add_custom_command(
        OUTPUT ${output}
        COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
            ${CMAKE_SOURCE_DIR}/gen/volk_gnsssdr_tmpl_utils.py
            --input ${tmpl} --output ${output} ${ARGN}
        DEPENDS ${xml_files} ${py_files} ${h_files} ${tmpl}
    )
    list(APPEND volk_gnsssdr_gen_sources ${output})
endmacro()
|
||||
|
||||
# The generated public headers are written below this directory; create it
# with file(MAKE_DIRECTORY), which supersedes the deprecated make_directory().
file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/include/volk_gnsssdr)

# Public headers and dispatcher sources generated from the templates.
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr.tmpl.h ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr.h)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr.tmpl.c ${CMAKE_BINARY_DIR}/lib/volk_gnsssdr.c)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_typedefs.tmpl.h ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_typedefs.h)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_cpu.tmpl.h ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_cpu.h)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_cpu.tmpl.c ${CMAKE_BINARY_DIR}/lib/volk_gnsssdr_cpu.c)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_config_fixed.tmpl.h ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_config_fixed.h)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_machines.tmpl.h ${CMAKE_BINARY_DIR}/lib/volk_gnsssdr_machines.h)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_machines.tmpl.c ${CMAKE_BINARY_DIR}/lib/volk_gnsssdr_machines.c)
|
||||
|
||||
set(BASE_CFLAGS NONE)

# Upper-cased build type, used to look up CMAKE_<LANG>_FLAGS_<TYPE>. Quoted
# so that an empty CMAKE_BUILD_TYPE does not leave string(TOUPPER) short of
# an argument.
string(TOUPPER "${CMAKE_BUILD_TYPE}" CBTU)
# Quoted so the words are not concatenated into a single token.
message(STATUS "BUILT TYPE ${CBTU}")
message(STATUS "Base cflags = ${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS}")

set(COMPILER_INFO "")
if(MSVC)
    # A real elseif() chain: an else(<arg>) ignores its argument, and an if()
    # block may contain only one else().
    if(MSVC90) #Visual Studio 9
        set(cmake_c_compiler_version "Microsoft Visual Studio 9.0")
    elseif(MSVC10) #Visual Studio 10
        set(cmake_c_compiler_version "Microsoft Visual Studio 10.0")
    elseif(MSVC11) #Visual Studio 11
        set(cmake_c_compiler_version "Microsoft Visual Studio 11.0")
    elseif(MSVC12) #Visual Studio 12
        set(cmake_c_compiler_version "Microsoft Visual Studio 12.0")
    endif()
else()
    execute_process(COMMAND ${CMAKE_C_COMPILER} --version
        OUTPUT_VARIABLE cmake_c_compiler_version)
endif()

# First lines of the build report: "<compiler>:::<flags>" for C and C++.
# Uses CBTU (defined above) for the per-build-type flags; the previously
# referenced GRCBTU is never set in this file.
set(COMPILER_INFO "${CMAKE_C_COMPILER}:::${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS}\n${CMAKE_CXX_COMPILER}:::${CMAKE_CXX_FLAGS_${CBTU}} ${CMAKE_CXX_FLAGS}\n" )
|
||||
|
||||
foreach(machine_name ${available_machines})
    # Source file generated for this machine from the common template.
    set(machine_source ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr_machine_${machine_name}.c)
    gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_machine_xxx.tmpl.c ${machine_source} ${machine_name})

    # Ask the helper script for the compile flags of this machine.
    execute_process(
        COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
            ${CMAKE_SOURCE_DIR}/gen/volk_gnsssdr_compile_utils.py
            --mode "machine_flags" --machine "${machine_name}" --compiler "${COMPILER_NAME}"
        OUTPUT_VARIABLE ${machine_name}_flags OUTPUT_STRIP_TRAILING_WHITESPACE
    )

    # Report the complete flag set and append it to the build report.
    set(machine_build_flags "${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS} ${${machine_name}_flags}")
    message(STATUS "BUILD INFO ::: ${machine_name} ::: ${COMPILER_NAME} ::: ${machine_build_flags}")
    set(COMPILER_INFO "${COMPILER_INFO}${machine_name}:::${COMPILER_NAME}:::${machine_build_flags}\n" )

    # The machine-specific flags apply to this machine's source file only.
    if(${machine_name}_flags)
        set_source_files_properties(${machine_source} PROPERTIES COMPILE_FLAGS "${${machine_name}_flags}")
    endif()

    # Record the LV_MACHINE_<NAME> definition for this machine.
    string(TOUPPER LV_MACHINE_${machine_name} machine_def)
    list(APPEND machine_defs ${machine_def})
endforeach()
|
||||
|
||||
# Convert to a C string to compile and display properly
|
||||
# Every expansion is quoted so that an empty value, or one containing
# semicolons, still reaches string() as exactly one argument.
string(STRIP "${cmake_c_compiler_version}" cmake_c_compiler_version)
string(STRIP "${COMPILER_INFO}" COMPILER_INFO)
message(STATUS "Compiler Version: ${cmake_c_compiler_version}")
# Turn real newlines into the two-character sequence \n for use in a C string.
string(REPLACE "\n" " \\n" cmake_c_compiler_version "${cmake_c_compiler_version}")
string(REPLACE "\n" " \\n" COMPILER_INFO "${COMPILER_INFO}")
|
||||
|
||||
########################################################################
|
||||
# Set local include directories first
|
||||
########################################################################
|
||||
include_directories(
|
||||
${CMAKE_BINARY_DIR}/include
|
||||
${CMAKE_SOURCE_DIR}/include
|
||||
${CMAKE_SOURCE_DIR}/kernels
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
)
|
||||
|
||||
########################################################################
|
||||
# Handle ASM support
|
||||
# on by default, but let users turn it off
|
||||
########################################################################
|
||||
if(CMAKE_VERSION VERSION_GREATER "2.8.9")
    # Architectures for which hand-written assembly kernels exist.
    set(ASM_ARCHS_AVAILABLE "armv7")

    set(FULL_C_FLAGS "${CMAKE_C_FLAGS}" "${CMAKE_CXX_COMPILER_ARG1}")

    # Look for each known architecture name in the compiler flags; on a match,
    # set up the assembler flags and add the assembly sources.
    foreach(ARCH ${ASM_ARCHS_AVAILABLE})
        message(STATUS "--==>> -CFLAGS1: ${FULL_C_FLAGS}")
        string(REGEX MATCH "${ARCH}" ASM_ARCH "${FULL_C_FLAGS}")
        if(ASM_ARCH STREQUAL "armv7")
            set(ASM-ATT $ENV{ASM})
            message(STATUS "---- Adding ASM files") # we always use ATT syntax
            message(STATUS "-- Detected armv7 architecture; enabling ASM")
            # architecture specific assembler flags
            set(ARCH_ASM_FLAGS "-mfpu=neon -g")
            # assembly sources and their include directory
            include_directories(${CMAKE_SOURCE_DIR}/kernels/volk_gnsssdr/asm/neon)
            file(GLOB asm_files ${CMAKE_SOURCE_DIR}/kernels/volk_gnsssdr/asm/neon/*.s)
            foreach(asm_file ${asm_files})
                list(APPEND volk_gnsssdr_sources ${asm_file})
                message(STATUS "Adding source file: ${asm_file}")
            endforeach()
        endif()
        set(CMAKE_ASM-ATT_FLAGS_INIT ${ARCH_ASM_FLAGS})
        enable_language(ASM-ATT) # this must be after flags_init
        message(STATUS "asm flags: ${CMAKE_ASM-ATT_FLAGS}")
    endforeach()

else()
    message(STATUS "Not enabling ASM support. CMake >= 2.8.10 required.")
endif()
|
||||
|
||||
########################################################################
|
||||
# Handle orc support
|
||||
########################################################################
|
||||
if(NOT ORC_FOUND)
    message(STATUS "Did not find liborc and orcc, disabling orc support...")
else()
    # Make the ORC headers and libraries available to the build.
    include_directories(${ORC_INCLUDE_DIRS})
    link_directories(${ORC_LIBRARY_DIRS})
    list(APPEND volk_gnsssdr_libraries ${ORC_LIBRARIES})

    # Translate every .orc file into a C source with orcc and compile the
    # result as part of the library.
    file(GLOB orc_files ${CMAKE_SOURCE_DIR}/orc/*.orc)
    foreach(orc_file ${orc_files})
        # The generated C file is named after the .orc file.
        get_filename_component(orc_file_name_we ${orc_file} NAME_WE)
        set(orcc_gen ${CMAKE_CURRENT_BINARY_DIR}/${orc_file_name_we}.c)

        add_custom_command(
            OUTPUT ${orcc_gen}
            COMMAND ${ORCC_EXECUTABLE} --include math.h --implementation -o ${orcc_gen} ${orc_file}
            DEPENDS ${orc_file}
        )
        list(APPEND volk_gnsssdr_sources ${orcc_gen})
    endforeach()
endif()
|
||||
|
||||
|
||||
########################################################################
|
||||
# Handle the generated constants
|
||||
########################################################################
|
||||
|
||||
# Capture the current UTC time as the build date. print(...) with a single
# argument runs under both Python 2 and Python 3; the former print statement
# is a syntax error on Python 3.
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c
    "import time;print(time.strftime('%a, %d %b %Y %H:%M:%S', time.gmtime()))"
    OUTPUT_VARIABLE BUILD_DATE OUTPUT_STRIP_TRAILING_WHITESPACE
)
message(STATUS "Loading build date ${BUILD_DATE} into constants...")
message(STATUS "Loading version ${VERSION} into constants...")

# Double escape for windows backslash path separators. The expansion is
# quoted so that an empty or unset prefix does not leave string(REPLACE)
# short of an argument.
string(REPLACE "\\" "\\\\" prefix "${prefix}")

# Fill in the @VAR@ placeholders of constants.c.in and compile the result.
configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/constants.c.in
    ${CMAKE_CURRENT_BINARY_DIR}/constants.c
@ONLY)

list(APPEND volk_gnsssdr_sources ${CMAKE_CURRENT_BINARY_DIR}/constants.c)
|
||||
|
||||
########################################################################
|
||||
# Setup the volk_gnsssdr sources list and library
|
||||
########################################################################
|
||||
# On non-Windows toolchains append -fvisibility=hidden to both the C and the
# C++ flag strings.
if(NOT WIN32)
    foreach(flags_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
        set(${flags_var} "${${flags_var}} -fvisibility=hidden")
    endforeach()
endif()

# Hand-written runtime sources first ...
list(APPEND volk_gnsssdr_sources
    ${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr_prefs.c
    ${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr_rank_archs.c
    ${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr_malloc.c
)
# ... followed by the sources listed in volk_gnsssdr_gen_sources.
list(APPEND volk_gnsssdr_sources ${volk_gnsssdr_gen_sources})
|
||||
|
||||
#set the machine definitions where applicable
|
||||
# Attach the machine definitions to the two generated dispatcher sources only.
set_property(
    SOURCE
        ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr.c
        ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr_machines.c
    PROPERTY COMPILE_DEFINITIONS "${machine_defs}"
)

if(MSVC)
    # MSVC needs the bundled stdint compatibility headers and HAVE_CONFIG_H.
    include_directories(${CMAKE_SOURCE_DIR}/cmake/msvc)
    add_definitions(-DHAVE_CONFIG_H)
    # complex.h is missing under MSVC, so every library source is built as C++.
    set_property(SOURCE ${volk_gnsssdr_sources} PROPERTY LANGUAGE CXX)
endif()
|
||||
|
||||
#create the volk_gnsssdr runtime library
|
||||
|
||||
#MODIFICATIONS BY GNSS-SDR
# The kernel headers (h_files) and the .orc files are listed on the shared
# library target and sorted into named source groups.
file(GLOB orc ${CMAKE_SOURCE_DIR}/orc/*.orc)

source_group("Kernels" FILES ${h_files})
source_group("ORC Files" FILES ${orc})

add_library(volk_gnsssdr SHARED ${volk_gnsssdr_sources} ${h_files} ${orc})
#END OF MODIFICATIONS

set_target_properties(volk_gnsssdr PROPERTIES DEFINE_SYMBOL "volk_gnsssdr_EXPORTS")
set_target_properties(volk_gnsssdr PROPERTIES SOVERSION ${LIBVER})
target_link_libraries(volk_gnsssdr ${volk_gnsssdr_libraries})

# Install rules: runtime pieces and development pieces go to separate components.
install(TARGETS volk_gnsssdr
    RUNTIME DESTINATION bin              COMPONENT "volk_gnsssdr_runtime" # .dll file
    LIBRARY DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_runtime" # .so file
    ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_devel"   # .lib file
)
|
||||
|
||||
# Optional static flavour of the library, built from the same source list.
if(ENABLE_STATIC_LIBS)
    add_library(volk_gnsssdr_static STATIC ${volk_gnsssdr_sources})

    # Outside Windows the archive is given the same base name as the shared
    # library; on Windows the target keeps its own name.
    if(NOT WIN32)
        set_property(TARGET volk_gnsssdr_static PROPERTY OUTPUT_NAME volk_gnsssdr)
    endif()

    install(TARGETS volk_gnsssdr_static
        ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_devel"   # .lib file
    )
endif()
|
||||
|
||||
########################################################################
|
||||
# Build the QA test application
|
||||
########################################################################
|
||||
|
||||
|
||||
# The QA executable is only built and registered as a test when Boost was found.
if(Boost_FOUND)

    include_directories(${Boost_INCLUDE_DIRS})
    link_directories(${Boost_LIBRARY_DIRS})

    # testqa.cc is compiled with the Boost.Test dynamic-link and main macros.
    set_property(
        SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc
        PROPERTY COMPILE_DEFINITIONS BOOST_TEST_DYN_LINK BOOST_TEST_MAIN
    )

    add_executable(test_all
        ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc
    )
    target_link_libraries(test_all volk_gnsssdr ${Boost_LIBRARIES})
    add_test(qa_volk_gnsssdr_test_all test_all)

endif()
|
63
src/algorithms/libs/volk_gnsssdr/lib/constants.c.in
Normal file
63
src/algorithms/libs/volk_gnsssdr/lib/constants.c.in
Normal file
@ -0,0 +1,63 @@
|
||||
/* -*- c++ -*- */
|
||||
/*
|
||||
* Copyright 2013 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GNU Radio
|
||||
*
|
||||
* GNU Radio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* GNU Radio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Radio; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
|
||||
#include <volk_gnsssdr/constants.h>
|
||||
|
||||
/*
 * Build-information accessors. Each @NAME@ token below is a placeholder in
 * this template; every function simply returns the resulting string literal.
 */

/* Returns the string substituted for @prefix@. */
char* volk_gnsssdr_prefix()
{
  return "@prefix@";
}

/* Returns the string substituted for @BUILD_DATE@. */
char* volk_gnsssdr_build_date()
{
  return "@BUILD_DATE@";
}

/* Returns the string substituted for @VERSION@. */
char* volk_gnsssdr_version()
{
  return "@VERSION@";
}

/* Returns the string substituted for @cmake_c_compiler_version@. */
char* volk_gnsssdr_c_compiler()
{
  return "@cmake_c_compiler_version@";
}

/* Returns the string substituted for @COMPILER_INFO@. */
char* volk_gnsssdr_compiler_flags()
{
  return "@COMPILER_INFO@";
}

/* Returns the string substituted for @available_machines@. */
char* volk_gnsssdr_available_machines()
{
  return "@available_machines@";
}
|
188
src/algorithms/libs/volk_gnsssdr/lib/gcc_x86_cpuid.h
Normal file
188
src/algorithms/libs/volk_gnsssdr/lib/gcc_x86_cpuid.h
Normal file
@ -0,0 +1,188 @@
|
||||
/*
|
||||
* Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the
|
||||
* Free Software Foundation; either version 3, or (at your option) any
|
||||
* later version.
|
||||
*
|
||||
* This file is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Under Section 7 of GPL version 3, you are granted additional
|
||||
* permissions described in the GCC Runtime Library Exception, version
|
||||
* 3.1, as published by the Free Software Foundation.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License and
|
||||
* a copy of the GCC Runtime Library Exception along with this program;
|
||||
* see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
* <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/* %ecx */
|
||||
#define bit_SSE3 (1 << 0)
|
||||
#define bit_PCLMUL (1 << 1)
|
||||
#define bit_SSSE3 (1 << 9)
|
||||
#define bit_FMA (1 << 12)
|
||||
#define bit_CMPXCHG16B (1 << 13)
|
||||
#define bit_SSE4_1 (1 << 19)
|
||||
#define bit_SSE4_2 (1 << 20)
|
||||
#define bit_MOVBE (1 << 22)
|
||||
#define bit_POPCNT (1 << 23)
|
||||
#define bit_AES (1 << 25)
|
||||
#define bit_XSAVE (1 << 26)
|
||||
#define bit_OSXSAVE (1 << 27)
|
||||
#define bit_AVX (1 << 28)
|
||||
#define bit_F16C (1 << 29)
|
||||
#define bit_RDRND (1 << 30)
|
||||
|
||||
/* %edx */
|
||||
#define bit_CMPXCHG8B (1 << 8)
|
||||
#define bit_CMOV (1 << 15)
|
||||
#define bit_MMX (1 << 23)
|
||||
#define bit_FXSAVE (1 << 24)
|
||||
#define bit_SSE (1 << 25)
|
||||
#define bit_SSE2 (1 << 26)
|
||||
|
||||
/* Extended Features */
|
||||
/* %ecx */
|
||||
#define bit_LAHF_LM (1 << 0)
|
||||
#define bit_ABM (1 << 5)
|
||||
#define bit_SSE4a (1 << 6)
|
||||
#define bit_XOP (1 << 11)
|
||||
#define bit_LWP (1 << 15)
|
||||
#define bit_FMA4 (1 << 16)
|
||||
#define bit_TBM (1 << 21)
|
||||
|
||||
/* %edx */
|
||||
#define bit_MMXEXT (1 << 22)
|
||||
#define bit_LM (1 << 29)
|
||||
#define bit_3DNOWP (1 << 30)
|
||||
#define bit_3DNOW (1 << 31)
|
||||
|
||||
/* Extended Features (%eax == 7) */
|
||||
#define bit_FSGSBASE (1 << 0)
|
||||
#define bit_BMI (1 << 3)
|
||||
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
/* %ebx may be the PIC register. */
|
||||
#if __GNUC__ >= 3
|
||||
#define __cpuid(level, a, b, c, d) \
|
||||
__asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
|
||||
"cpuid\n\t" \
|
||||
"xchg{l}\t{%%}ebx, %1\n\t" \
|
||||
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level))
|
||||
|
||||
#define __cpuid_count(level, count, a, b, c, d) \
|
||||
__asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
|
||||
"cpuid\n\t" \
|
||||
"xchg{l}\t{%%}ebx, %1\n\t" \
|
||||
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level), "2" (count))
|
||||
#else
|
||||
/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
|
||||
nor alternatives in i386 code. */
|
||||
#define __cpuid(level, a, b, c, d) \
|
||||
__asm__ ("xchgl\t%%ebx, %1\n\t" \
|
||||
"cpuid\n\t" \
|
||||
"xchgl\t%%ebx, %1\n\t" \
|
||||
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level))
|
||||
|
||||
#define __cpuid_count(level, count, a, b, c, d) \
|
||||
__asm__ ("xchgl\t%%ebx, %1\n\t" \
|
||||
"cpuid\n\t" \
|
||||
"xchgl\t%%ebx, %1\n\t" \
|
||||
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level), "2" (count))
|
||||
#endif
|
||||
#else
|
||||
#define __cpuid(level, a, b, c, d) \
|
||||
__asm__ ("cpuid\n\t" \
|
||||
: "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level))
|
||||
|
||||
#define __cpuid_count(level, count, a, b, c, d) \
|
||||
__asm__ ("cpuid\n\t" \
|
||||
: "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level), "2" (count))
|
||||
#endif
|
||||
|
||||
/* Return highest supported input value for cpuid instruction. ext can
|
||||
be either 0x0 or 0x80000000 to return highest supported value for
|
||||
basic or extended cpuid information. Function returns 0 if cpuid
|
||||
is not supported or whatever cpuid returns in eax register. If sig
|
||||
pointer is non-null, then first four bytes of the signature
|
||||
(as found in ebx register) are returned in location pointed by sig. */
|
||||
|
||||
static __inline unsigned int
|
||||
__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
|
||||
{
|
||||
unsigned int __eax, __ebx, __ecx, __edx;
|
||||
|
||||
#ifndef __x86_64__
|
||||
/* See if we can use cpuid. On AMD64 we always can. */
|
||||
#if __GNUC__ >= 3
|
||||
__asm__ ("pushf{l|d}\n\t"
|
||||
"pushf{l|d}\n\t"
|
||||
"pop{l}\t%0\n\t"
|
||||
"mov{l}\t{%0, %1|%1, %0}\n\t"
|
||||
"xor{l}\t{%2, %0|%0, %2}\n\t"
|
||||
"push{l}\t%0\n\t"
|
||||
"popf{l|d}\n\t"
|
||||
"pushf{l|d}\n\t"
|
||||
"pop{l}\t%0\n\t"
|
||||
"popf{l|d}\n\t"
|
||||
: "=&r" (__eax), "=&r" (__ebx)
|
||||
: "i" (0x00200000));
|
||||
#else
|
||||
/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
|
||||
nor alternatives in i386 code. */
|
||||
__asm__ ("pushfl\n\t"
|
||||
"pushfl\n\t"
|
||||
"popl\t%0\n\t"
|
||||
"movl\t%0, %1\n\t"
|
||||
"xorl\t%2, %0\n\t"
|
||||
"pushl\t%0\n\t"
|
||||
"popfl\n\t"
|
||||
"pushfl\n\t"
|
||||
"popl\t%0\n\t"
|
||||
"popfl\n\t"
|
||||
: "=&r" (__eax), "=&r" (__ebx)
|
||||
: "i" (0x00200000));
|
||||
#endif
|
||||
|
||||
if (!((__eax ^ __ebx) & 0x00200000))
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
/* Host supports cpuid. Return highest supported cpuid input value. */
|
||||
__cpuid (__ext, __eax, __ebx, __ecx, __edx);
|
||||
|
||||
if (__sig)
|
||||
*__sig = __ebx;
|
||||
|
||||
return __eax;
|
||||
}
|
||||
|
||||
/* Return cpuid data for requested cpuid level, as found in returned
|
||||
eax, ebx, ecx and edx registers. The function checks if cpuid is
|
||||
supported and returns 1 for valid cpuid information or 0 for
|
||||
unsupported cpuid level. All pointers are required to be non-null. */
|
||||
|
||||
static __inline int
__get_cpuid (unsigned int __level,
             unsigned int *__eax, unsigned int *__ebx,
             unsigned int *__ecx, unsigned int *__edx)
{
  /* Bit 31 of the requested level selects which range (basic or extended)
     is queried for its highest supported level. */
  const unsigned int __ext = __level & 0x80000000;
  const unsigned int __max_level = __get_cpuid_max (__ext, 0);

  /* Requested level is above what __get_cpuid_max reported (it reports 0
     when cpuid itself is unavailable): nothing is written, return 0. */
  if (__max_level < __level)
    return 0;

  /* Level is supported: execute cpuid and store the four registers. */
  __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
  return 1;
}
|
@ -0,0 +1,89 @@
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <qa_16s_add_quad_aligned16.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_16s_add_quad_aligned16.h>
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
//test for sse2
|
||||
|
||||
#ifndef LV_HAVE_SSE2
|
||||
|
||||
void qa_16s_add_quad_aligned16::t1() {
|
||||
printf("sse2 not available... no test performed\n");
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
|
||||
|
||||
void qa_16s_add_quad_aligned16::t1() {
|
||||
|
||||
volk_gnsssdr_environment_init();
|
||||
clock_t start, end;
|
||||
double total;
|
||||
const int vlen = 3200;
|
||||
const int ITERS = 100000;
|
||||
__VOLK_ATTR_ALIGNED(16) short input0[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short input1[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short input2[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short input3[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short input4[vlen];
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) short output0[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short output1[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short output2[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short output3[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short output01[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short output11[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short output21[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short output31[vlen];
|
||||
|
||||
for(int i = 0; i < vlen; ++i) {
|
||||
short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
|
||||
short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
|
||||
short plus1 = ((short) (rand() - (RAND_MAX/2))) >> 2;
|
||||
short minus1 = ((short) (rand() - (RAND_MAX/2))) >> 2;
|
||||
short plus2 = ((short) (rand() - (RAND_MAX/2))) >> 2;
|
||||
short minus2 = ((short) (rand() - (RAND_MAX/2))) >> 2;
|
||||
short plus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
|
||||
short minus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
|
||||
short plus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
|
||||
short minus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
|
||||
|
||||
input0[i] = plus0 - minus0;
|
||||
input1[i] = plus1 - minus1;
|
||||
input2[i] = plus2 - minus2;
|
||||
input3[i] = plus3 - minus3;
|
||||
input4[i] = plus4 - minus4;
|
||||
|
||||
}
|
||||
printf("16s_add_quad_aligned\n");
|
||||
|
||||
start = clock();
|
||||
for(int count = 0; count < ITERS; ++count) {
|
||||
volk_gnsssdr_16s_add_quad_aligned16_manual(output0, output1, output2, output3, input0, input1, input2, input3, input4, vlen << 1 , "generic");
|
||||
}
|
||||
end = clock();
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
printf("generic_time: %f\n", total);
|
||||
start = clock();
|
||||
for(int count = 0; count < ITERS; ++count) {
|
||||
volk_gnsssdr_16s_add_quad_aligned16_manual(output01, output11, output21, output31, input0, input1, input2, input3, input4, vlen << 1 , "sse2");
|
||||
}
|
||||
end = clock();
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
printf("sse2_time: %f\n", total);
|
||||
for(int i = 0; i < 1; ++i) {
|
||||
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
|
||||
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
|
||||
}
|
||||
|
||||
for(int i = 0; i < vlen; ++i) {
|
||||
//printf("%d...%d\n", output0[i], output01[i]);
|
||||
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
|
||||
CPPUNIT_ASSERT_EQUAL(output1[i], output11[i]);
|
||||
CPPUNIT_ASSERT_EQUAL(output2[i], output21[i]);
|
||||
CPPUNIT_ASSERT_EQUAL(output3[i], output31[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@ -0,0 +1,18 @@
|
||||
#ifndef INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H
|
||||
#define INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H
|
||||
|
||||
#include <cppunit/extensions/HelperMacros.h>
|
||||
#include <cppunit/TestCase.h>
|
||||
|
||||
class qa_16s_add_quad_aligned16 : public CppUnit::TestCase {
|
||||
|
||||
CPPUNIT_TEST_SUITE (qa_16s_add_quad_aligned16);
|
||||
CPPUNIT_TEST (t1);
|
||||
CPPUNIT_TEST_SUITE_END ();
|
||||
|
||||
private:
|
||||
void t1 ();
|
||||
};
|
||||
|
||||
|
||||
#endif /* INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H */
|
@ -0,0 +1,106 @@
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <qa_16s_branch_4_state_8_aligned16.h>
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
|
||||
//test for ssse3
|
||||
|
||||
#ifndef LV_HAVE_SSSE3
|
||||
|
||||
void qa_16s_branch_4_state_8_aligned16::t1() {
|
||||
printf("ssse3 not available... no test performed\n");
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void qa_16s_branch_4_state_8_aligned16::t1() {
|
||||
const int num_iters = 1000000;
|
||||
const int vlen = 32;
|
||||
|
||||
static char permute0[16]__attribute__((aligned(16))) = {0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01, 0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03};
|
||||
static char permute1[16]__attribute__((aligned(16))) = {0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03, 0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01};
|
||||
static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f};
|
||||
static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d};
|
||||
static char* permuters[4] = {permute0, permute1, permute2, permute3};
|
||||
|
||||
unsigned int num_bytes = vlen << 1;
|
||||
|
||||
volk_gnsssdr_environment_init();
|
||||
clock_t start, end;
|
||||
double total;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) short target[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short target2[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short target3[vlen];
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) short src0[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = {
|
||||
7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 };
|
||||
__VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = {
|
||||
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
|
||||
__VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = {
|
||||
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
|
||||
__VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = {
|
||||
0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 };
|
||||
__VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = {
|
||||
0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff };
|
||||
__VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
|
||||
|
||||
|
||||
|
||||
for(int i = 0; i < vlen; ++i) {
|
||||
src0[i] = i;
|
||||
|
||||
}
|
||||
|
||||
|
||||
printf("16s_branch_4_state_8_aligned\n");
|
||||
|
||||
|
||||
start = clock();
|
||||
for(int i = 0; i < num_iters; ++i) {
|
||||
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
|
||||
}
|
||||
end = clock();
|
||||
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
|
||||
printf("permute_and_scalar_add_time: %f\n", total);
|
||||
|
||||
|
||||
|
||||
start = clock();
|
||||
for(int i = 0; i < num_iters; ++i) {
|
||||
volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target2, src0, permuters, cntl2, cntl3, scalars, "ssse3");
|
||||
}
|
||||
end = clock();
|
||||
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
|
||||
printf("branch_4_state_8_time, ssse3: %f\n", total);
|
||||
|
||||
start = clock();
|
||||
for(int i = 0; i < num_iters; ++i) {
|
||||
volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic");
|
||||
}
|
||||
end = clock();
|
||||
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
|
||||
printf("permute_and_scalar_add_time, generic: %f\n", total);
|
||||
|
||||
|
||||
|
||||
for(int i = 0; i < vlen; ++i) {
|
||||
printf("psa... %d, b4s8... %d\n", target[i], target3[i]);
|
||||
}
|
||||
|
||||
for(int i = 0; i < vlen; ++i) {
|
||||
|
||||
CPPUNIT_ASSERT(target[i] == target2[i]);
|
||||
CPPUNIT_ASSERT(target[i] == target3[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
@ -0,0 +1,18 @@
|
||||
#ifndef INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H
|
||||
#define INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H
|
||||
|
||||
#include <cppunit/extensions/HelperMacros.h>
|
||||
#include <cppunit/TestCase.h>
|
||||
|
||||
class qa_16s_branch_4_state_8_aligned16 : public CppUnit::TestCase {
|
||||
|
||||
CPPUNIT_TEST_SUITE (qa_16s_branch_4_state_8_aligned16);
|
||||
CPPUNIT_TEST (t1);
|
||||
CPPUNIT_TEST_SUITE_END ();
|
||||
|
||||
private:
|
||||
void t1 ();
|
||||
};
|
||||
|
||||
|
||||
#endif /* INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H */
|
@ -0,0 +1,78 @@
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <qa_16s_permute_and_scalar_add_aligned16.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_16s_permute_and_scalar_add_aligned16.h>
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
|
||||
//test for sse2
|
||||
|
||||
#ifndef LV_HAVE_SSE2
|
||||
|
||||
void qa_16s_permute_and_scalar_add_aligned16::t1() {
|
||||
printf("sse2 not available... no test performed\n");
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void qa_16s_permute_and_scalar_add_aligned16::t1() {
|
||||
const int vlen = 64;
|
||||
|
||||
unsigned int num_bytes = vlen << 1;
|
||||
|
||||
volk_gnsssdr_environment_init();
|
||||
clock_t start, end;
|
||||
double total;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) short target[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short target2[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short src0[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short cntl0[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short cntl1[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short cntl2[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short cntl3[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
|
||||
|
||||
for(int i = 0; i < vlen; ++i) {
|
||||
src0[i] = i;
|
||||
permute_indexes[i] = (3 * i)%vlen;
|
||||
cntl0[i] = 0xff;
|
||||
cntl1[i] = 0xff * (i%2);
|
||||
cntl2[i] = 0xff * ((i>>1)%2);
|
||||
cntl3[i] = 0xff * ((i%4) == 3);
|
||||
}
|
||||
|
||||
printf("16s_permute_and_scalar_add_aligned\n");
|
||||
|
||||
start = clock();
|
||||
for(int i = 0; i < 100000; ++i) {
|
||||
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "generic");
|
||||
}
|
||||
end = clock();
|
||||
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
|
||||
printf("generic_time: %f\n", total);
|
||||
|
||||
start = clock();
|
||||
for(int i = 0; i < 100000; ++i) {
|
||||
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target2, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
|
||||
}
|
||||
end = clock();
|
||||
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
|
||||
printf("sse2_time: %f\n", total);
|
||||
|
||||
|
||||
for(int i = 0; i < vlen; ++i) {
|
||||
//printf("generic... %d, sse2... %d\n", target[i], target2[i]);
|
||||
}
|
||||
|
||||
for(int i = 0; i < vlen; ++i) {
|
||||
|
||||
CPPUNIT_ASSERT(target[i] == target2[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@ -0,0 +1,18 @@
|
||||
#ifndef INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H
|
||||
#define INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H
|
||||
|
||||
#include <cppunit/extensions/HelperMacros.h>
|
||||
#include <cppunit/TestCase.h>
|
||||
|
||||
class qa_16s_permute_and_scalar_add_aligned16 : public CppUnit::TestCase {
|
||||
|
||||
CPPUNIT_TEST_SUITE (qa_16s_permute_and_scalar_add_aligned16);
|
||||
CPPUNIT_TEST (t1);
|
||||
CPPUNIT_TEST_SUITE_END ();
|
||||
|
||||
private:
|
||||
void t1 ();
|
||||
};
|
||||
|
||||
|
||||
#endif /* INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H */
|
@ -0,0 +1,60 @@
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <qa_16s_quad_max_star_aligned16.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_16s_quad_max_star_aligned16.h>
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
|
||||
//test for sse2
|
||||
|
||||
#ifndef LV_HAVE_SSE2
|
||||
|
||||
void qa_16s_quad_max_star_aligned16::t1() {
|
||||
printf("sse2 not available... no test performed\n");
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void qa_16s_quad_max_star_aligned16::t1() {
|
||||
const int vlen = 34;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) short input0[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short input1[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short input2[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short input3[vlen];
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) short output0[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) short output1[vlen];
|
||||
|
||||
for(int i = 0; i < vlen; ++i) {
|
||||
short plus0 = (short) (rand() - (RAND_MAX/2));
|
||||
short plus1 = (short) (rand() - (RAND_MAX/2));
|
||||
short plus2 = (short) (rand() - (RAND_MAX/2));
|
||||
short plus3 = (short) (rand() - (RAND_MAX/2));
|
||||
|
||||
short minus0 = (short) (rand() - (RAND_MAX/2));
|
||||
short minus1 = (short) (rand() - (RAND_MAX/2));
|
||||
short minus2 = (short) (rand() - (RAND_MAX/2));
|
||||
short minus3 = (short) (rand() - (RAND_MAX/2));
|
||||
|
||||
input0[i] = plus0 - minus0;
|
||||
input1[i] = plus1 - minus1;
|
||||
input2[i] = plus2 - minus2;
|
||||
input3[i] = plus3 - minus3;
|
||||
}
|
||||
|
||||
volk_gnsssdr_16s_quad_max_star_aligned16_manual(output0, input0, input1, input2, input3, 2*vlen, "generic");
|
||||
|
||||
volk_gnsssdr_16s_quad_max_star_aligned16_manual(output1, input0, input1, input2, input3, 2*vlen, "sse2");
|
||||
|
||||
printf("16s_quad_max_star_aligned\n");
|
||||
for(int i = 0; i < vlen; ++i) {
|
||||
printf("generic... %d, sse2... %d, inputs: %d, %d, %d, %d\n", output0[i], output1[i], input0[i], input1[i], input2[i], input3[i]);
|
||||
}
|
||||
|
||||
for(int i = 0; i < vlen; ++i) {
|
||||
|
||||
CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@ -0,0 +1,18 @@
|
||||
#ifndef INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H
|
||||
#define INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H
|
||||
|
||||
#include <cppunit/extensions/HelperMacros.h>
|
||||
#include <cppunit/TestCase.h>
|
||||
|
||||
class qa_16s_quad_max_star_aligned16 : public CppUnit::TestCase {
|
||||
|
||||
CPPUNIT_TEST_SUITE (qa_16s_quad_max_star_aligned16);
|
||||
CPPUNIT_TEST (t1);
|
||||
CPPUNIT_TEST_SUITE_END ();
|
||||
|
||||
private:
|
||||
void t1 ();
|
||||
};
|
||||
|
||||
|
||||
#endif /* INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H */
|
@ -0,0 +1,61 @@
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <qa_32f_fm_detect_aligned16.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_32f_fm_detect_aligned16.h>
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
|
||||
//test for sse
|
||||
|
||||
#ifndef LV_HAVE_SSE
|
||||
|
||||
void qa_32f_fm_detect_aligned16::t1() {
|
||||
printf("sse not available... no test performed\n");
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void qa_32f_fm_detect_aligned16::t1() {
|
||||
|
||||
volk_gnsssdr_environment_init();
|
||||
clock_t start, end;
|
||||
double total;
|
||||
const int vlen = 3201;
|
||||
const int ITERS = 10000;
|
||||
__VOLK_ATTR_ALIGNED(16) float input0[vlen];
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float output0[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) float output01[vlen];
|
||||
|
||||
for(int i = 0; i < vlen; ++i) {
|
||||
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
|
||||
}
|
||||
printf("32f_fm_detect_aligned\n");
|
||||
|
||||
start = clock();
|
||||
float save = 0.1;
|
||||
for(int count = 0; count < ITERS; ++count) {
|
||||
volk_gnsssdr_32f_fm_detect_aligned16_manual(output0, input0, 1.0, &save, vlen, "generic");
|
||||
}
|
||||
end = clock();
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
printf("generic_time: %f\n", total);
|
||||
start = clock();
|
||||
save = 0.1;
|
||||
for(int count = 0; count < ITERS; ++count) {
|
||||
volk_gnsssdr_32f_fm_detect_aligned16_manual(output01, input0, 1.0, &save, vlen, "sse");
|
||||
}
|
||||
end = clock();
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
printf("sse_time: %f\n", total);
|
||||
for(int i = 0; i < 1; ++i) {
|
||||
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
|
||||
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
|
||||
}
|
||||
|
||||
for(int i = 0; i < vlen; ++i) {
|
||||
//printf("%d...%d\n", output0[i], output01[i]);
|
||||
CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i]) * 1e-4);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@ -0,0 +1,18 @@
|
||||
#ifndef INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H
|
||||
#define INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H
|
||||
|
||||
#include <cppunit/extensions/HelperMacros.h>
|
||||
#include <cppunit/TestCase.h>
|
||||
|
||||
class qa_32f_fm_detect_aligned16 : public CppUnit::TestCase {
|
||||
|
||||
CPPUNIT_TEST_SUITE (qa_32f_fm_detect_aligned16);
|
||||
CPPUNIT_TEST (t1);
|
||||
CPPUNIT_TEST_SUITE_END ();
|
||||
|
||||
private:
|
||||
void t1 ();
|
||||
};
|
||||
|
||||
|
||||
#endif /* INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H */
|
@ -0,0 +1,103 @@
|
||||
#include <volk_gnsssdr/volk_gnsssdr_runtime.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <qa_32f_index_max_aligned16.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#define ERR_DELTA (1e-4)
|
||||
#define NUM_ITERS 1000000
|
||||
#define VEC_LEN 3097
|
||||
static float uniform() {
|
||||
return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1)
|
||||
}
|
||||
|
||||
static void
|
||||
random_floats (float *buf, unsigned n)
|
||||
{
|
||||
unsigned int i = 0;
|
||||
for (; i < n; i++) {
|
||||
|
||||
buf[i] = uniform () * 32767;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifndef LV_HAVE_SSE
|
||||
|
||||
void qa_32f_index_max_aligned16::t1(){
|
||||
printf("sse not available... no test performed\n");
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
|
||||
void qa_32f_index_max_aligned16::t1(){
|
||||
|
||||
const int vlen = VEC_LEN;
|
||||
|
||||
|
||||
volk_gnsssdr_runtime_init();
|
||||
|
||||
volk_gnsssdr_environment_init();
|
||||
int ret;
|
||||
|
||||
unsigned int* target_sse4_1;
|
||||
unsigned int* target_sse;
|
||||
unsigned int* target_generic;
|
||||
float* src0 ;
|
||||
|
||||
|
||||
unsigned int i_target_sse4_1;
|
||||
target_sse4_1 = &i_target_sse4_1;
|
||||
unsigned int i_target_sse;
|
||||
target_sse = &i_target_sse;
|
||||
unsigned int i_target_generic;
|
||||
target_generic = &i_target_generic;
|
||||
|
||||
ret = posix_memalign((void**)&src0, 16, vlen *sizeof(float));
|
||||
|
||||
random_floats((float*)src0, vlen);
|
||||
|
||||
printf("32f_index_max_aligned16\n");
|
||||
|
||||
clock_t start, end;
|
||||
double total;
|
||||
|
||||
|
||||
start = clock();
|
||||
for(int k = 0; k < NUM_ITERS; ++k) {
|
||||
volk_gnsssdr_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic");
|
||||
}
|
||||
end = clock();
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
printf("generic time: %f\n", total);
|
||||
|
||||
start = clock();
|
||||
for(int k = 0; k < NUM_ITERS; ++k) {
|
||||
volk_gnsssdr_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2");
|
||||
}
|
||||
|
||||
end = clock();
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
printf("sse time: %f\n", total);
|
||||
|
||||
start = clock();
|
||||
for(int k = 0; k < NUM_ITERS; ++k) {
|
||||
get_volk_gnsssdr_runtime()->volk_gnsssdr_32f_index_max_aligned16(target_sse4_1, src0, vlen);
|
||||
}
|
||||
|
||||
end = clock();
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
printf("sse4.1 time: %f\n", total);
|
||||
|
||||
|
||||
printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], target_sse[0], target_sse4_1[0]);
|
||||
CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse[0]);
|
||||
CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse4_1[0]);
|
||||
|
||||
free(src0);
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE*/
|
@ -0,0 +1,18 @@
|
||||
#ifndef INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H
|
||||
#define INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H
|
||||
|
||||
#include <cppunit/extensions/HelperMacros.h>
|
||||
#include <cppunit/TestCase.h>
|
||||
|
||||
class qa_32f_index_max_aligned16 : public CppUnit::TestCase {
|
||||
|
||||
CPPUNIT_TEST_SUITE (qa_32f_index_max_aligned16);
|
||||
CPPUNIT_TEST (t1);
|
||||
CPPUNIT_TEST_SUITE_END ();
|
||||
|
||||
private:
|
||||
void t1 ();
|
||||
};
|
||||
|
||||
|
||||
#endif /* INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H */
|
@ -0,0 +1,89 @@
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <qa_32fc_index_max_aligned16.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#define ERR_DELTA (1e-4)
|
||||
#define NUM_ITERS 1000000
|
||||
#define VEC_LEN 3096
|
||||
// Produces one rand()-derived sample mapped onto the closed interval [-1, 1].
static float uniform()
{
    // Centre the [0, 1] ratio on zero, then stretch it to full width.
    const double centered = (float) rand() / RAND_MAX - 0.5;
    return centered * 2.0;
}
|
||||
|
||||
static void
|
||||
random_floats (float *buf, unsigned n)
|
||||
{
|
||||
unsigned int i = 0;
|
||||
for (; i < n; i++) {
|
||||
|
||||
buf[i] = uniform () * 32767;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifndef LV_HAVE_SSE3
|
||||
|
||||
// Placeholder test body: performs no check and only logs that the test was skipped.
void qa_32fc_index_max_aligned16::t1()
{
    fputs("sse3 not available... no test performed\n", stdout);
}
|
||||
|
||||
#else
|
||||
|
||||
|
||||
void qa_32fc_index_max_aligned16::t1(){
|
||||
|
||||
const int vlen = VEC_LEN;
|
||||
|
||||
volk_gnsssdr_environment_init();
|
||||
int ret;
|
||||
|
||||
unsigned int* target;
|
||||
unsigned int* target_generic;
|
||||
std::complex<float>* src0 ;
|
||||
|
||||
|
||||
unsigned int i_target;
|
||||
target = &i_target;
|
||||
unsigned int i_target_generic;
|
||||
target_generic = &i_target_generic;
|
||||
ret = posix_memalign((void**)&src0, 16, vlen << 3);
|
||||
|
||||
random_floats((float*)src0, vlen * 2);
|
||||
|
||||
printf("32fc_index_max_aligned16\n");
|
||||
|
||||
clock_t start, end;
|
||||
double total;
|
||||
|
||||
|
||||
start = clock();
|
||||
for(int k = 0; k < NUM_ITERS; ++k) {
|
||||
volk_gnsssdr_32fc_index_max_aligned16_manual(target_generic, src0, vlen << 3, "generic");
|
||||
}
|
||||
end = clock();
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
printf("generic time: %f\n", total);
|
||||
|
||||
start = clock();
|
||||
for(int k = 0; k < NUM_ITERS; ++k) {
|
||||
volk_gnsssdr_32fc_index_max_aligned16_manual(target, src0, vlen << 3, "sse3");
|
||||
}
|
||||
|
||||
end = clock();
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
printf("sse3 time: %f\n", total);
|
||||
|
||||
|
||||
|
||||
|
||||
printf("generic: %u, sse3: %u\n", target_generic[0], target[0]);
|
||||
CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], 1.1);
|
||||
|
||||
|
||||
|
||||
free(src0);
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_SSE3*/
|
@ -0,0 +1,18 @@
|
||||
#ifndef INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H
|
||||
#define INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H
|
||||
|
||||
#include <cppunit/extensions/HelperMacros.h>
|
||||
#include <cppunit/TestCase.h>
|
||||
|
||||
class qa_32fc_index_max_aligned16 : public CppUnit::TestCase {
|
||||
|
||||
CPPUNIT_TEST_SUITE (qa_32fc_index_max_aligned16);
|
||||
CPPUNIT_TEST (t1);
|
||||
CPPUNIT_TEST_SUITE_END ();
|
||||
|
||||
private:
|
||||
void t1 ();
|
||||
};
|
||||
|
||||
|
||||
#endif /* INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H */
|
@ -0,0 +1,64 @@
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <qa_32fc_power_spectral_density_32f_aligned16.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_32fc_power_spectral_density_32f_aligned16.h>
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
|
||||
//test for sse3
|
||||
|
||||
#ifndef LV_HAVE_SSE3
|
||||
|
||||
// Placeholder test body: performs no check and only logs that the test was skipped.
void qa_32fc_power_spectral_density_32f_aligned16::t1()
{
    fputs("sse3 not available... no test performed\n", stdout);
}
|
||||
|
||||
#else
|
||||
|
||||
void qa_32fc_power_spectral_density_32f_aligned16::t1() {
|
||||
|
||||
volk_gnsssdr_environment_init();
|
||||
clock_t start, end;
|
||||
double total;
|
||||
const int vlen = 3201;
|
||||
const int ITERS = 10000;
|
||||
__VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen];
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float output_generic[vlen];
|
||||
__VOLK_ATTR_ALIGNED(16) float output_sse3[vlen];
|
||||
|
||||
const float scalar = vlen;
|
||||
const float rbw = 1.7;
|
||||
|
||||
float* inputLoad = (float*)input0;
|
||||
for(int i = 0; i < 2*vlen; ++i) {
|
||||
inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
|
||||
}
|
||||
printf("32fc_power_spectral_density_32f_aligned\n");
|
||||
|
||||
start = clock();
|
||||
for(int count = 0; count < ITERS; ++count) {
|
||||
volk_gnsssdr_32fc_power_spectral_density_32f_aligned16_manual(output_generic, input0, scalar, rbw, vlen, "generic");
|
||||
}
|
||||
end = clock();
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
printf("generic_time: %f\n", total);
|
||||
start = clock();
|
||||
for(int count = 0; count < ITERS; ++count) {
|
||||
volk_gnsssdr_32fc_power_spectral_density_32f_aligned16_manual(output_sse3, input0, scalar, rbw, vlen, "sse3");
|
||||
}
|
||||
end = clock();
|
||||
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
printf("sse3_time: %f\n", total);
|
||||
|
||||
for(int i = 0; i < 1; ++i) {
|
||||
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
|
||||
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
|
||||
}
|
||||
|
||||
for(int i = 0; i < vlen; ++i) {
|
||||
//printf("%d...%d\n", output0[i], output01[i]);
|
||||
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4));
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@ -0,0 +1,18 @@
|
||||
#ifndef INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H
|
||||
#define INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H
|
||||
|
||||
#include <cppunit/extensions/HelperMacros.h>
|
||||
#include <cppunit/TestCase.h>
|
||||
|
||||
class qa_32fc_power_spectral_density_32f_aligned16 : public CppUnit::TestCase {
|
||||
|
||||
CPPUNIT_TEST_SUITE (qa_32fc_power_spectral_density_32f_aligned16);
|
||||
CPPUNIT_TEST (t1);
|
||||
CPPUNIT_TEST_SUITE_END ();
|
||||
|
||||
private:
|
||||
void t1 ();
|
||||
};
|
||||
|
||||
|
||||
#endif /* INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H */
|
704
src/algorithms/libs/volk_gnsssdr/lib/qa_utils.cc
Normal file
704
src/algorithms/libs/volk_gnsssdr/lib/qa_utils.cc
Normal file
@ -0,0 +1,704 @@
|
||||
#include "qa_utils.h"
|
||||
#include <cstring>
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/assign/list_of.hpp>
|
||||
#include <boost/tokenizer.hpp>
|
||||
#include <boost/xpressive/xpressive.hpp>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <ctime>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_cpu.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
|
||||
#include <boost/typeof/typeof.hpp>
|
||||
#include <boost/type_traits.hpp>
|
||||
#include <stdio.h>
|
||||
|
||||
// Returns one rand()-based sample rescaled to the closed interval [-1, 1].
float uniform()
{
    // rand() / RAND_MAX lies in [0, 1]; recentre on zero before doubling.
    const double centered = (float) rand() / RAND_MAX - 0.5;
    return 2.0 * centered;
}
|
||||
|
||||
template <class t>
|
||||
void random_floats (t *buf, unsigned n)
|
||||
{
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
buf[i] = uniform ();
|
||||
}
|
||||
|
||||
void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n) {
|
||||
if(type.is_complex) n *= 2;
|
||||
if(type.is_float) {
|
||||
if(type.size == 8) random_floats<double>((double *)data, n);
|
||||
else random_floats<float>((float *)data, n);
|
||||
} else {
|
||||
float int_max = float(uint64_t(2) << (type.size*8));
|
||||
if(type.is_signed) int_max /= 2.0;
|
||||
for(unsigned int i=0; i<n; i++) {
|
||||
float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max;
|
||||
//man i really don't know how to do this in a more clever way, you have to cast down at some point
|
||||
switch(type.size) {
|
||||
case 8:
|
||||
if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand;
|
||||
else ((uint64_t *)data)[i] = (uint64_t) scaled_rand;
|
||||
break;
|
||||
case 4:
|
||||
if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand;
|
||||
else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
|
||||
break;
|
||||
case 2:
|
||||
if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand;
|
||||
else ((uint16_t *)data)[i] = (uint16_t) scaled_rand;
|
||||
break;
|
||||
case 1:
|
||||
if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand;
|
||||
else ((uint8_t *)data)[i] = (uint8_t) scaled_rand;
|
||||
break;
|
||||
default:
|
||||
throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the names of all desc.n_impls implementations listed in
// desc.impl_names, in order. No filtering by what the current machine
// supports is applied here.
static std::vector<std::string> get_arch_list(volk_gnsssdr_func_desc_t desc) {
    std::vector<std::string> names;

    size_t idx = 0;
    while(idx < desc.n_impls) {
        names.push_back(std::string(desc.impl_names[idx]));
        ++idx;
    }

    return names;
}
|
||||
|
||||
// Parses one type token of a kernel name into a volk_gnsssdr_type_t.
//
// Accepted form: an optional leading 's' (marks a scalar), then a bit width in
// decimal, then any number of flag letters:
//   'f' floating point, 'i' signed, 'u' unsigned, 'c' complex.
// The width is stored in bytes; `str` keeps the token exactly as passed in.
//
// Throws std::string for tokens shorter than two characters, tokens without a
// digit and tokens with an unknown flag letter; boost::lexical_cast throws if
// the text up to the last digit is not an integer. Callers in this file use
// those exceptions to tell type tokens apart from other name parts.
volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string name) {
    volk_gnsssdr_type_t type;
    type.is_float = false;
    type.is_scalar = false;
    type.is_complex = false;
    type.is_signed = false;
    type.size = 0;
    type.str = name;

    if(name.size() < 2) throw std::string("name too short to be a datatype");

    //is it a scalar?
    if(name[0] == 's') {
        type.is_scalar = true;
        name = name.substr(1, name.size()-1);
    }

    //get the data size
    size_t last_size_pos = name.find_last_of("0123456789");
    if(last_size_pos == std::string::npos)
        throw std::string("no size spec in type ").append(name);
    //will throw if malformed
    int size = boost::lexical_cast<int>(name.substr(0, last_size_pos+1));

    assert(((size % 8) == 0) && (size <= 64) && (size != 0));
    type.size = size/8; //in bytes

    //everything after the last digit is a flag letter
    for(size_t i=last_size_pos+1; i < name.size(); i++) {
        switch (name[i]) {
        case 'f':
            type.is_float = true;
            break;
        case 'i':
            type.is_signed = true;
            break;
        case 'c':
            type.is_complex = true;
            break;
        case 'u':
            type.is_signed = false;
            break;
        default:
            // This used to be a bare `throw;`. With no exception being handled
            // that calls std::terminate instead of raising something the
            // caller can catch, so throw a real exception object.
            throw std::string("unsupported type suffix in ").append(name);
        }
    }

    return type;
}
|
||||
|
||||
static void get_signatures_from_name(std::vector<volk_gnsssdr_type_t> &inputsig,
|
||||
std::vector<volk_gnsssdr_type_t> &outputsig,
|
||||
std::string name) {
|
||||
boost::char_separator<char> sep("_");
|
||||
boost::tokenizer<boost::char_separator<char> > tok(name, sep);
|
||||
std::vector<std::string> toked;
|
||||
tok.assign(name);
|
||||
toked.assign(tok.begin(), tok.end());
|
||||
assert(toked[0] == "volk");
|
||||
toked.erase(toked.begin());
|
||||
toked.erase(toked.begin());
|
||||
|
||||
//ok. we're assuming a string in the form
|
||||
//(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment)
|
||||
|
||||
enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT;
|
||||
std::string fn_name;
|
||||
volk_gnsssdr_type_t type;
|
||||
BOOST_FOREACH(std::string token, toked) {
|
||||
try {
|
||||
type = volk_gnsssdr_type_from_string(token);
|
||||
if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name...
|
||||
|
||||
if(side == SIDE_INPUT) inputsig.push_back(type);
|
||||
else outputsig.push_back(type);
|
||||
} catch (...){
|
||||
if(token[0] == 'x') { //it's a multiplier
|
||||
if(side == SIDE_INPUT) assert(inputsig.size() > 0);
|
||||
else assert(outputsig.size() > 0);
|
||||
int multiplier = boost::lexical_cast<int>(token.substr(1, token.size()-1)); //will throw if invalid
|
||||
for(int i=1; i<multiplier; i++) {
|
||||
if(side == SIDE_INPUT) inputsig.push_back(inputsig.back());
|
||||
else outputsig.push_back(outputsig.back());
|
||||
}
|
||||
}
|
||||
else if(side == SIDE_INPUT) { //it's the function name, at least it better be
|
||||
side = SIDE_NAME;
|
||||
fn_name.append("_");
|
||||
fn_name.append(token);
|
||||
}
|
||||
else if(side == SIDE_OUTPUT) {
|
||||
if(token != toked.back()) throw; //the last token in the name is the alignment
|
||||
}
|
||||
}
|
||||
}
|
||||
//we don't need an output signature (some fn's operate on the input data, "in place"), but we do need at least one input!
|
||||
assert(inputsig.size() != 0);
|
||||
|
||||
}
|
||||
|
||||
inline void run_cast_test1(volk_gnsssdr_fn_1arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test2(volk_gnsssdr_fn_2arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test3(volk_gnsssdr_fn_3arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test4(volk_gnsssdr_fn_4arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test1_s32f(volk_gnsssdr_fn_1arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test2_s32f(volk_gnsssdr_fn_2arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test3_s32f(volk_gnsssdr_fn_3arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test1_s32fc(volk_gnsssdr_fn_1arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test2_s32fc(volk_gnsssdr_fn_2arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test3_s32fc(volk_gnsssdr_fn_3arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
//ADDED BY GNSS-SDR. START
|
||||
inline void run_cast_test1_s8i(volk_gnsssdr_fn_1arg_s8i func, std::vector<void *> &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test2_s8i(volk_gnsssdr_fn_2arg_s8i func, std::vector<void *> &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test3_s8i(volk_gnsssdr_fn_3arg_s8i func, std::vector<void *> &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test1_s8ic(volk_gnsssdr_fn_1arg_s8ic func, std::vector<void *> &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test2_s8ic(volk_gnsssdr_fn_2arg_s8ic func, std::vector<void *> &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test3_s8ic(volk_gnsssdr_fn_3arg_s8ic func, std::vector<void *> &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test8(volk_gnsssdr_fn_8arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test8_s8i(volk_gnsssdr_fn_8arg_s8i func, std::vector<void *> &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test8_s8ic(volk_gnsssdr_fn_8arg_s8ic func, std::vector<void *> &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test8_s32f(volk_gnsssdr_fn_8arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test8_s32fc(volk_gnsssdr_fn_8arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test12(volk_gnsssdr_fn_12arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test12_s8i(volk_gnsssdr_fn_12arg_s8i func, std::vector<void *> &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test12_s8ic(volk_gnsssdr_fn_12arg_s8ic func, std::vector<void *> &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test12_s32f(volk_gnsssdr_fn_12arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test12_s32fc(volk_gnsssdr_fn_12arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str());
|
||||
}
|
||||
//ADDED BY GNSS-SDR. END
|
||||
|
||||
// This function is a nop that helps resolve GNU Radio bugs 582 and 583.
|
||||
// Without this the cast in run_volk_gnsssdr_tests for tol_i = static_cast<int>(float tol)
|
||||
// won't happen on armhf (reported on cortex A9 and A15).
|
||||
// Deliberate no-op: compares each argument against 1 and, when 1 is larger,
// streams an empty string to std::cout. It exists only so that both tolerance
// values are actually used by a function call.
void lv_force_cast_hf( int tol_i, float tol_f)
{
    int unit_i = 1;
    float unit_f = 1;

    if( unit_i > tol_i )
        {
            std::cout << "" ;
        }
    if( unit_f > tol_f )
        {
            std::cout << "" ;
        }
}
|
||||
|
||||
// Compares two real-valued vectors element by element.
//
//   in1  - reference values
//   in2  - values under test
//   vlen - number of elements to compare
//   tol  - allowed relative error, or allowed absolute value of in2[i] when
//          the reference is practically zero (|in1[i]| < 1e-30)
//
// Returns true if at least one element is out of tolerance (i.e. true means
// FAILURE). The first ten offending elements are printed to std::cout.
template <class t>
bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
    bool fail = false;
    int print_max_errs = 10;
    for(unsigned int i=0; i<vlen; i++) {
        const t expected = in1[i];
        const t actual = in2[i];

        bool mismatch;
        if(std::fabs(expected) < 1e-30) {
            // for very small numbers we'll see round off errors due to limited
            // precision, so fall back to an absolute check
            mismatch = std::fabs(actual) > tol;
        } else {
            // The primary test: relative error larger than tol. The error is
            // divided by |expected|; dividing by the signed value made the
            // ratio negative for negative references, so those never failed.
            mismatch = std::fabs(expected - actual) / std::fabs(expected) > tol;
        }

        if(mismatch) {
            fail=true;
            if(print_max_errs-- > 0) {
                std::cout << "offset " << i << " in1: " << expected << " in2: " << actual << std::endl;
            }
        }
    }

    return fail;
}
|
||||
|
||||
// Compares two vectors of complex values stored as consecutive (re, im) pairs
// of t.
//
//   in1  - reference values, 2 * vlen scalars
//   in2  - values under test, 2 * vlen scalars
//   vlen - number of complex elements
//   tol  - allowed error magnitude relative to the reference magnitude, or
//          allowed absolute error magnitude when the reference magnitude is
//          below 1e-30
//
// Returns true if at least one element is out of tolerance (true means
// FAILURE). The first ten offending elements are printed to std::cout.
template <class t>
bool ccompare(t *in1, t *in2, unsigned int vlen, float tol) {
    bool any_mismatch = false;
    int reports_left = 10;

    for(unsigned int i=0; i<2*vlen; i+=2) {
        const t *ref = in1 + i;
        const t *got = in2 + i;

        const t d_re = ref[0] - got[0];
        const t d_im = ref[1] - got[1];
        const t err = std::sqrt(d_re * d_re + d_im * d_im);
        const t norm = std::sqrt(ref[0] * ref[0] + ref[1] * ref[1]);

        // Tiny references only get an absolute check (round-off dominates
        // there); everything else is checked relative to the reference.
        const bool mismatch = (norm < 1e-30) ? (err > tol) : ((err / norm) > tol);
        if(!mismatch) continue;

        any_mismatch = true;
        if(reports_left-- > 0) {
            std::cout << "offset " << i/2 << " in1: " << ref[0] << " + " << ref[1] << "j in2: " << got[0] << " + " << got[1] << "j" << std::endl;
        }
    }

    return any_mismatch;
}
|
||||
|
||||
// Compares two integer vectors element by element.
//
//   in1  - reference values
//   in2  - values under test
//   vlen - number of elements to compare
//   tol  - largest allowed absolute difference
//
// Returns true if at least one pair differs by more than tol (true means
// FAILURE). The first ten offending elements are printed to std::cout.
//
// The distance is computed at 64-bit width. Both operands used to be cast to
// int first, which dropped the upper bits of 64-bit and large unsigned data
// and so could hide real differences.
template <class t>
bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
    bool fail = false;
    int print_max_errs = 10;
    for(unsigned int i=0; i<vlen; i++) {
        const t expected = in1[i];
        const t actual = in2[i];

        // Unsigned arithmetic wraps modulo 2^64, so larger-minus-smaller gives
        // the exact distance for signed and unsigned element types alike.
        const unsigned long long wide_expected = static_cast<unsigned long long>(expected);
        const unsigned long long wide_actual = static_cast<unsigned long long>(actual);
        const unsigned long long distance = (expected > actual) ? (wide_expected - wide_actual)
                                                                : (wide_actual - wide_expected);

        if(distance > tol) {
            fail=true;
            if(print_max_errs-- > 0) {
                // Widen before printing so that char-sized types print as
                // numbers and wide values are not truncated.
                if(std::numeric_limits<t>::is_signed) {
                    std::cout << "offset " << i << " in1: " << static_cast<long long>(expected) << " in2: " << static_cast<long long>(actual) << std::endl;
                } else {
                    std::cout << "offset " << i << " in1: " << wide_expected << " in2: " << wide_actual << std::endl;
                }
            }
        }
    }

    return fail;
}
|
||||
|
||||
class volk_gnsssdr_qa_aligned_mem_pool{
|
||||
public:
|
||||
void *get_new(size_t size){
|
||||
size_t alignment = volk_gnsssdr_get_alignment();
|
||||
void* ptr = volk_gnsssdr_malloc(size, alignment);
|
||||
memset(ptr, 0x00, size);
|
||||
_mems.push_back(ptr);
|
||||
return ptr;
|
||||
}
|
||||
~volk_gnsssdr_qa_aligned_mem_pool() {
|
||||
for(unsigned int ii = 0; ii < _mems.size(); ++ii) {
|
||||
volk_gnsssdr_free(_mems[ii]);
|
||||
}
|
||||
}
|
||||
private: std::vector<void * > _mems;
|
||||
};
|
||||
|
||||
bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
|
||||
void (*manual_func)(),
|
||||
std::string name,
|
||||
float tol,
|
||||
lv_32fc_t scalar,
|
||||
int vlen,
|
||||
int iter,
|
||||
std::vector<std::string> *best_arch_vector = 0,
|
||||
std::string puppet_master_name = "NULL",
|
||||
bool benchmark_mode,
|
||||
std::string kernel_regex
|
||||
) {
|
||||
boost::xpressive::sregex kernel_expression = boost::xpressive::sregex::compile(kernel_regex);
|
||||
if( !boost::xpressive::regex_search(name, kernel_expression) ) {
|
||||
// in this case we have a regex and are only looking to test one kernel
|
||||
return false;
|
||||
}
|
||||
std::cout << "RUN_VOLK_TESTS: " << name << "(" << vlen << "," << iter << ")" << std::endl;
|
||||
|
||||
// The multiply and lv_force_cast_hf are work arounds for GNU Radio bugs 582 and 583
|
||||
// The bug is the casting/assignment below do not happen, which results in false
|
||||
// positives when testing for errors in fcompare and icompare.
|
||||
// Since this only happens on armhf (reported for Cortex A9 and A15) combined with
|
||||
// the following fixes it is suspected to be a compiler bug.
|
||||
// Bug 1272024 on launchpad has been filed with Linaro GCC.
|
||||
const float tol_f = tol*1.0000001;
|
||||
const unsigned int tol_i = static_cast<const unsigned int>(tol);
|
||||
lv_force_cast_hf( tol_i, tol_f );
|
||||
|
||||
//first let's get a list of available architectures for the test
|
||||
std::vector<std::string> arch_list = get_arch_list(desc);
|
||||
|
||||
if((!benchmark_mode) && (arch_list.size() < 2)) {
|
||||
std::cout << "no architectures to test" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
//something that can hang onto memory and cleanup when this function exits
|
||||
volk_gnsssdr_qa_aligned_mem_pool mem_pool;
|
||||
|
||||
//now we have to get a function signature by parsing the name
|
||||
std::vector<volk_gnsssdr_type_t> inputsig, outputsig;
|
||||
get_signatures_from_name(inputsig, outputsig, name);
|
||||
|
||||
//pull the input scalars into their own vector
|
||||
std::vector<volk_gnsssdr_type_t> inputsc;
|
||||
for(size_t i=0; i<inputsig.size(); i++) {
|
||||
if(inputsig[i].is_scalar) {
|
||||
inputsc.push_back(inputsig[i]);
|
||||
inputsig.erase(inputsig.begin() + i);
|
||||
i -= 1;
|
||||
}
|
||||
}
|
||||
//for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i].str << std::endl;
|
||||
//for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i].str << std::endl;
|
||||
std::vector<void *> inbuffs;
|
||||
BOOST_FOREACH(volk_gnsssdr_type_t sig, inputsig) {
|
||||
if(!sig.is_scalar) //we don't make buffers for scalars
|
||||
inbuffs.push_back(mem_pool.get_new(vlen*sig.size*(sig.is_complex ? 2 : 1)));
|
||||
}
|
||||
for(size_t i=0; i<inbuffs.size(); i++) {
|
||||
load_random_data(inbuffs[i], inputsig[i], vlen);
|
||||
}
|
||||
|
||||
//ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch
|
||||
std::vector<std::vector<void *> > test_data;
|
||||
for(size_t i=0; i<arch_list.size(); i++) {
|
||||
std::vector<void *> arch_buffs;
|
||||
for(size_t j=0; j<outputsig.size(); j++) {
|
||||
arch_buffs.push_back(mem_pool.get_new(vlen*outputsig[j].size*(outputsig[j].is_complex ? 2 : 1)));
|
||||
}
|
||||
for(size_t j=0; j<inputsig.size(); j++) {
|
||||
arch_buffs.push_back(inbuffs[j]);
|
||||
}
|
||||
test_data.push_back(arch_buffs);
|
||||
}
|
||||
|
||||
std::vector<volk_gnsssdr_type_t> both_sigs;
|
||||
both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end());
|
||||
both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end());
|
||||
|
||||
//now run the test
|
||||
clock_t start, end;
|
||||
std::vector<double> profile_times;
|
||||
for(size_t i = 0; i < arch_list.size(); i++) {
|
||||
start = clock();
|
||||
|
||||
switch(both_sigs.size()) {
|
||||
case 1:
|
||||
if(inputsc.size() == 0) {
|
||||
run_cast_test1((volk_gnsssdr_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
|
||||
} else if(inputsc.size() == 1 && inputsc[0].is_float) {
|
||||
if(inputsc[0].is_complex) {
|
||||
run_cast_test1_s32fc((volk_gnsssdr_fn_1arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
|
||||
} else {
|
||||
run_cast_test1_s32f((volk_gnsssdr_fn_1arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
|
||||
}
|
||||
}
|
||||
//ADDED BY GNSS-SDR. START
|
||||
else if(inputsc.size() == 1 && !inputsc[0].is_float) {
|
||||
if(inputsc[0].is_complex) {
|
||||
run_cast_test1_s8ic((volk_gnsssdr_fn_1arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
|
||||
} else {
|
||||
run_cast_test1_s8i((volk_gnsssdr_fn_1arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
|
||||
}
|
||||
}
|
||||
//ADDED BY GNSS-SDR. END
|
||||
else throw "unsupported 1 arg function >1 scalars";
|
||||
break;
|
||||
case 2:
|
||||
if(inputsc.size() == 0) {
|
||||
run_cast_test2((volk_gnsssdr_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
|
||||
} else if(inputsc.size() == 1 && inputsc[0].is_float) {
|
||||
if(inputsc[0].is_complex) {
|
||||
run_cast_test2_s32fc((volk_gnsssdr_fn_2arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
|
||||
} else {
|
||||
run_cast_test2_s32f((volk_gnsssdr_fn_2arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
|
||||
}
|
||||
}
|
||||
//ADDED BY GNSS-SDR. START
|
||||
else if(inputsc.size() == 1 && !inputsc[0].is_float) {
|
||||
if(inputsc[0].is_complex) {
|
||||
run_cast_test2_s8ic((volk_gnsssdr_fn_2arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
|
||||
} else {
|
||||
run_cast_test2_s8i((volk_gnsssdr_fn_2arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
|
||||
}
|
||||
}
|
||||
//ADDED BY GNSS-SDR. END
|
||||
else throw "unsupported 2 arg function >1 scalars";
|
||||
break;
|
||||
case 3:
|
||||
if(inputsc.size() == 0) {
|
||||
run_cast_test3((volk_gnsssdr_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
|
||||
} else if(inputsc.size() == 1 && inputsc[0].is_float) {
|
||||
if(inputsc[0].is_complex) {
|
||||
run_cast_test3_s32fc((volk_gnsssdr_fn_3arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
|
||||
} else {
|
||||
run_cast_test3_s32f((volk_gnsssdr_fn_3arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
|
||||
}
|
||||
}
|
||||
//ADDED BY GNSS-SDR. START
|
||||
else if(inputsc.size() == 1 && !inputsc[0].is_float) {
|
||||
if(inputsc[0].is_complex) {
|
||||
run_cast_test3_s8ic((volk_gnsssdr_fn_3arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
|
||||
} else {
|
||||
run_cast_test3_s8i((volk_gnsssdr_fn_3arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
|
||||
}
|
||||
}
|
||||
//ADDED BY GNSS-SDR. END
|
||||
else throw "unsupported 3 arg function >1 scalars";
|
||||
break;
|
||||
case 4:
|
||||
run_cast_test4((volk_gnsssdr_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
|
||||
break;
|
||||
//ADDED BY GNSS-SDR. START
|
||||
case 8:
|
||||
if(inputsc.size() == 0) {
|
||||
run_cast_test8((volk_gnsssdr_fn_8arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
|
||||
} else if(inputsc.size() == 1 && inputsc[0].is_float) {
|
||||
if(inputsc[0].is_complex) {
|
||||
run_cast_test8_s32fc((volk_gnsssdr_fn_8arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
|
||||
} else {
|
||||
run_cast_test8_s32f((volk_gnsssdr_fn_8arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
|
||||
}
|
||||
}
|
||||
else if(inputsc.size() == 1 && !inputsc[0].is_float) {
|
||||
if(inputsc[0].is_complex) {
|
||||
run_cast_test8_s8ic((volk_gnsssdr_fn_8arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
|
||||
} else {
|
||||
run_cast_test8_s8i((volk_gnsssdr_fn_8arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
|
||||
}
|
||||
}
|
||||
else throw "unsupported 8 arg function >1 scalars";
|
||||
break;
|
||||
case 12:
|
||||
if(inputsc.size() == 0) {
|
||||
run_cast_test12((volk_gnsssdr_fn_12arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
|
||||
} else if(inputsc.size() == 1 && inputsc[0].is_float) {
|
||||
if(inputsc[0].is_complex) {
|
||||
run_cast_test12_s32fc((volk_gnsssdr_fn_12arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
|
||||
} else {
|
||||
run_cast_test12_s32f((volk_gnsssdr_fn_12arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
|
||||
}
|
||||
}
|
||||
else if(inputsc.size() == 1 && !inputsc[0].is_float) {
|
||||
if(inputsc[0].is_complex) {
|
||||
run_cast_test12_s8ic((volk_gnsssdr_fn_12arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
|
||||
} else {
|
||||
run_cast_test12_s8i((volk_gnsssdr_fn_12arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
|
||||
}
|
||||
}
|
||||
else throw "unsupported 12 arg function >1 scalars";
|
||||
break;
|
||||
//ADDED BY GNSS-SDR. END
|
||||
default:
|
||||
throw "no function handler for this signature";
|
||||
break;
|
||||
}
|
||||
|
||||
end = clock();
|
||||
double arch_time = 1000.0 * (double)(end-start)/(double)CLOCKS_PER_SEC;
|
||||
std::cout << arch_list[i] << " completed in " << arch_time << "ms" << std::endl;
|
||||
|
||||
profile_times.push_back(arch_time);
|
||||
}
|
||||
|
||||
//and now compare each output to the generic output
|
||||
//first we have to know which output is the generic one, they aren't in order...
|
||||
size_t generic_offset=0;
|
||||
for(size_t i=0; i<arch_list.size(); i++)
|
||||
if(arch_list[i] == "generic") generic_offset=i;
|
||||
|
||||
//now compare
|
||||
//if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
|
||||
|
||||
bool fail = false;
|
||||
bool fail_global = false;
|
||||
std::vector<bool> arch_results;
|
||||
for(size_t i=0; i<arch_list.size(); i++) {
|
||||
fail = false;
|
||||
if(i != generic_offset) {
|
||||
for(size_t j=0; j<both_sigs.size(); j++) {
|
||||
if(both_sigs[j].is_float) {
|
||||
if(both_sigs[j].size == 8) {
|
||||
if (both_sigs[j].is_complex) {
|
||||
fail = ccompare((double *) test_data[generic_offset][j], (double *) test_data[i][j], vlen, tol_f);
|
||||
|
||||
} else {
|
||||
fail = fcompare((double *) test_data[generic_offset][j], (double *) test_data[i][j], vlen, tol_f);
|
||||
}
|
||||
} else {
|
||||
if (both_sigs[j].is_complex) {
|
||||
fail = ccompare((float *) test_data[generic_offset][j], (float *) test_data[i][j], vlen, tol_f);
|
||||
} else {
|
||||
fail = fcompare((float *) test_data[generic_offset][j], (float *) test_data[i][j], vlen, tol_f);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
//i could replace this whole switch statement with a memcmp if i wasn't interested in printing the outputs where they differ
|
||||
switch(both_sigs[j].size) {
|
||||
case 8:
|
||||
if(both_sigs[j].is_signed) {
|
||||
fail = icompare((int64_t *) test_data[generic_offset][j], (int64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
|
||||
} else {
|
||||
fail = icompare((uint64_t *) test_data[generic_offset][j], (uint64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
if(both_sigs[j].is_signed) {
|
||||
fail = icompare((int32_t *) test_data[generic_offset][j], (int32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
|
||||
} else {
|
||||
fail = icompare((uint32_t *) test_data[generic_offset][j], (uint32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if(both_sigs[j].is_signed) {
|
||||
fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
|
||||
} else {
|
||||
fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
if(both_sigs[j].is_signed) {
|
||||
fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
|
||||
} else {
|
||||
fail = icompare((uint8_t *) test_data[generic_offset][j], (uint8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
fail=1;
|
||||
}
|
||||
}
|
||||
if(fail) {
|
||||
fail_global = true;
|
||||
std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
|
||||
}
|
||||
//fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
|
||||
}
|
||||
}
|
||||
arch_results.push_back(!fail);
|
||||
}
|
||||
|
||||
double best_time_a = std::numeric_limits<double>::max();
|
||||
double best_time_u = std::numeric_limits<double>::max();
|
||||
std::string best_arch_a = "generic";
|
||||
std::string best_arch_u = "generic";
|
||||
for(size_t i=0; i < arch_list.size(); i++)
|
||||
{
|
||||
if((profile_times[i] < best_time_u) && arch_results[i] && desc.impl_alignment[i] == 0)
|
||||
{
|
||||
best_time_u = profile_times[i];
|
||||
best_arch_u = arch_list[i];
|
||||
}
|
||||
if((profile_times[i] < best_time_a) && arch_results[i])
|
||||
{
|
||||
best_time_a = profile_times[i];
|
||||
best_arch_a = arch_list[i];
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Best aligned arch: " << best_arch_a << std::endl;
|
||||
std::cout << "Best unaligned arch: " << best_arch_u << std::endl;
|
||||
if(best_arch_vector) {
|
||||
if(puppet_master_name == "NULL") {
|
||||
best_arch_vector->push_back(name + " " + best_arch_a + " " + best_arch_u);
|
||||
}
|
||||
else {
|
||||
best_arch_vector->push_back(puppet_master_name + " " + best_arch_a + " " + best_arch_u);
|
||||
}
|
||||
}
|
||||
|
||||
return fail_global;
|
||||
}
|
||||
|
||||
|
62
src/algorithms/libs/volk_gnsssdr/lib/qa_utils.h
Normal file
62
src/algorithms/libs/volk_gnsssdr/lib/qa_utils.h
Normal file
@ -0,0 +1,62 @@
|
||||
#ifndef VOLK_QA_UTILS_H
|
||||
#define VOLK_QA_UTILS_H
|
||||
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
|
||||
// Description of one operand type of a kernel signature, as used by the
// QA/profiling harness when it compares implementation outputs.
struct volk_gnsssdr_type_t {
    bool is_float;    // floating-point data: outputs are compared with the float tolerance
    bool is_scalar;   // presumably marks a scalar (non-vector) argument -- TODO confirm
    bool is_signed;   // integer data only: signed vs. unsigned comparison type
    bool is_complex;  // complex data: two components per item
    int size;         // size in bytes of one component (integers: 1, 2, 4 or 8)
    std::string str;  // presumably the textual type token -- TODO confirm
};
|
||||
|
||||
volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string);
|
||||
|
||||
float uniform(void);
|
||||
void random_floats(float *buf, unsigned n);
|
||||
|
||||
bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t, void(*)(), std::string, float, lv_32fc_t, int, int, std::vector<std::string> *, std::string, bool benchmark_mode=false, std::string kernel_regex="");
|
||||
|
||||
|
||||
#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_gnsssdr_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0, "NULL"), 0); }
|
||||
#define VOLK_PROFILE(func, tol, scalar, len, iter, results, bnmode, kernel_regex) run_volk_gnsssdr_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results, "NULL", bnmode, kernel_regex)
|
||||
#define VOLK_PUPPET_PROFILE(func, puppet_master_func, tol, scalar, len, iter, results, bnmode, kernel_regex) run_volk_gnsssdr_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results, std::string(#puppet_master_func), bnmode, kernel_regex)
|
||||
typedef void (*volk_gnsssdr_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place
|
||||
typedef void (*volk_gnsssdr_fn_2arg)(void *, void *, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_3arg)(void *, void *, void *, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, one scalar float input
|
||||
typedef void (*volk_gnsssdr_fn_2arg_s32f)(void *, void *, float, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_3arg_s32f)(void *, void *, void *, float, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_1arg_s32fc)(void *, lv_32fc_t, unsigned int, const char*); //one input vector, one scalar float input
|
||||
typedef void (*volk_gnsssdr_fn_2arg_s32fc)(void *, void *, lv_32fc_t, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_3arg_s32fc)(void *, void *, void *, lv_32fc_t, unsigned int, const char*);
|
||||
|
||||
//ADDED BY GNSS-SDR. START
|
||||
typedef void (*volk_gnsssdr_fn_1arg_s8i)(void *, char, unsigned int, const char*); //one input vector, one scalar char input
|
||||
typedef void (*volk_gnsssdr_fn_2arg_s8i)(void *, void *, char, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_3arg_s8i)(void *, void *, void *, char, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_1arg_s8ic)(void *, lv_8sc_t, unsigned int, const char*); //one input vector, one scalar lv_8sc_t vector input
|
||||
typedef void (*volk_gnsssdr_fn_2arg_s8ic)(void *, void *, lv_8sc_t, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_3arg_s8ic)(void *, void *, void *, lv_8sc_t, unsigned int, const char*);
|
||||
|
||||
typedef void (*volk_gnsssdr_fn_8arg)(void *, void *, void *, void *, void *, void *, void *, void *, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_8arg_s32f)(void *, void *, void *, void *, void *, void *, void *, void *, float, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_8arg_s32fc)(void *, void *, void *, void *, void *, void *, void *, void *, lv_32fc_t, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_8arg_s8i)(void *, void *, void *, void *, void *, void *, void *, void *, char, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_8arg_s8ic)(void *, void *, void *, void *, void *, void *, void *, void *, lv_8sc_t, unsigned int, const char*);
|
||||
|
||||
typedef void (*volk_gnsssdr_fn_12arg)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_12arg_s32f)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, float, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_12arg_s32fc)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, lv_32fc_t, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_12arg_s8i)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, char, unsigned int, const char*);
|
||||
typedef void (*volk_gnsssdr_fn_12arg_s8ic)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, lv_8sc_t, unsigned int, const char*);
|
||||
//ADDED BY GNSS-SDR. END
|
||||
|
||||
#endif //VOLK_QA_UTILS_H
|
67
src/algorithms/libs/volk_gnsssdr/lib/testqa.cc
Normal file
67
src/algorithms/libs/volk_gnsssdr/lib/testqa.cc
Normal file
@ -0,0 +1,67 @@
|
||||
/* -*- c++ -*- */
|
||||
/*
|
||||
* Copyright 2012-2014 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GNU Radio
|
||||
*
|
||||
* GNU Radio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* GNU Radio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Radio; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "qa_utils.h"
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
||||
//GNSS-SDR PROTO-KERNELS
|
||||
// Element-wise arithmetic, dot products, conjugation, reductions
VOLK_RUN_TESTS(volk_gnsssdr_32fc_x2_multiply_32fc, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x2_multiply_8ic, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8u_x2_multiply_8u, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_x2_dot_prod_32fc, 1e-4, 0, 204603, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x2_dot_prod_8ic, 1e-4, 0, 204603, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_conjugate_32fc, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_conjugate_8ic, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32f_x2_add_32f, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8i_x2_add_8i, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32f_index_max_16u, 3, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8i_index_max_16u, 3, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32f_accumulator_s32f, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8i_accumulator_s8i, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_magnitude_squared_32f, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_magnitude_squared_8i, 1e-4, 0, 20462, 1);

// Correlator kernels
VOLK_RUN_TESTS(volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3, 1e-4, 0, 20462, 1);

VOLK_RUN_TESTS(volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 20462, 1);

// Conversion (the scalar argument is 32768.0 here)
VOLK_RUN_TESTS(volk_gnsssdr_16i_s32f_convert_32f, 1e-4, 32768.0, 20462, 1);

// Maximum search
VOLK_RUN_TESTS(volk_gnsssdr_8i_max_s8i, 3, 0, 20462, 1);
|
||||
|
||||
//VOLK_RUN_TESTS(volk_gnsssdr_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000);
|
||||
//VOLK_RUN_TESTS(volk_gnsssdr_16i_branch_4_state_8, 1e-4, 2046, 10000);
|
||||
//VOLK_RUN_TESTS(volk_gnsssdr_16i_max_star_16i, 0, 0, 20462, 10000);
|
||||
//VOLK_RUN_TESTS(volk_gnsssdr_16i_max_star_horizontal_16i, 0, 0, 20462, 10000);
|
||||
//VOLK_RUN_TESTS(volk_gnsssdr_16i_permute_and_scalar_add, 1e-4, 0, 2046, 1000);
|
||||
//VOLK_RUN_TESTS(volk_gnsssdr_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 1000);
|
||||
//VOLK_RUN_TESTS(volk_gnsssdr_16i_32fc_dot_prod_32fc, 1e-4, 0, 204602, 1);
|
||||
//VOLK_RUN_TESTS(volk_gnsssdr_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 2046, 10000);
|
||||
//VOLK_RUN_TESTS(volk_gnsssdr_32fc_s32f_x2_power_spectral_density_32f, 1e-4, 2046, 10000);
|
||||
//VOLK_RUN_TESTS(volk_gnsssdr_32f_s32f_32f_fm_detect_32f, 1e-4, 2046, 10000);
|
||||
//VOLK_RUN_TESTS(volk_gnsssdr_32u_popcnt, 0, 0, 2046, 10000);
|
||||
//VOLK_RUN_TESTS(volk_gnsssdr_64u_popcnt, 0, 0, 2046, 10000);
|
142
src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_malloc.c
Normal file
142
src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_malloc.c
Normal file
@ -0,0 +1,142 @@
|
||||
/* -*- c -*- */
|
||||
/*
|
||||
* Copyright 2014 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GNU Radio
|
||||
*
|
||||
* GNU Radio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* GNU Radio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Radio; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <pthread.h>
#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
|
||||
|
||||
/*
|
||||
* For #defines used to determine support for allocation functions,
|
||||
* see: http://linux.die.net/man/3/aligned_alloc
|
||||
*/
|
||||
|
||||
// Disabling use of aligned_alloc. This function requires that size be
|
||||
// a multiple of alignment, which is too restrictive for many uses of
|
||||
// VOLK.
|
||||
|
||||
//// If we are using C11 standard, use the aligned_alloc
|
||||
//#ifdef _ISOC11_SOURCE
|
||||
//
|
||||
//void *volk_gnsssdr_malloc(size_t size, size_t alignment)
|
||||
//{
|
||||
// void *ptr = aligned_alloc(alignment, size);
|
||||
// if(ptr == NULL) {
|
||||
// fprintf(stderr, "VOLK: Error allocating memory (aligned_alloc)\n");
|
||||
// }
|
||||
// return ptr;
|
||||
//}
|
||||
//
|
||||
//void volk_gnsssdr_free(void *ptr)
|
||||
//{
|
||||
// free(ptr);
|
||||
//}
|
||||
//
|
||||
//#else // _ISOC11_SOURCE
|
||||
|
||||
// Otherwise, test if we are a POSIX or X/Open system
|
||||
// This only has a restriction that alignment be a power of 2.
|
||||
#if _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || HAVE_POSIX_MEMALIGN
|
||||
|
||||
/*
 * Allocate `size` bytes aligned to `alignment` bytes with posix_memalign().
 *
 * `alignment` is expected to be a power of two. Values smaller than
 * sizeof(void *) are rounded up to sizeof(void *): posix_memalign() rejects
 * them with EINVAL, and the larger power of two still satisfies the
 * requested alignment.
 *
 * Returns the block, or NULL after printing a diagnostic to stderr.
 * Release the block with volk_gnsssdr_free().
 */
void *volk_gnsssdr_malloc(size_t size, size_t alignment)
{
    void *ptr = NULL;
    int err;

    /* posix_memalign() needs a multiple of sizeof(void *) */
    if(alignment < sizeof(void *)) {
        alignment = sizeof(void *);
    }

    err = posix_memalign(&ptr, alignment, size);
    if(err != 0) {
        fprintf(stderr, "VOLK: Error allocating memory (posix_memalign: %d)\n", err);
        return NULL;
    }
    return ptr;
}

/* Release a block obtained from volk_gnsssdr_malloc(); NULL is accepted. */
void volk_gnsssdr_free(void *ptr)
{
    free(ptr);
}
|
||||
|
||||
// _aligned_malloc has no restriction on size,
|
||||
// available on Windows since Visual C++ 2005
|
||||
#elif _MSC_VER >= 1400
|
||||
|
||||
/*
 * Allocate `size` bytes aligned to `alignment` bytes with _aligned_malloc().
 * Returns NULL, after printing a diagnostic to stderr, when the allocation
 * fails. Release the block with volk_gnsssdr_free().
 */
void *volk_gnsssdr_malloc(size_t size, size_t alignment)
{
    void *block = _aligned_malloc(size, alignment);

    if(!block) {
        fprintf(stderr, "VOLK: Error allocating memory (_aligned_malloc)\n");
    }
    return block;
}

/* Release a block obtained from volk_gnsssdr_malloc(). */
void volk_gnsssdr_free(void *ptr)
{
    _aligned_free(ptr);
}
|
||||
|
||||
// No standard handlers; we'll do it ourselves.
|
||||
#else // _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || HAVE_POSIX_MEMALIGN
|
||||
|
||||
/* Bookkeeping record stored immediately below every pointer handed to the user. */
struct block_info
{
    void *real; /* address returned by malloc(); this is what free() must receive */
};

/*
 * Portable aligned allocation built on plain malloc().
 *
 * Over-allocates, rounds the address up to `alignment` (which must be a power
 * of two because the rounding uses a bit mask) and stores the original
 * malloc() pointer in a block_info record just below the returned address.
 *
 * Returns the aligned block, or NULL after printing a diagnostic to stderr
 * when the request overflows size_t or malloc() fails.
 * Release the block with volk_gnsssdr_free().
 */
void *
volk_gnsssdr_malloc(size_t size, size_t alignment)
{
    void *real, *user;
    struct block_info *info;

    /* At least align to sizeof our struct */
    if (alignment < sizeof(struct block_info))
        alignment = sizeof(struct block_info);

    /* Refuse requests whose padded size does not fit in size_t */
    if (alignment > SIZE_MAX / 2 || size > SIZE_MAX - (2 * alignment - 1)) {
        fprintf(stderr, "VOLK: Error allocating memory (malloc)\n");
        return NULL;
    }

    /* Alloc: room for the payload, the bookkeeping record and the rounding slack */
    real = malloc(size + (2 * alignment - 1));
    if (real == NULL) {
        fprintf(stderr, "VOLK: Error allocating memory (malloc)\n");
        return NULL;
    }

    /* Get pointer to the various zones */
    user = (void *)((((uintptr_t) real) + sizeof(struct block_info) + alignment - 1) & ~(alignment - 1));
    info = (struct block_info *)(((uintptr_t)user) - sizeof(struct block_info));

    /* Store the info for the free */
    info->real = real;

    /* Return pointer to user */
    return user;
}

/*
 * Release a block obtained from volk_gnsssdr_malloc().
 * A NULL pointer is ignored, matching free().
 */
void
volk_gnsssdr_free(void *ptr)
{
    struct block_info *info;

    if (ptr == NULL)
        return;

    /* Get the real pointer */
    info = (struct block_info *)(((uintptr_t)ptr) - sizeof(struct block_info));

    /* Release real pointer */
    free(info->real);
}
|
||||
|
||||
#endif // _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || HAVE_POSIX_MEMALIGN
|
||||
|
||||
//#endif // _ISOC11_SOURCE
|
50
src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_prefs.c
Normal file
50
src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_prefs.c
Normal file
@ -0,0 +1,50 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_prefs.h>
|
||||
|
||||
//#if defined(_WIN32)
|
||||
//#include <Windows.h>
|
||||
//#endif
|
||||
|
||||
/*
 * Write the location of the preferences file into `path`:
 * "<home>/.volk_gnsssdr/volk_gnsssdr_config", where <home> is the HOME
 * environment variable or, when that is unset, APPDATA.
 *
 * When neither variable is set, `path` is set to the empty string so the
 * caller can detect the failure. Assigning NULL to the parameter, as was done
 * before, only changed the local copy and left the caller's buffer untouched.
 *
 * NOTE(review): the size of `path` is not known here, so the copy is
 * unbounded; the caller must supply a buffer large enough for the home
 * directory plus the suffix.
 */
void volk_gnsssdr_get_config_path(char *path)
{
    const char *suffix = "/.volk_gnsssdr/volk_gnsssdr_config";
    const char *home;

    if (path == NULL) return;

    home = getenv("HOME");
    if (home == NULL) home = getenv("APPDATA");
    if (home == NULL){
        path[0] = '\0';
        return;
    }
    strcpy(path, home);
    strcat(path, suffix);
}
|
||||
|
||||
/*
 * Load the per-kernel implementation preferences from the config file.
 *
 * Each accepted line has the form "<kernel name> <aligned impl> <unaligned impl>"
 * and a kernel name starting with "volk_gnsssdr_"; other lines are skipped.
 *
 * On success *prefs_res receives a heap array (grown with realloc(), owned by
 * the caller) and the number of valid entries is returned. When no config
 * file can be located or opened, 0 is returned and *prefs_res is left
 * untouched.
 */
size_t volk_gnsssdr_load_preferences(volk_gnsssdr_arch_pref_t **prefs_res)
{
    static const char kernel_prefix[] = "volk_gnsssdr_";
    FILE *config_file;
    char path[512], line[512];
    size_t n_arch_prefs = 0;
    volk_gnsssdr_arch_pref_t *prefs = NULL;

    //get the config path; start from an empty string so that a lookup
    //which leaves the buffer untouched is still detected
    path[0] = '\0';
    volk_gnsssdr_get_config_path(path);
    if (path[0] == '\0') return n_arch_prefs; //no prefs found
    config_file = fopen(path, "r");
    if(!config_file) return n_arch_prefs; //no prefs found

    //read the file line by line and collect the valid entries
    while(fgets(line, sizeof(line), config_file) != NULL)
    {
        volk_gnsssdr_arch_pref_t *p;
        //grow through a temporary so the entries read so far survive a failed realloc
        void *grown = realloc(prefs, (n_arch_prefs+1) * sizeof(*prefs));
        if(grown == NULL)
        {
            fprintf(stderr, "VOLK: Error allocating memory (realloc)\n");
            break;
        }
        prefs = (volk_gnsssdr_arch_pref_t *) grown;
        p = prefs + n_arch_prefs;
        //NOTE(review): the %s conversions are unbounded; an over-long token in
        //the config file can overflow the destination fields -- confirm the
        //field sizes in volk_gnsssdr_prefs.h and add explicit widths
        if(sscanf(line, "%s %s %s", p->name, p->impl_a, p->impl_u) == 3 && !strncmp(p->name, kernel_prefix, sizeof(kernel_prefix) - 1))
        {
            n_arch_prefs++;
        }
    }
    fclose(config_file);
    *prefs_res = prefs;
    return n_arch_prefs;
}
|
119
src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_rank_archs.c
Normal file
119
src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_rank_archs.c
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright 2011-2012 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GNU Radio
|
||||
*
|
||||
* GNU Radio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* GNU Radio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Radio; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <volk_gnsssdr_rank_archs.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_prefs.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * __popcnt(x): number of bits set in x.
 * GCC >= 3.4 maps it to the compiler builtin; other compilers get a small
 * local implementation. The fallback is `static inline`: a plain C99 `inline`
 * definition provides no external definition, so a call that is not inlined
 * could fail to link.
 */
#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4
#define __popcnt __builtin_popcount
#else
static inline unsigned __popcnt(unsigned num)
{
    unsigned pop = 0;
    /* clear the lowest set bit on every iteration */
    for(; num; num &= num - 1)
    {
        pop++;
    }
    return pop;
}
#endif
|
||||
|
||||
/*
 * Return the index of `impl_name` within `impl_names` (only the first 20
 * characters of each name are compared).
 *
 * When the name is not present, a warning is printed and the index of the
 * "generic" implementation is returned instead. When "generic" itself is
 * missing, -1 is returned; previously this case recursed without end.
 */
int volk_gnsssdr_get_index(
    const char *impl_names[], //list of implementations by name
    const size_t n_impls,     //number of implementations available
    const char *impl_name     //the implementation name to find
){
    unsigned int i;
    for (i = 0; i < n_impls; i++) {
        if(!strncmp(impl_names[i], impl_name, 20)) {
            return i;
        }
    }

    //the fallback itself is missing: stop here instead of recursing forever
    if(!strncmp(impl_name, "generic", 20)) {
        fprintf(stderr, "Volk error: no generic impl found\n");
        return -1;
    }

    //TODO return -1;
    //something terrible should happen here
    printf("Volk warning: no arch found, returning generic impl\n");
    return volk_gnsssdr_get_index(impl_names, n_impls, "generic"); //but we'll fake it for now
}
|
||||
|
||||
int volk_gnsssdr_rank_archs(
|
||||
const char *kern_name, //name of the kernel to rank
|
||||
const char *impl_names[], //list of implementations by name
|
||||
const int* impl_deps, //requirement mask per implementation
|
||||
const bool* alignment, //alignment status of each implementation
|
||||
size_t n_impls, //number of implementations available
|
||||
const bool align //if false, filter aligned implementations
|
||||
){
|
||||
size_t i;
|
||||
static volk_gnsssdr_arch_pref_t *volk_gnsssdr_arch_prefs;
|
||||
static size_t n_arch_prefs = 0;
|
||||
static int prefs_loaded = 0;
|
||||
if(!prefs_loaded) {
|
||||
n_arch_prefs = volk_gnsssdr_load_preferences(&volk_gnsssdr_arch_prefs);
|
||||
prefs_loaded = 1;
|
||||
}
|
||||
|
||||
// If we've defined VOLK_GENERIC to be anything, always return the
|
||||
// 'generic' kernel. Used in GR's QA code.
|
||||
char *gen_env = getenv("VOLK_GENERIC");
|
||||
if(gen_env) {
|
||||
return volk_gnsssdr_get_index(impl_names, n_impls, "generic");
|
||||
}
|
||||
|
||||
//now look for the function name in the prefs list
|
||||
for(i = 0; i < n_arch_prefs; i++)
|
||||
{
|
||||
if(!strncmp(kern_name, volk_gnsssdr_arch_prefs[i].name, sizeof(volk_gnsssdr_arch_prefs[i].name))) //found it
|
||||
{
|
||||
const char *impl_name = align? volk_gnsssdr_arch_prefs[i].impl_a : volk_gnsssdr_arch_prefs[i].impl_u;
|
||||
return volk_gnsssdr_get_index(impl_names, n_impls, impl_name);
|
||||
}
|
||||
}
|
||||
|
||||
//return the best index with the largest deps
|
||||
size_t best_index_a = 0;
|
||||
size_t best_index_u = 0;
|
||||
int best_value_a = -1;
|
||||
int best_value_u = -1;
|
||||
for(i = 0; i < n_impls; i++)
|
||||
{
|
||||
const signed val = __popcnt(impl_deps[i]);
|
||||
if (alignment[i] && val > best_value_a)
|
||||
{
|
||||
best_index_a = i;
|
||||
best_value_a = val;
|
||||
}
|
||||
if (!alignment[i] && val > best_value_u)
|
||||
{
|
||||
best_index_u = i;
|
||||
best_value_u = val;
|
||||
}
|
||||
}
|
||||
|
||||
//when align and we found a best aligned, use it
|
||||
if (align && best_value_a != -1) return best_index_a;
|
||||
|
||||
//otherwise return the best unaligned
|
||||
return best_index_u;
|
||||
}
|
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright 2011-2012 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GNU Radio
|
||||
*
|
||||
* GNU Radio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* GNU Radio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Radio; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_VOLK_RANK_ARCHS_H
|
||||
#define INCLUDED_VOLK_RANK_ARCHS_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
 * Index of `impl_name` within `impl_names`; when the name is not listed,
 * the index of the "generic" implementation is looked up instead.
 */
int volk_gnsssdr_get_index(
    const char *impl_names[], //list of implementations by name
    const size_t n_impls,     //number of implementations available
    const char *impl_name     //the implementation name to find
);

/*
 * Index of the implementation to use for kernel `kern_name`.
 */
int volk_gnsssdr_rank_archs(
    const char *kern_name,    //name of the kernel to rank
    const char *impl_names[], //list of implementations by name
    const int* impl_deps,     //requirement mask per implementation
    const bool* alignment,    //alignment status of each implementation
    size_t n_impls,           //number of implementations available
    const bool align          //if false, filter aligned implementations
);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /*INCLUDED_VOLK_RANK_ARCHS_H*/
|
@ -0,0 +1,25 @@
|
||||
# Magnitude of a complex sample made of two 16-bit integers, as a 32-bit float:
#   dst = sqrt((a / scalar)^2 + (b / scalar)^2)
# where a and b are the two 16-bit halves of each 32-bit src item.
.function volk_gnsssdr_16ic_magnitude_32f_a_orc_impl
.source 4 src
.dest 4 dst
.floatparam 4 scalar
.temp 4 reall
.temp 4 imagl
.temp 2 reals
.temp 2 imags
.temp 4 realf
.temp 4 imagf
.temp 4 sumf

# split the item into its two 16-bit halves
# NOTE(review): which half is the real part depends on splitlw's output
# order; the result is symmetric in both halves, so it does not matter here
splitlw reals, imags, src
# widen each half to 32 bits, then convert to float
convswl reall, reals
convswl imagl, imags
convlf realf, reall
convlf imagf, imagl
# scale both components by 1/scalar
divf realf, realf, scalar
divf imagf, imagf, scalar
# sum of squares, then square root
mulf realf, realf, realf
mulf imagf, imagf, imagf
addf sumf, realf, imagf
sqrtf dst, sumf
|
@ -0,0 +1,5 @@
|
||||
# Element-wise float addition: dst = src1 + src2
.function volk_gnsssdr_32f_x2_add_32f_a_orc_impl
.dest 4 dst
.source 4 src1
.source 4 src2
addf dst, src1, src2
|
@ -0,0 +1,18 @@
|
||||
# Multiplies every 8-byte complex item of src1 by the 8-byte parameter `scalar`.
# With src1 = (a, b) and scalar = (c, d) as pairs of floats:
#   real = a*c - b*d,  imag = b*c + a*d
.function volk_gnsssdr_32fc_s32fc_multiply_32fc_a_orc_impl
.source 8 src1
.floatparam 8 scalar
.dest 8 dst
.temp 8 iqprod
.temp 4 real
.temp 4 imag
.temp 4 ac
.temp 4 bd
.temp 8 swapped

# lane-wise products a*c and b*d, then real = ac - bd
x2 mulf iqprod, src1, scalar
splitql bd, ac, iqprod
subf real, ac, bd
# swap the two floats of src1 to form the cross products, then add them
swaplq swapped, src1
x2 mulf iqprod, swapped, scalar
splitql bd, ac, iqprod
addf imag, ac, bd
# pack the two results into one complex output item
mergelq dst, real, imag
|
@ -0,0 +1,18 @@
|
||||
# Element-wise complex multiplication of two vectors of 8-byte complex items.
# With src1 = (a, b) and src2 = (c, d) as pairs of floats:
#   real = a*c - b*d,  imag = b*c + a*d
.function volk_gnsssdr_32fc_x2_multiply_32fc_a_orc_impl
.source 8 src1
.source 8 src2
.dest 8 dst
.temp 8 iqprod
.temp 4 real
.temp 4 imag
.temp 4 ac
.temp 4 bd
.temp 8 swapped

# lane-wise products a*c and b*d, then real = ac - bd
x2 mulf iqprod, src1, src2
splitql bd, ac, iqprod
subf real, ac, bd
# swap the two floats of src1 to form the cross products, then add them
swaplq swapped, src1
x2 mulf iqprod, swapped, src2
splitql bd, ac, iqprod
addf imag, ac, bd
# pack the two results into one complex output item
mergelq dst, real, imag
|
@ -0,0 +1,40 @@
|
||||
#/*!
|
||||
# * \file volk_gnsssdr_8i_accumulator_s8i.orc
|
||||
# * \brief ORC implementation: 8 bits (char) scalar accumulator
|
||||
# * \authors <ul>
|
||||
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
# * </ul>
|
||||
# *
|
||||
# * ORC code that implements an accumulator of char values
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# *
|
||||
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
# *
|
||||
# * GNSS-SDR is a software defined Global Navigation
|
||||
# * Satellite Systems receiver
|
||||
# *
|
||||
# * This file is part of GNSS-SDR.
|
||||
# *
|
||||
# * GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
# * it under the terms of the GNU General Public License as published by
|
||||
# * the Free Software Foundation, either version 3 of the License, or
|
||||
# * (at your option) any later version.
|
||||
# *
|
||||
# * GNSS-SDR is distributed in the hope that it will be useful,
|
||||
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# * GNU General Public License for more details.
|
||||
# *
|
||||
# * You should have received a copy of the GNU General Public License
|
||||
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# */
|
||||
|
||||
.function volk_gnsssdr_8i_accumulator_s8i_a_orc_impl
|
||||
.source 1 src1
|
||||
.accumulator 2 acc
|
||||
.temp 2 sum
|
||||
mergebw sum, 0, src1
|
||||
accw acc, sum
|
@ -0,0 +1,39 @@
|
||||
#/*!
|
||||
# * \file volk_gnsssdr_8i_x2_add_8i.orc
|
||||
# * \brief ORC implementation: adds pairs of 8 bits (char) scalars
|
||||
# * \authors <ul>
|
||||
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
# * </ul>
|
||||
# *
|
||||
# * ORC code that adds pairs of 8 bits (char) scalars
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# *
|
||||
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
# *
|
||||
# * GNSS-SDR is a software defined Global Navigation
|
||||
# * Satellite Systems receiver
|
||||
# *
|
||||
# * This file is part of GNSS-SDR.
|
||||
# *
|
||||
# * GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
# * it under the terms of the GNU General Public License as published by
|
||||
# * the Free Software Foundation, either version 3 of the License, or
|
||||
# * (at your option) any later version.
|
||||
# *
|
||||
# * GNSS-SDR is distributed in the hope that it will be useful,
|
||||
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# * GNU General Public License for more details.
|
||||
# *
|
||||
# * You should have received a copy of the GNU General Public License
|
||||
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# */
|
||||
|
||||
.function volk_gnsssdr_8i_x2_add_8i_a_orc_impl
|
||||
.dest 1 dst
|
||||
.source 1 src1
|
||||
.source 1 src2
|
||||
addb dst, src1, src2
|
@ -0,0 +1,42 @@
|
||||
#/*!
|
||||
# * \file volk_gnsssdr_8ic_conjugate_8ic.orc
|
||||
# * \brief ORC implementation: calculates the conjugate of a 16 bits vector
|
||||
# * \authors <ul>
|
||||
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
# * </ul>
|
||||
# *
|
||||
# * ORC code that calculates the conjugate of a
|
||||
# * 16 bits vector (8 bits the real part and 8 bits the imaginary part)
|
||||
# * result = real - j*imag
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# *
|
||||
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
# *
|
||||
# * GNSS-SDR is a software defined Global Navigation
|
||||
# * Satellite Systems receiver
|
||||
# *
|
||||
# * This file is part of GNSS-SDR.
|
||||
# *
|
||||
# * GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
# * it under the terms of the GNU General Public License as published by
|
||||
# * the Free Software Foundation, either version 3 of the License, or
|
||||
# * (at your option) any later version.
|
||||
# *
|
||||
# * GNSS-SDR is distributed in the hope that it will be useful,
|
||||
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# * GNU General Public License for more details.
|
||||
# *
|
||||
# * You should have received a copy of the GNU General Public License
|
||||
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# */
|
||||
|
||||
.function volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl
|
||||
.source 2 src1
|
||||
.dest 2 dst
|
||||
.temp 2 merged
|
||||
mergebw merged, 1, -1
|
||||
x2 mullb dst, merged, src1
|
@ -0,0 +1,45 @@
|
||||
#/*!
|
||||
# * \file volk_gnsssdr_8ic_magnitude_squared_8i.orc
|
||||
# * \brief ORC implementation: calculates the magnitude squared of a 16 bits vector
|
||||
# * \authors <ul>
|
||||
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
# * </ul>
|
||||
# *
|
||||
# * ORC code that calculates the magnitude squared of a
|
||||
# * 16 bits vector (8 bits the real part and 8 bits the imaginary part)
|
||||
# * result = (real*real) + (imag*imag)
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# *
|
||||
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
# *
|
||||
# * GNSS-SDR is a software defined Global Navigation
|
||||
# * Satellite Systems receiver
|
||||
# *
|
||||
# * This file is part of GNSS-SDR.
|
||||
# *
|
||||
# * GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
# * it under the terms of the GNU General Public License as published by
|
||||
# * the Free Software Foundation, either version 3 of the License, or
|
||||
# * (at your option) any later version.
|
||||
# *
|
||||
# * GNSS-SDR is distributed in the hope that it will be useful,
|
||||
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# * GNU General Public License for more details.
|
||||
# *
|
||||
# * You should have received a copy of the GNU General Public License
|
||||
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# */
|
||||
|
||||
.function volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl
|
||||
.source 2 src1
|
||||
.dest 1 dst
|
||||
.temp 2 iqprod
|
||||
.temp 1 ac
|
||||
.temp 1 bd
|
||||
x2 mullb iqprod, src1, src1
|
||||
splitwb bd, ac, iqprod
|
||||
addb dst, ac, bd
|
@ -0,0 +1,58 @@
|
||||
#/*!
|
||||
# * \file volk_gnsssdr_8ic_s8ic_multiply_8ic.orc
|
||||
# * \brief ORC implementation: multiplies a group of 16 bits vectors by one constant vector
|
||||
# * \authors <ul>
|
||||
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
# * </ul>
|
||||
# *
|
||||
# * ORC code that multiplies a group of 16 bits vectors
|
||||
# * (8 bits the real part and 8 bits the imaginary part) by one constant vector
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# *
|
||||
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
# *
|
||||
# * GNSS-SDR is a software defined Global Navigation
|
||||
# * Satellite Systems receiver
|
||||
# *
|
||||
# * This file is part of GNSS-SDR.
|
||||
# *
|
||||
# * GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
# * it under the terms of the GNU General Public License as published by
|
||||
# * the Free Software Foundation, either version 3 of the License, or
|
||||
# * (at your option) any later version.
|
||||
# *
|
||||
# * GNSS-SDR is distributed in the hope that it will be useful,
|
||||
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# * GNU General Public License for more details.
|
||||
# *
|
||||
# * You should have received a copy of the GNU General Public License
|
||||
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# */
|
||||
|
||||
.function volk_gnsssdr_8ic_s8ic_multiply_8ic_a_orc_impl
|
||||
.source 2 src1
|
||||
.param 2 src2real
|
||||
.param 2 src2imag
|
||||
.dest 2 dst
|
||||
.temp 2 iqprod
|
||||
.temp 1 real
|
||||
.temp 1 imag
|
||||
.temp 1 rr
|
||||
.temp 1 ii
|
||||
.temp 1 ri
|
||||
.temp 1 ir
|
||||
x2 mullb iqprod, src1, src2real
|
||||
splitwb ir, rr, iqprod
|
||||
x2 mullb iqprod, src1, src2imag
|
||||
splitwb ii, ri, iqprod
|
||||
subb real, rr, ii
|
||||
addb imag, ri, ir
|
||||
mergebw dst, real, imag
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,59 @@
|
||||
#/*!
|
||||
# * \file volk_gnsssdr_8ic_x2_dot_prod_8ic.orc
|
||||
# * \brief ORC implementation: multiplies two 16 bits vectors and accumulates them
|
||||
# * \authors <ul>
|
||||
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
# * </ul>
|
||||
# *
|
||||
# * ORC code that multiplies two 16 bits vectors (8 bits the real part
|
||||
# * and 8 bits the imaginary part) and accumulates them
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# *
|
||||
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
# *
|
||||
# * GNSS-SDR is a software defined Global Navigation
|
||||
# * Satellite Systems receiver
|
||||
# *
|
||||
# * This file is part of GNSS-SDR.
|
||||
# *
|
||||
# * GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
# * it under the terms of the GNU General Public License as published by
|
||||
# * the Free Software Foundation, either version 3 of the License, or
|
||||
# * (at your option) any later version.
|
||||
# *
|
||||
# * GNSS-SDR is distributed in the hope that it will be useful,
|
||||
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# * GNU General Public License for more details.
|
||||
# *
|
||||
# * You should have received a copy of the GNU General Public License
|
||||
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# */
|
||||
|
||||
.function volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl
|
||||
.source 2 src1
|
||||
.source 2 src2
|
||||
.accumulator 2 accreal
|
||||
.accumulator 2 accimag
|
||||
.temp 2 iqprod
|
||||
.temp 1 real
|
||||
.temp 1 imag
|
||||
.temp 2 real2
|
||||
.temp 2 imag2
|
||||
.temp 1 ac
|
||||
.temp 1 bd
|
||||
.temp 2 swapped
|
||||
x2 mullb iqprod, src1, src2
|
||||
splitwb bd, ac, iqprod
|
||||
subb real, ac, bd
|
||||
swapw swapped, src1
|
||||
x2 mullb iqprod, swapped, src2
|
||||
splitwb bd, ac, iqprod
|
||||
addb imag, ac, bd
|
||||
mergebw real2, 0, real
|
||||
accw accreal, real2
|
||||
mergebw imag2, 0, imag
|
||||
accw accimag, imag2
|
@ -0,0 +1,57 @@
|
||||
#/*!
|
||||
# * \file volk_gnsssdr_8ic_x2_multiply_8ic.orc
|
||||
# * \brief ORC implementation: multiplies two 16 bits vectors
|
||||
# * \authors <ul>
|
||||
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
# * </ul>
|
||||
# *
|
||||
# * ORC code that multiplies two 16 bits vectors (8 bits the real part
|
||||
# * and 8 bits the imaginary part)
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# *
|
||||
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
# *
|
||||
# * GNSS-SDR is a software defined Global Navigation
|
||||
# * Satellite Systems receiver
|
||||
# *
|
||||
# * This file is part of GNSS-SDR.
|
||||
# *
|
||||
# * GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
# * it under the terms of the GNU General Public License as published by
|
||||
# * the Free Software Foundation, either version 3 of the License, or
|
||||
# * (at your option) any later version.
|
||||
# *
|
||||
# * GNSS-SDR is distributed in the hope that it will be useful,
|
||||
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# * GNU General Public License for more details.
|
||||
# *
|
||||
# * You should have received a copy of the GNU General Public License
|
||||
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# */
|
||||
|
||||
.function volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl
|
||||
.source 2 src1
|
||||
.source 2 src2
|
||||
.dest 2 dst
|
||||
.temp 2 iqprod
|
||||
.temp 1 real
|
||||
.temp 1 imag
|
||||
.temp 1 ac
|
||||
.temp 1 bd
|
||||
.temp 2 swapped
|
||||
x2 mullb iqprod, src1, src2
|
||||
splitwb bd, ac, iqprod
|
||||
subb real, ac, bd
|
||||
swapw swapped, src1
|
||||
x2 mullb iqprod, swapped, src2
|
||||
splitwb bd, ac, iqprod
|
||||
addb imag, ac, bd
|
||||
mergebw dst, real, imag
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,139 @@
|
||||
#/*!
|
||||
# * \file volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.orc
|
||||
# * \brief ORC implementation: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors
|
||||
# * \authors <ul>
|
||||
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
# * </ul>
|
||||
# *
|
||||
# * ORC code that performs the carrier wipe-off mixing and the
|
||||
# * Early, Prompt, and Late correlation with 16 bits vectors (8 bits the
|
||||
# * real part and 8 bits the imaginary part):
|
||||
# * - The carrier wipe-off is done by multiplying the input signal by the
|
||||
# * carrier (multiplication of 16 bits vectors) It returns the input
|
||||
# * signal in base band (BB)
|
||||
# * - Early values are calculated by multiplying the input signal in BB by the
|
||||
# * early code (multiplication of 16 bits vectors), accumulating the results
|
||||
# * - Prompt values are calculated by multiplying the input signal in BB by the
|
||||
# * prompt code (multiplication of 16 bits vectors), accumulating the results
|
||||
# * - Late values are calculated by multiplying the input signal in BB by the
|
||||
# * late code (multiplication of 16 bits vectors), accumulating the results
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# *
|
||||
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
# *
|
||||
# * GNSS-SDR is a software defined Global Navigation
|
||||
# * Satellite Systems receiver
|
||||
# *
|
||||
# * This file is part of GNSS-SDR.
|
||||
# *
|
||||
# * GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
# * it under the terms of the GNU General Public License as published by
|
||||
# * the Free Software Foundation, either version 3 of the License, or
|
||||
# * (at your option) any later version.
|
||||
# *
|
||||
# * GNSS-SDR is distributed in the hope that it will be useful,
|
||||
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# * GNU General Public License for more details.
|
||||
# *
|
||||
# * You should have received a copy of the GNU General Public License
|
||||
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# */
|
||||
|
||||
.function volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_first_a_orc_impl
|
||||
.source 2 input
|
||||
.source 2 carrier
|
||||
.source 2 E_code
|
||||
.source 2 P_code
|
||||
.accumulator 2 E_out_real
|
||||
.accumulator 2 E_out_imag
|
||||
.accumulator 2 P_out_real
|
||||
.accumulator 2 P_out_imag
|
||||
.temp 2 bb_signal_sample
|
||||
.temp 2 iqprod
|
||||
.temp 1 real
|
||||
.temp 1 imag
|
||||
.temp 1 ac
|
||||
.temp 1 bd
|
||||
.temp 2 swapped
|
||||
|
||||
.temp 2 real2
|
||||
.temp 2 imag2
|
||||
|
||||
x2 mullb iqprod, input, carrier
|
||||
splitwb bd, ac, iqprod
|
||||
subb real, ac, bd
|
||||
swapw swapped, input
|
||||
x2 mullb iqprod, swapped, carrier
|
||||
splitwb bd, ac, iqprod
|
||||
addb imag, ac, bd
|
||||
mergebw bb_signal_sample, real, imag
|
||||
|
||||
swapw swapped, bb_signal_sample
|
||||
|
||||
x2 mullb iqprod, bb_signal_sample, E_code
|
||||
splitwb bd, ac, iqprod
|
||||
subb real, ac, bd
|
||||
x2 mullb iqprod, swapped, E_code
|
||||
splitwb bd, ac, iqprod
|
||||
addb imag, ac, bd
|
||||
mergebw real2, 0, real
|
||||
mergebw imag2, 0, imag
|
||||
accw E_out_real, real2
|
||||
accw E_out_imag, imag2
|
||||
|
||||
x2 mullb iqprod, bb_signal_sample, P_code
|
||||
splitwb bd, ac, iqprod
|
||||
subb real, ac, bd
|
||||
x2 mullb iqprod, swapped, P_code
|
||||
splitwb bd, ac, iqprod
|
||||
addb imag, ac, bd
|
||||
mergebw real2, 0, real
|
||||
mergebw imag2, 0, imag
|
||||
accw P_out_real, real2
|
||||
accw P_out_imag, imag2
|
||||
|
||||
.function volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_second_a_orc_impl
|
||||
.source 2 input
|
||||
.source 2 carrier
|
||||
.source 2 L_code
|
||||
.accumulator 2 L_out_real
|
||||
.accumulator 2 L_out_imag
|
||||
|
||||
.temp 2 bb_signal_sample
|
||||
.temp 2 iqprod
|
||||
.temp 1 real
|
||||
.temp 1 imag
|
||||
.temp 1 ac
|
||||
.temp 1 bd
|
||||
.temp 2 swapped
|
||||
|
||||
.temp 2 real2
|
||||
.temp 2 imag2
|
||||
|
||||
x2 mullb iqprod, input, carrier
|
||||
splitwb bd, ac, iqprod
|
||||
subb real, ac, bd
|
||||
swapw swapped, input
|
||||
x2 mullb iqprod, swapped, carrier
|
||||
splitwb bd, ac, iqprod
|
||||
addb imag, ac, bd
|
||||
mergebw bb_signal_sample, real, imag
|
||||
|
||||
swapw swapped, bb_signal_sample
|
||||
|
||||
x2 mullb iqprod, bb_signal_sample, L_code
|
||||
splitwb bd, ac, iqprod
|
||||
subb real, ac, bd
|
||||
x2 mullb iqprod, swapped, L_code
|
||||
splitwb bd, ac, iqprod
|
||||
addb imag, ac, bd
|
||||
mergebw real2, 0, real
|
||||
mergebw imag2, 0, imag
|
||||
accw L_out_real, real2
|
||||
accw L_out_imag, imag2
|
||||
|
||||
|
@ -0,0 +1,39 @@
|
||||
#/*!
|
||||
# * \file volk_gnsssdr_8u_x2_multiply_8u.orc
|
||||
# * \brief ORC implementation: multiplies unsigned char values
|
||||
# * \authors <ul>
|
||||
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
# * </ul>
|
||||
# *
|
||||
# * ORC code that multiplies unsigned char values (8 bits data)
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# *
|
||||
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||
# *
|
||||
# * GNSS-SDR is a software defined Global Navigation
|
||||
# * Satellite Systems receiver
|
||||
# *
|
||||
# * This file is part of GNSS-SDR.
|
||||
# *
|
||||
# * GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
# * it under the terms of the GNU General Public License as published by
|
||||
# * the Free Software Foundation, either version 3 of the License, or
|
||||
# * (at your option) any later version.
|
||||
# *
|
||||
# * GNSS-SDR is distributed in the hope that it will be useful,
|
||||
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# * GNU General Public License for more details.
|
||||
# *
|
||||
# * You should have received a copy of the GNU General Public License
|
||||
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
# *
|
||||
# * -------------------------------------------------------------------------
|
||||
# */
|
||||
|
||||
.function volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl
|
||||
.source 1 src1
|
||||
.source 1 src2
|
||||
.dest 1 dst
|
||||
mullb dst, src1, src2
|
@ -0,0 +1,39 @@
|
||||
# Copyright 2013 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is part of GNU Radio
|
||||
#
|
||||
# GNU Radio is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# GNU Radio is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with GNU Radio; see the file COPYING. If not, write to
|
||||
# the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
# Boston, MA 02110-1301, USA.
|
||||
|
||||
########################################################################
|
||||
# Install python files and apps
|
||||
########################################################################
|
||||
include(GrPython)

# Python sources of the volk_gnsssdr_modtool package.
volk_python_install(
    FILES
        __init__.py
        cfg.py
        volk_gnsssdr_modtool_generate.py
    DESTINATION ${VOLK_PYTHON_DIR}/volk_gnsssdr_modtool
    COMPONENT "volk_gnsssdr"
)

# The volk_gnsssdr_modtool launcher, installed as a program.
volk_python_install(
    PROGRAMS
        volk_gnsssdr_modtool
    DESTINATION ${VOLK_RUNTIME_DIR}
    COMPONENT "volk_gnsssdr"
)
|
@ -0,0 +1,114 @@
|
||||
The volk_gnsssdr_modtool tool is installed along with VOLK as a way of helping
|
||||
to construct, add to, and interrogate the VOLK library or companion
|
||||
libraries.
|
||||
|
||||
volk_gnsssdr_modtool is installed into $prefix/bin.
|
||||
|
||||
VOLK modtool enables creating standalone (out-of-tree) VOLK modules
|
||||
and provides a few tools for sharing VOLK kernels between VOLK
|
||||
modules. If you need to design or work with VOLK kernels away from
|
||||
the canonical VOLK library, this is the tool. If you need to tailor
|
||||
your own VOLK library for whatever reason, this is the tool.
|
||||
|
||||
The canonical VOLK library installs a volk_gnsssdr.h and a libvolk_gnsssdr.so. Your
|
||||
own library will install volk_gnsssdr_$name.h and libvolk_gnsssdr_$name.so. Ya Gronk?
|
||||
Good.
|
||||
|
||||
There isn't a substantial difference between the canonical VOLK
|
||||
module and any other VOLK module. They're all peers. Any module
|
||||
created via VOLK modtool will come complete with a default
|
||||
volk_gnsssdr_modtool.cfg file associating the module with the base from which
|
||||
it came, its distinctive $name and its destination (or path). These
|
||||
values (created from user input if VOLK modtool runs without a
|
||||
user-supplied config file or a default config file) serve as default
|
||||
values for some VOLK modtool actions. It's more or less intended for
|
||||
the user to change directories to the top level of a created VOLK
|
||||
module and then run volk_gnsssdr_modtool to take advantage of the values
|
||||
stored in the default volk_gnsssdr_modtool.cfg file.
|
||||
|
||||
Apart from creating new VOLK modules, VOLK modtool allows you to list
|
||||
the names of kernels in other modules, list the names of kernels in
|
||||
the current module, add kernels from another module into the current
|
||||
module, and remove kernels from the current module. When moving
|
||||
kernels between modules, VOLK modtool does its best to keep the qa
|
||||
and profiling code for those kernels intact. If the base has a test
|
||||
or a profiling call for some kernel, those calls will follow the
|
||||
kernel when VOLK modtool adds that kernel. If QA or profiling
|
||||
requires a puppet kernel, the puppet kernel will follow the original
|
||||
kernel when VOLK modtool adds that original kernel. VOLK modtool
|
||||
respects puppets.
|
||||
|
||||
======================================================================
|
||||
|
||||
Installing a new VOLK Library:
|
||||
|
||||
Run the command "volk_gnsssdr_modtool -i". This will ask you three questions:
|
||||
|
||||
name: // the name to give your VOLK library: volk_gnsssdr_<name>
|
||||
destination: // directory new source tree is built under -- must exist.
|
||||
// It will create <directory>/volk_gnsssdr_<name>
|
||||
base: // the directory containing the original VOLK source code
|
||||
|
||||
The name provided must be alphanumeric (and cannot start with a
|
||||
number). No special characters including dashes and underscores are
|
||||
allowed.
|
||||
|
||||
This will build a new skeleton directory in the destination provided
|
||||
with the name volk_gnsssdr_<name>. It will contain the necessary structure to
|
||||
build:
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DCMAKE_INSTALL_PREFIX=/opt/volk_gnsssdr ../
|
||||
make
|
||||
sudo make install
|
||||
|
||||
Right now, the library is empty and contains no kernels. Kernels can
|
||||
be added from another VOLK library using the '-a' option. If not
|
||||
specified, the kernel will be extracted from the base VOLK
|
||||
directory. Using the '-b' option allows us to specify another VOLK library to
|
||||
use for this purpose.
|
||||
|
||||
volk_gnsssdr_modtool -a -n 32fc_x2_conjugate_dot_prod_32fc
|
||||
|
||||
This will put the code for the new kernel into
|
||||
<destination>/volk_gnsssdr_<name>/kernels/volk_gnsssdr_<name>/
|
||||
|
||||
Other kernels must be added by hand. See the following webpages for
|
||||
more information about creating VOLK kernels:
|
||||
http://gnuradio.org/doc/doxygen/volk_gnsssdr_guide.html
|
||||
http://gnuradio.org/redmine/projects/gnuradio/wiki/Volk
|
||||
|
||||
|
||||
======================================================================
|
||||
|
||||
OPTIONS
|
||||
|
||||
Options for Adding and Removing Kernels:
|
||||
-a, --add_kernel
|
||||
Add kernel from existing VOLK module. Uses the base VOLK module
|
||||
unless -b is used. Use -n to specify the kernel name.
|
||||
Requires: -n.
|
||||
Optional: -b
|
||||
|
||||
-A, --add_all_kernels
|
||||
Add all kernels from existing VOLK module. Uses the base VOLK
|
||||
module unless -b is used.
|
||||
Optional: -b
|
||||
|
||||
-x, --remove_kernel
|
||||
Remove kernel from module.
|
||||
Required: -n.
|
||||
Optional: -b
|
||||
|
||||
Options for Listing Kernels:
|
||||
-l, --list
|
||||
Lists all kernels available in the base VOLK module.
|
||||
|
||||
-k, --kernels
|
||||
Lists all kernels in this VOLK module.
|
||||
|
||||
-r, --remote-list
|
||||
Lists all kernels in another VOLK module that is specified
|
||||
using the -b option.
|
||||
|
@ -0,0 +1,24 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2013 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is part of GNU Radio
|
||||
#
|
||||
# GNU Radio is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# GNU Radio is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with GNU Radio; see the file COPYING. If not, write to
|
||||
# the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
# Boston, MA 02110-1301, USA.
|
||||
#
|
||||
|
||||
from cfg import volk_gnsssdr_modtool_config
|
||||
from volk_gnsssdr_modtool_generate import volk_gnsssdr_modtool
|
Binary file not shown.
@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2013 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is part of GNU Radio
|
||||
#
|
||||
# GNU Radio is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# GNU Radio is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with GNU Radio; see the file COPYING. If not, write to
|
||||
# the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
# Boston, MA 02110-1301, USA.
|
||||
#
|
||||
|
||||
import ConfigParser
|
||||
import sys
|
||||
import os
|
||||
import exceptions
|
||||
import re
|
||||
|
||||
|
||||
class volk_gnsssdr_modtool_config:
    """Load, normalise and check the 'config' section used by volk_gnsssdr_modtool.

    The section carries the keys listed in ``config_defaults`` ('name',
    'destination', 'base').  Each key has a "remap" expression and a "verify"
    expression; both are small Python snippets containing ``$<n>`` (value) and
    ``$k<n>`` (key) placeholders that are filled in and then evaluated.
    """

    def key_val_sub(self, num, stuff, section):
        """Fill in the placeholders of one expression template.

        num     -- index of the template inside ``section[1]`` and of the
                   matching (key, value) pair inside ``stuff``.
        stuff   -- sequence of (key, value) pairs of the config section.
        section -- (section_name, [template, ...]) tuple.

        Returns the template with ``$<num>`` replaced by the value and
        ``$k<num>`` replaced by the key.
        """
        # The value is substituted first, then the key, as in the original code.
        with_value = re.sub(r'\$' + str(num), stuff[num][1], section[1][num])
        return re.sub(r'\$' + 'k' + str(num), stuff[num][0], with_value)

    def verify(self):
        """Evaluate every verification expression registered in ``self.verification``."""
        for i in self.verification:
            self.verify_section(i)

    def remap(self):
        """Evaluate every remap expression registered in ``self.remapification``."""
        for i in self.remapification:
            self.verify_section(i)

    def verify_section(self, section):
        """Evaluate each expression of ``section`` against the stored config items.

        section -- (section_name, [template, ...]) tuple; template ``i`` is
                   paired with the i-th item of the config section.

        Raises ValueError when an expression evaluates to False and IOError
        when evaluating it fails for any other reason.
        """
        stuff = self.cfg.items(section[0])
        for i in range(len(section[1])):
            # Each expression is evaluated exactly once, inside the try block,
            # so remap side effects are not applied twice and failures are
            # reported through the errors below.
            # NOTE(review): eval() runs text assembled from config values; a
            # value containing a quote can inject code. Only use trusted
            # config files.
            # NOTE(review): re.match() yields None on mismatch and
            # `None == False` is false, so the name pattern check cannot fail
            # here. Left unchanged because remap expressions also return None.
            try:
                val = eval(self.key_val_sub(i, stuff, section))
                if val == False:
                    raise ValueError
            except ValueError:
                raise ValueError('Verification function returns False... key:%s, val:%s'%(stuff[i][0], stuff[i][1]))
            except Exception:
                raise IOError('bad configuration... key:%s, val:%s'%(stuff[i][0], stuff[i][1]))

    def __init__(self, cfg=None):
        """Read the configuration, then remap and verify it.

        cfg -- path of a config file to read.  When omitted,
               'volk_gnsssdr_modtool.cfg' in the current working directory is
               read if it exists; otherwise the user is prompted for each key
               in ``config_defaults``.
        """
        self.config_name = 'config'
        self.config_defaults = ['name', 'destination', 'base']
        # Entry 0 is a constant expression (the name is not remapped); entries
        # 1 and 2 rewrite the stored value as an expanded real path.
        self.config_defaults_remap = ['1',
                                      'self.cfg.set(self.config_name, \'$k1\', os.path.realpath(os.path.expanduser(\'$1\')))',
                                      'self.cfg.set(self.config_name, \'$k2\', os.path.realpath(os.path.expanduser(\'$2\')))']

        self.config_defaults_verify = ['re.match(\'[a-zA-Z0-9]+$\', \'$0\')',
                                       'os.path.exists(\'$1\')',
                                       'os.path.exists(\'$2\')']
        self.remapification = [(self.config_name, self.config_defaults_remap)]
        self.verification = [(self.config_name, self.config_defaults_verify)]
        default = os.path.join(os.getcwd(), 'volk_gnsssdr_modtool.cfg')
        icfg = ConfigParser.RawConfigParser()
        if cfg:
            icfg.read(cfg)
        elif os.path.exists(default):
            icfg.read(default)
        else:
            print("Initializing config file...")
            icfg.add_section(self.config_name)
            for kn in self.config_defaults:
                rv = raw_input("%s: "%(kn))
                icfg.set(self.config_name, kn, rv)
        self.cfg = icfg
        self.remap()
        self.verify()

    def read_map(self, name, inp):
        """Replace section ``name`` with the key/value pairs of mapping ``inp``."""
        if self.cfg.has_section(name):
            self.cfg.remove_section(name)
        self.cfg.add_section(name)
        for i in inp:
            self.cfg.set(name, i, inp[i])

    def get_map(self, name):
        """Return the items of section ``name`` as a dict."""
        retval = {}
        for key, value in self.cfg.items(name):
            retval[key] = value
        return retval
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
Binary file not shown.
@ -0,0 +1,128 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2013 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is part of GNU Radio
|
||||
#
|
||||
# GNU Radio is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# GNU Radio is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with GNU Radio; see the file COPYING. If not, write to
|
||||
# the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
# Boston, MA 02110-1301, USA.
|
||||
#
|
||||
|
||||
from volk_gnsssdr_modtool import volk_gnsssdr_modtool, volk_gnsssdr_modtool_config
|
||||
from optparse import OptionParser, OptionGroup
|
||||
|
||||
import exceptions
|
||||
import os
|
||||
import sys
|
||||
|
||||
if __name__ == '__main__':
    # ---- command line definition -------------------------------------
    # General options live on the parser itself, the mutually independent
    # actions in their own option group.
    parser = OptionParser()
    parser.add_option('-b', '--base_path', action='store', default=None,
                      help='Base path for action. By default, volk_gnsssdr_modtool.cfg loads this value.')
    parser.add_option('-n', '--kernel_name', action='store', default=None,
                      help='Kernel name for action. No default')
    parser.add_option('-c', '--config', action='store', dest='config_file', default=None,
                      help='Config file for volk_gnsssdr_modtool. By default, volk_gnsssdr_modtool.cfg in the local directory will be used/created.')

    actions = OptionGroup(parser, 'Actions')
    actions.add_option('-i', '--install', action='store_true',
                       help='Create a new volk_gnsssdr module.')
    actions.add_option('-a', '--add_kernel', action='store_true',
                       help='Add kernel from existing volk_gnsssdr module. Requires: -n. Optional: -b')
    actions.add_option('-A', '--add_all_kernels', action='store_true',
                       help='Add all kernels from existing volk_gnsssdr module. Optional: -b')
    actions.add_option('-x', '--remove_kernel', action='store_true',
                       help='Remove kernel from module. Required: -n. Optional: -b')
    actions.add_option('-l', '--list', action='store_true',
                       help='List all kernels in the base.')
    actions.add_option('-k', '--kernels', action='store_true',
                       help='List all kernels in the module.')
    actions.add_option('-r', '--remote_list', action='store_true',
                       help='List all available kernels in remote volk_gnsssdr module. Requires: -b.')
    actions.add_option('-m', '--moo', action='store_true',
                       help='Have you mooed today?')
    parser.add_option_group(actions)

    (options, args) = parser.parse_args()

    if len(sys.argv) < 2:
        # No arguments at all: just show the usage text.
        parser.print_help()

    elif options.moo:
        for row in (" (__) ",
                    " (oo) ",
                    " /------\\/ ",
                    " / | || ",
                    " * /\\---/\\ ",
                    " ~~ ~~ "):
            print(row)

    else:
        my_cfg = volk_gnsssdr_modtool_config(options.config_file)
        my_modtool = volk_gnsssdr_modtool(my_cfg.get_map(my_cfg.config_name))

        def configured(key):
            # Look a value up in the tool's own configuration section.
            return my_cfg.cfg.get(my_cfg.config_name, key)

        def chosen_base():
            # -b on the command line overrides the configured base path.
            if options.base_path:
                return options.base_path
            return configured('base')

        def show(kernel_names):
            for entry in kernel_names:
                print(entry)

        if options.install:
            my_modtool.make_module_skeleton()
            my_modtool.write_default_cfg(my_cfg.cfg)

        if options.add_kernel:
            if not options.kernel_name:
                raise exceptions.IOError("This action requires the -n option.")
            my_modtool.import_kernel(options.kernel_name, chosen_base())

        if options.remove_kernel:
            if not options.kernel_name:
                raise exceptions.IOError("This action requires the -n option.")
            my_modtool.remove_kernel(options.kernel_name)

        if options.add_all_kernels:
            source_base = chosen_base()
            for kernel_name in my_modtool.get_current_kernels(source_base):
                my_modtool.import_kernel(kernel_name, source_base)

        if options.remote_list:
            if not options.base_path:
                raise exceptions.IOError("This action requires the -b option. Try -l or -k for listing kernels in the base or the module.")
            show(my_modtool.get_current_kernels(options.base_path))

        if options.list:
            show(my_modtool.get_current_kernels())

        if options.kernels:
            dest = configured('destination')
            module_name = configured('name')
            module_dir = os.path.join(dest, 'volk_gnsssdr_' + module_name)
            show(my_modtool.get_current_kernels(module_dir))
|
@ -0,0 +1,330 @@
|
||||
#
|
||||
# Copyright 2013 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is part of GNU Radio
|
||||
#
|
||||
# GNU Radio is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3, or (at your option)
|
||||
# any later version.
|
||||
#
|
||||
# GNU Radio is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with GNU Radio; see the file COPYING. If not, write to
|
||||
# the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
# Boston, MA 02110-1301, USA.
|
||||
#
|
||||
|
||||
import os
|
||||
import glob
|
||||
import sys
|
||||
import re
|
||||
import glob
|
||||
import shutil
|
||||
import exceptions
|
||||
from sets import Set
|
||||
|
||||
class volk_gnsssdr_modtool:
    """Create and maintain a derived ("out of tree") volk_gnsssdr module.

    The constructor takes a mapping with three keys:

      'name'        -- suffix of the derived module; the module is called
                       'volk_gnsssdr_<name>'
      'destination' -- directory in which the derived module is created
      'base'        -- path of the module kernels are taken from

    The tool works purely by text substitution on the files of the base
    module: the library stem is replaced by the derived module's prefix and
    VOLK_PROFILE / VOLK_PUPPET_PROFILE / VOLK_RUN_TESTS registration lines
    are added or removed per kernel.
    """

    def __init__(self, cfg):
        """Store the configuration mapping and pre-compile the patterns."""
        self.volk_gnsssdr = re.compile('volk_gnsssdr')
        self.remove_after_underscore = re.compile("_.*")
        self.volk_gnsssdr_run_tests = re.compile(r'^\s*VOLK_RUN_TESTS.*\n', re.MULTILINE)
        self.volk_gnsssdr_profile = re.compile(r'^\s*(VOLK_PROFILE|VOLK_PUPPET_PROFILE).*\n', re.MULTILINE)
        self.my_dict = cfg
        # Line of the profile application before which new entries go.
        self.lastline = re.compile(r'\s*char path\[1024\];.*')
        self.badassert = re.compile(r'^\s*assert\(toked\[0\] == "volk_gnsssdr_.*\n', re.MULTILINE)
        self.goodassert = ' assert(toked[0] == "volk_gnsssdr");\n'
        self.baderase = re.compile(r'^\s*toked.erase\(toked.begin\(\)\);.*\n', re.MULTILINE)
        self.gooderase = ' toked.erase(toked.begin());\n toked.erase(toked.begin());\n'

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _module_prefix(self):
        """Return the derived module's name, 'volk_gnsssdr_<name>'."""
        return 'volk_gnsssdr_' + self.my_dict['name']

    def _module_root(self):
        """Return the directory of the derived module."""
        return os.path.join(self.my_dict['destination'], self._module_prefix())

    @staticmethod
    def _read_text(path):
        """Return the whole content of `path` (handle is closed on return)."""
        with open(path, 'r') as handle:
            return handle.read()

    @staticmethod
    def _read_lines(path):
        """Return the lines of `path`, line endings included."""
        with open(path, 'r') as handle:
            return handle.readlines()

    @staticmethod
    def _write_text(path, text, make_dirs=False):
        """Overwrite `path` with `text`, optionally creating its directory."""
        if make_dirs and not os.path.exists(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))
        with open(path, 'w') as handle:
            handle.write(text)

    # ------------------------------------------------------------------
    # queries
    # ------------------------------------------------------------------
    def get_basename(self, base=None):
        """Return the module suffix encoded in the last component of `base`.

        'volk_gnsssdr' (the plain library) yields '', 'volk_gnsssdr_foo'
        yields 'foo'. For any other directory name the text after the last
        underscore is returned, or '' when there is no underscore. `base`
        defaults to the configured base path.
        """
        if not base:
            base = self.my_dict['base']
        candidate = base.split('/')[-1]
        stem = 'volk_gnsssdr'
        # The library stem itself contains an underscore, so it has to be
        # taken off before the suffix is split out; otherwise the plain
        # 'volk_gnsssdr' directory would be reported as module 'gnsssdr'.
        if candidate == stem:
            return ''
        if candidate.startswith(stem + '_'):
            candidate = candidate[len(stem):]
        pieces = candidate.split('_')
        if len(pieces) == 1:
            return ''
        return pieces[-1]

    def get_current_kernels(self, base=None):
        """Return the set of kernel names found in module `base`.

        Kernel names are derived from the header files in
        '<base>/kernels/<module prefix>/': a header counts as a kernel when
        the text after the module prefix starts with a data type token
        (digits followed by letters, e.g. '32fc'). `base` defaults to the
        configured base path.
        """
        if not base:
            base = self.my_dict['base']
        name = self.get_basename(base)
        if name == '':
            prefix = 'volk_gnsssdr'
        else:
            prefix = 'volk_gnsssdr_' + name
        hdr_files = glob.glob(os.path.join(base, "kernels/" + prefix + "/*.h"))
        lookbehind = "(?<=" + prefix + "_)"
        begins = re.compile(lookbehind + ".*")

        # Pass 1: collect every data type token that starts a header name.
        datatypes = set()
        for path in hdr_files:
            header = re.search(r".*\.h.*", os.path.basename(path))
            if not header:
                continue
            tail = begins.search(header.group(0))
            if not tail:
                continue
            dtype = self.remove_after_underscore.sub("", tail.group(0))
            token = re.search("[0-9]+[A-z]+", dtype)
            if token:
                datatypes.add(token.group(0))

        # Pass 2: a kernel name is everything between the module prefix and
        # the '.h' extension, provided it starts with a known data type.
        functions = set()
        for path in hdr_files:
            for dt in datatypes:
                if dt in path:
                    found = re.search(lookbehind + dt + r".*(?=\.h)", path)
                    if found:
                        functions.add(found.group(0))
        return functions

    # ------------------------------------------------------------------
    # module creation
    # ------------------------------------------------------------------
    def make_module_skeleton(self):
        """Create the derived module as a kernel-less copy of the base.

        Every file of the base whose name does not contain a kernel name is
        copied with the library stem renamed, then the registration lines
        in testqa.cc and the profile application are stripped and
        qa_utils.cc is patched for the longer prefix.

        Raises IOError when the destination module already exists.
        """
        prefix = self._module_prefix()
        module_root = self._module_root()
        if os.path.exists(module_root):
            raise IOError("Destination %s already exists!" % (module_root))

        kernel_dir = os.path.join(module_root, 'kernels/' + prefix)
        if not os.path.exists(kernel_dir):
            os.makedirs(kernel_dir)

        current_kernel_names = self.get_current_kernels()

        for root, dirnames, filenames in os.walk(self.my_dict['base']):
            for name in filenames:
                # Files belonging to a kernel are imported separately.
                # any() also copes with a base that has no kernels at all,
                # in which case every file is infrastructure.
                if any(re.search(k, name) for k in current_kernel_names):
                    continue
                infile = os.path.join(root, name)
                outstring = re.sub(self.volk_gnsssdr, prefix, self._read_text(infile))
                newname = re.sub(self.volk_gnsssdr, prefix, name)
                relpath = os.path.relpath(infile, self.my_dict['base'])
                newrelpath = re.sub(self.volk_gnsssdr, prefix, relpath)
                dest = os.path.join(module_root, os.path.dirname(newrelpath), newname)
                self._write_text(dest, outstring, make_dirs=True)

        # Drop all test registrations; kernels add theirs on import.
        testqa = os.path.join(module_root, 'lib/testqa.cc')
        self._write_text(testqa, re.sub(self.volk_gnsssdr_run_tests, '', self._read_text(testqa)))

        # Same for the profile application.
        profile_app = os.path.join(module_root, 'apps/' + prefix + '_profile.cc')
        self._write_text(profile_app, re.sub(self.volk_gnsssdr_profile, '', self._read_text(profile_app)))

        # Adapt the name tokenizer in qa_utils.cc to the longer prefix.
        qa_utils = os.path.join(module_root, 'lib/qa_utils.cc')
        patched = re.sub(self.badassert, self.goodassert, self._read_text(qa_utils))
        patched = re.sub(self.baderase, self.gooderase, patched)
        self._write_text(qa_utils, patched)

    def write_default_cfg(self, cfg):
        """Write parser `cfg` to 'volk_gnsssdr_modtool.cfg' in the module."""
        target = os.path.join(self._module_root(), 'volk_gnsssdr_modtool.cfg')
        with open(target, 'wb') as outfile:
            cfg.write(outfile)

    # ------------------------------------------------------------------
    # kernel import / removal
    # ------------------------------------------------------------------
    def convert_kernel(self, oldvolk_gnsssdr, name, base, inpath, top):
        """Copy the header (and orc files) of kernel `name` into the module.

        oldvolk_gnsssdr -- compiled pattern matching the source prefix
        name            -- kernel name without prefix and extension
        base            -- source module path, used to compute relative paths
        inpath          -- absolute path of the source module
        top             -- source prefix including the trailing underscore
        """
        prefix = self._module_prefix()
        module_root = self._module_root()

        infile = os.path.join(inpath, 'kernels/' + top[:-1] + '/' + top + name + '.h')
        outstring = re.sub(oldvolk_gnsssdr, prefix, self._read_text(infile))
        newname = prefix + '_' + name + '.h'
        relpath = os.path.relpath(infile, base)
        newrelpath = re.sub(oldvolk_gnsssdr, prefix, relpath)
        dest = os.path.join(module_root, os.path.dirname(newrelpath), newname)
        self._write_text(dest, outstring, make_dirs=True)

        # copy orc proto-kernels if they exist
        # NOTE(review): every matching orc file is written to the same
        # destination name, so only the last one survives -- confirm that at
        # most one orc file per kernel is expected.
        for orcfile in glob.glob(inpath + '/orc/' + top + name + '*.orc'):
            if os.path.isfile(orcfile):
                outstring = re.sub(oldvolk_gnsssdr, prefix, self._read_text(orcfile))
                newname = prefix + '_' + name + '.orc'
                relpath = os.path.relpath(orcfile, base)
                newrelpath = re.sub(oldvolk_gnsssdr, prefix, relpath)
                dest = os.path.join(module_root, os.path.dirname(newrelpath), newname)
                self._write_text(dest, outstring, make_dirs=True)

    def remove_kernel(self, name):
        """Remove kernel `name` (and the puppets registered with it).

        Lines mentioning the kernel are deleted from the module's profile
        application and testqa.cc, then the kernel headers and orc files
        are removed from disk.

        Raises IOError when `name` is not a known kernel.
        """
        basename = self.my_dict['name']
        if len(basename) > 0:
            top = 'volk_gnsssdr_' + basename + '_'
        else:
            top = 'volk_gnsssdr_'
        base = os.path.join(self.my_dict['destination'], top[:-1])

        # NOTE(review): the lookup below lists the kernels of the configured
        # *base*, while the message names the derived module -- confirm
        # whether get_current_kernels(base) was intended.
        if not name in self.get_current_kernels():
            raise IOError("Requested kernel %s is not in module %s" % (name, base))

        inpath = os.path.abspath(base)

        kernel = re.compile(name)
        search_kernels = set([kernel])
        puppet = re.compile(r'^\s*VOLK_PUPPET')

        # Profile application: drop the kernel's lines and remember which
        # puppet kernels were registered on them.
        src_dest = os.path.join(inpath, 'apps/', top[:-1] + '_profile.cc')
        kept = []
        for otherline in self._read_lines(src_dest):
            if not kernel.search(otherline):
                kept.append(otherline)
                continue
            if puppet.match(otherline):
                args = re.search("(?<=VOLK_PUPPET_PROFILE).*", otherline)
                m_func = args.group(0).split(',')[0]
                func = re.search('(?<=' + top + ').*', m_func)
                search_kernels.add(re.compile(func.group(0)))
        self._write_text(src_dest, ''.join(kept))

        # Test registrations: drop lines of the kernel and of its puppets.
        src_dest = os.path.join(inpath, 'lib/testqa.cc')
        kept = [otherline for otherline in self._read_lines(src_dest)
                if not any(k.search(otherline) for k in search_kernels)]
        self._write_text(src_dest, ''.join(kept))

        for doomed in search_kernels:
            infile = os.path.join(inpath, 'kernels/' + top[:-1] + '/' + top + doomed.pattern + '.h')
            print("Removing kernel %s" % (doomed.pattern))
            if os.path.exists(infile):
                os.remove(infile)

        # remove the orc proto-kernels if they exist. There are no puppets here
        # so just need to glob for files matching kernel name
        orc_files = glob.glob(inpath + '/orc/' + top + name + '*.orc')
        print(orc_files)
        for orcfile in orc_files:
            print(orcfile)
            if os.path.exists(orcfile):
                os.remove(orcfile)

    def import_kernel(self, name, base):
        """Copy kernel `name` from module `base` into the derived module.

        The kernel header is converted, its VOLK_PROFILE /
        VOLK_PUPPET_PROFILE lines are inserted into the module's profile
        application (puppet kernels referenced there are converted too) and
        the matching VOLK_RUN_TESTS lines are inserted into testqa.cc.
        An empty `base` selects the configured base path.

        Raises IOError when `name` is not a kernel of `base`.
        """
        if not base:
            base = self.my_dict['base']
            basename = self.get_basename()
        else:
            basename = self.get_basename(base)
        if not name in self.get_current_kernels(base):
            raise IOError("Requested kernel %s is not in module %s" % (name, base))

        inpath = os.path.abspath(base)
        if len(basename) > 0:
            top = 'volk_gnsssdr_' + basename + '_'
        else:
            top = 'volk_gnsssdr_'
        oldvolk_gnsssdr = re.compile(top[:-1])
        prefix = self._module_prefix()
        module_root = self._module_root()

        self.convert_kernel(oldvolk_gnsssdr, name, base, inpath, top)

        kernel = re.compile(name)
        search_kernels = set([kernel])

        profile = re.compile(r'^\s*VOLK_PROFILE')
        puppet = re.compile(r'^\s*VOLK_PUPPET')

        # ---- profile application -------------------------------------
        lines = self._read_lines(os.path.join(inpath, 'apps/', oldvolk_gnsssdr.pattern + '_profile.cc'))
        dest = os.path.join(module_root, 'apps/' + prefix + '_profile.cc')
        otherlines = self._read_lines(dest)
        # The output is collected first and written in one go, so a failure
        # half way through leaves the existing file untouched.
        output = []
        inserted = False
        for otherline in otherlines:
            # New entries go right before the first 'char path[1024];' line.
            if not inserted and self.lastline.match(otherline):
                inserted = True
                for line in lines:
                    if not kernel.search(line):
                        continue
                    if profile.match(line):
                        output.append(re.sub(oldvolk_gnsssdr, prefix, line))
                    elif puppet.match(line):
                        output.append(re.sub(oldvolk_gnsssdr, prefix, line))
                        # A puppet line names a second kernel that has to
                        # come along with the requested one.
                        args = re.search("(?<=VOLK_PUPPET_PROFILE).*", line)
                        m_func = args.group(0).split(',')[0]
                        func = re.search('(?<=' + top + ').*', m_func)
                        search_kernels.add(re.compile(func.group(0)))
                        self.convert_kernel(oldvolk_gnsssdr, func.group(0), base, inpath, top)
            # Stale entries of the imported kernels are not carried over.
            if not any(k.search(otherline) for k in search_kernels):
                output.append(otherline)
        self._write_text(dest, ''.join(output))

        for added in search_kernels:
            print("Adding kernel %s from module %s" % (added.pattern, base))

        # ---- test registrations --------------------------------------
        lines = self._read_lines(os.path.join(inpath, 'lib/testqa.cc'))
        dest = os.path.join(module_root, 'lib/testqa.cc')
        otherlines = self._read_lines(dest)
        output = []
        inserted = False
        for otherline in otherlines:
            # Insert before the first line that is not a '#...' line. (The
            # original additionally tested re.match('\s*', ...) for None,
            # which can never be the case.)
            if not inserted and re.match(r'\s*#.*', otherline) is None:
                inserted = True
                for line in lines:
                    # any(): a line matching several kernels is added once.
                    if any(k.search(line) for k in search_kernels):
                        if self.volk_gnsssdr_run_tests.match(line):
                            output.append(re.sub(oldvolk_gnsssdr, prefix, line))
            if not any(k.search(otherline) for k in search_kernels):
                output.append(otherline)
        self._write_text(dest, ''.join(output))
|
||||
|
||||
|
||||
|
||||
|
||||
|
Binary file not shown.
212
src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr.tmpl.c
Normal file
212
src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr.tmpl.c
Normal file
@ -0,0 +1,212 @@
|
||||
/*
|
||||
* Copyright 2011-2012 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GNU Radio
|
||||
*
|
||||
* GNU Radio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* GNU Radio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Radio; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include "volk_gnsssdr_machines.h"
|
||||
#include <volk_gnsssdr/volk_gnsssdr_typedefs.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_cpu.h>
|
||||
#include "volk_gnsssdr_rank_archs.h"
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
static size_t __alignment = 0;
|
||||
static intptr_t __alignment_mask = 0;
|
||||
|
||||
struct volk_gnsssdr_machine *get_machine(void)
|
||||
{
|
||||
extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[];
|
||||
extern unsigned int n_volk_gnsssdr_machines;
|
||||
static struct volk_gnsssdr_machine *machine = NULL;
|
||||
|
||||
if(machine != NULL)
|
||||
return machine;
|
||||
else {
|
||||
unsigned int max_score = 0;
|
||||
unsigned int i;
|
||||
struct volk_gnsssdr_machine *max_machine = NULL;
|
||||
for(i=0; i<n_volk_gnsssdr_machines; i++) {
|
||||
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
|
||||
if(volk_gnsssdr_machines[i]->caps > max_score) {
|
||||
max_score = volk_gnsssdr_machines[i]->caps;
|
||||
max_machine = volk_gnsssdr_machines[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
machine = max_machine;
|
||||
printf("Using Volk machine: %s\n", machine->name);
|
||||
__alignment = machine->alignment;
|
||||
__alignment_mask = (intptr_t)(__alignment-1);
|
||||
return machine;
|
||||
}
|
||||
}
|
||||
|
||||
void volk_gnsssdr_list_machines(void)
|
||||
{
|
||||
extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[];
|
||||
extern unsigned int n_volk_gnsssdr_machines;
|
||||
|
||||
unsigned int i;
|
||||
for(i=0; i<n_volk_gnsssdr_machines; i++) {
|
||||
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
|
||||
printf("%s;", volk_gnsssdr_machines[i]->name);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
const char* volk_gnsssdr_get_machine(void)
|
||||
{
|
||||
extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[];
|
||||
extern unsigned int n_volk_gnsssdr_machines;
|
||||
static struct volk_gnsssdr_machine *machine = NULL;
|
||||
|
||||
if(machine != NULL)
|
||||
return machine->name;
|
||||
else {
|
||||
unsigned int max_score = 0;
|
||||
unsigned int i;
|
||||
struct volk_gnsssdr_machine *max_machine = NULL;
|
||||
for(i=0; i<n_volk_gnsssdr_machines; i++) {
|
||||
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
|
||||
if(volk_gnsssdr_machines[i]->caps > max_score) {
|
||||
max_score = volk_gnsssdr_machines[i]->caps;
|
||||
max_machine = volk_gnsssdr_machines[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
machine = max_machine;
|
||||
return machine->name;
|
||||
}
|
||||
}
|
||||
|
||||
size_t volk_gnsssdr_get_alignment(void)
|
||||
{
|
||||
get_machine(); //ensures alignment is set
|
||||
return __alignment;
|
||||
}
|
||||
|
||||
bool volk_gnsssdr_is_aligned(const void *ptr)
|
||||
{
|
||||
return ((intptr_t)(ptr) & __alignment_mask) == 0;
|
||||
}
|
||||
|
||||
#define LV_HAVE_GENERIC
|
||||
#define LV_HAVE_DISPATCHER
|
||||
|
||||
#for $kern in $kernels

#if $kern.has_dispatcher
#include <volk_gnsssdr/$(kern.name).h> //pulls in the dispatcher
#end if

/*
 * Run-time dispatcher installed as the generic entry point of the kernel.
 * Kernels that ship their own dispatcher forward to it and return. For all
 * other kernels the pointer arguments are OR-ed together and the aligned
 * implementation is called when the combined value passes the alignment
 * check, the unaligned implementation otherwise.
 */
static inline void __$(kern.name)_d($kern.arglist_full)
{
#if $kern.has_dispatcher
    $(kern.name)_dispatcher($kern.arglist_names);
    return;
#end if

    if (volk_gnsssdr_is_aligned(
#set $num_open_parens = 0
#for $arg_type, $arg_name in $kern.args
#if '*' in $arg_type
    VOLK_OR_PTR($arg_name,
#set $num_open_parens += 1
#end if
#end for
    0$(')'*$num_open_parens)
    )){
        $(kern.name)_a($kern.arglist_names);
    }
    else{
        $(kern.name)_u($kern.arglist_names);
    }
}

/*
 * One-time resolution of the function pointers of the kernel: the
 * implementation tables of the selected machine are ranked once for the
 * aligned and once for the unaligned case, the two winners are stored in
 * the _a and _u pointers, and the generic pointer is set to the dispatcher
 * defined above.
 */
static inline void __init_$(kern.name)(void)
{
    const char *name = get_machine()->$(kern.name)_name;
    const char **impl_names = get_machine()->$(kern.name)_impl_names;
    const int *impl_deps = get_machine()->$(kern.name)_impl_deps;
    const bool *alignment = get_machine()->$(kern.name)_impl_alignment;
    const size_t n_impls = get_machine()->$(kern.name)_n_impls;
    const size_t index_a = volk_gnsssdr_rank_archs(name, impl_names, impl_deps, alignment, n_impls, true/*aligned*/);
    const size_t index_u = volk_gnsssdr_rank_archs(name, impl_names, impl_deps, alignment, n_impls, false/*unaligned*/);
    $(kern.name)_a = get_machine()->$(kern.name)_impls[index_a];
    $(kern.name)_u = get_machine()->$(kern.name)_impls[index_u];

    assert($(kern.name)_a);
    assert($(kern.name)_u);

    $(kern.name) = &__$(kern.name)_d;
}

/*
 * The three functions below are the initial targets of the public function
 * pointers. Each one runs the initialisation above, which re-points the
 * public pointers, and then forwards the current call through the updated
 * pointer.
 */
static inline void __$(kern.name)_a($kern.arglist_full)
{
    __init_$(kern.name)();
    $(kern.name)_a($kern.arglist_names);
}

static inline void __$(kern.name)_u($kern.arglist_full)
{
    __init_$(kern.name)();
    $(kern.name)_u($kern.arglist_names);
}

static inline void __$(kern.name)($kern.arglist_full)
{
    __init_$(kern.name)();
    $(kern.name)($kern.arglist_names);
}

/* Public function pointers, initially aimed at the lazy initialisers. */
$kern.pname $(kern.name)_a = &__$(kern.name)_a;
$kern.pname $(kern.name)_u = &__$(kern.name)_u;
$kern.pname $(kern.name) = &__$(kern.name);

/*
 * Call the implementation whose name equals impl_name. The index comes
 * from volk_gnsssdr_get_index and is used without further checking here.
 */
void $(kern.name)_manual($kern.arglist_full, const char* impl_name)
{
    const int index = volk_gnsssdr_get_index(
        get_machine()->$(kern.name)_impl_names,
        get_machine()->$(kern.name)_n_impls,
        impl_name
    );
    get_machine()->$(kern.name)_impls[index](
        $kern.arglist_names
    );
}

/*
 * Return a descriptor holding the implementation names, dependency values,
 * alignment flags and implementation count of the kernel, all taken from
 * the selected machine.
 */
volk_gnsssdr_func_desc_t $(kern.name)_get_func_desc(void) {
    const char **impl_names = get_machine()->$(kern.name)_impl_names;
    const int *impl_deps = get_machine()->$(kern.name)_impl_deps;
    const bool *alignment = get_machine()->$(kern.name)_impl_alignment;
    const size_t n_impls = get_machine()->$(kern.name)_n_impls;
    volk_gnsssdr_func_desc_t desc = {
        impl_names,
        impl_deps,
        alignment,
        n_impls
    };
    return desc;
}

#end for
|
94
src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr.tmpl.h
Normal file
94
src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr.tmpl.h
Normal file
@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Copyright 2011-2012 Free Software Foundation, Inc.
|
||||
*
|
||||
* This file is part of GNU Radio
|
||||
*
|
||||
* GNU Radio is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* GNU Radio is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Radio; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_VOLK_GNSSSDR_RUNTIME
#define INCLUDED_VOLK_GNSSSDR_RUNTIME

#include <volk_gnsssdr/volk_gnsssdr_typedefs.h>
#include <volk_gnsssdr/volk_gnsssdr_config_fixed.h>
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <volk_gnsssdr/volk_gnsssdr_malloc.h>

#include <stdlib.h>
#include <stdbool.h>

__VOLK_DECL_BEGIN

/*!
 * Description of the implementations available for one kernel, as
 * returned by the generated get_func_desc functions. The three arrays
 * are presumably parallel, with one entry per implementation (confirm
 * against the machine tables).
 */
typedef struct volk_gnsssdr_func_desc
{
    const char **impl_names;    //!< names of the implementations
    const int *impl_deps;       //!< dependency value of each implementation (presumably an architecture mask)
    const bool *impl_alignment; //!< alignment flag of each implementation
    const size_t n_impls;       //!< number of implementations
} volk_gnsssdr_func_desc_t;

//! Prints a list of machines available
VOLK_API void volk_gnsssdr_list_machines(void);

//! Returns the name of the machine this instance will use
VOLK_API const char* volk_gnsssdr_get_machine(void);

//! Get the machine alignment in bytes
VOLK_API size_t volk_gnsssdr_get_alignment(void);

/*!
 * The VOLK_OR_PTR macro is a convenience macro
 * for checking the alignment of a set of pointers.
 * It combines exactly two pointers; nest it to combine more.
 * Example usage:
 * volk_gnsssdr_is_aligned(VOLK_OR_PTR(VOLK_OR_PTR(p0, p1), p2))
 */
#define VOLK_OR_PTR(ptr0, ptr1) \
    (const void *)(((intptr_t)(ptr0)) | ((intptr_t)(ptr1)))

/*!
 * Is the pointer on a machine alignment boundary?
 *
 * Note: for performance reasons, this function
 * is not usable until another volk_gnsssdr API call is made
 * which will perform certain initialization tasks.
 *
 * \param ptr the pointer to some memory buffer
 * \return 1 for alignment boundary, else 0
 */
VOLK_API bool volk_gnsssdr_is_aligned(const void *ptr);

#for $kern in $kernels

//! A function pointer to the dispatcher implementation
extern VOLK_API $kern.pname $kern.name;

//! A function pointer to the fastest aligned implementation
extern VOLK_API $kern.pname $(kern.name)_a;

//! A function pointer to the fastest unaligned implementation
extern VOLK_API $kern.pname $(kern.name)_u;

//! Call into a specific implementation given by name
extern VOLK_API void $(kern.name)_manual($kern.arglist_full, const char* impl_name);

//! Get description parameters for this kernel
extern VOLK_API volk_gnsssdr_func_desc_t $(kern.name)_get_func_desc(void);
#end for

__VOLK_DECL_END

#endif /*INCLUDED_VOLK_GNSSSDR_RUNTIME*/
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user