Master branch + volk_gnsssdr module

This commit is contained in:
Andrés Cecilia Luque 2014-09-08 01:56:09 +02:00
parent 490879c33a
commit 60cc3777b6
109 changed files with 15955 additions and 178 deletions

View File

@ -16,26 +16,58 @@
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
message(FATAL_ERROR "Prevented in-tree build. This is bad practice. Try 'cd build && cmake ../' ")
endif(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
########################################################################
# Project setup
########################################################################
if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
message(FATAL_ERROR "Prevented in-tree build. This is bad practice. Try 'cd build && cmake ../' ")
endif(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR})
cmake_minimum_required(VERSION 2.8)
project(gnss-sdr CXX C)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
file(RELATIVE_PATH RELATIVE_CMAKE_CALL ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
########################################################################
# Determine optional blocks/libraries to be built (default: not built)
# Enable them here or at the command line by doing 'cmake -DENABLE_XXX=ON ../'
########################################################################
option(ENABLE_GN3S "Enable the use of the GN3S dongle as signal source (experimental)" OFF)
option(ENABLE_ARRAY "Enable the use of CTTC's antenna array front-end as signal source (experimental)" OFF)
option(ENABLE_RTLSDR "Enable the use of RTL dongles as signal source (experimental)" OFF)
option(ENABLE_OPENCL "Enable building of processing blocks implemented with OpenCL (experimental)" OFF)
option(ENABLE_GPERFTOOLS "Enable linking to Gperftools libraries (tcmalloc and profiler)" OFF)
option(ENABLE_GENERIC_ARCH "Builds a portable binary" OFF)
option(ENABLE_VOLK_GNSSSDR "Enable building of volk_gnsssdr module: some volk protokernels coded by gnss-sdr" OFF)
###############################
# GNSS-SDR version information
###############################
# Get the current working branch
execute_process(
COMMAND git rev-parse --abbrev-ref HEAD
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_BRANCH
OUTPUT_STRIP_TRAILING_WHITESPACE
)
# Get the latest abbreviated commit hash of the working branch
execute_process(
COMMAND git log -1 --format=%h
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_COMMIT_HASH
OUTPUT_STRIP_TRAILING_WHITESPACE
)
# Set the version information here
set(VERSION_INFO_MAJOR_VERSION 0)
set(VERSION_INFO_API_COMPAT 0)
set(VERSION_INFO_MINOR_VERSION 3)
set(VERSION_INFO_MINOR_VERSION 3.git-${GIT_BRANCH}-${GIT_COMMIT_HASH})
set(VERSION ${VERSION_INFO_MAJOR_VERSION}.${VERSION_INFO_API_COMPAT}.${VERSION_INFO_MINOR_VERSION})
file(RELATIVE_PATH RELATIVE_CMAKE_CALL ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
########################################################################
# Environment setup
@ -156,10 +188,16 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
endif(${DARWIN_VERSION} MATCHES "10")
endif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
#select the release build type by default to get optimization flags
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release")
message(STATUS "Build type not specified: defaulting to Release.")
if(ENABLE_GPERFTOOLS)
set(CMAKE_BUILD_TYPE "RelWithDebInfo")
message(STATUS "Build type not specified: defaulting to RelWithDebInfo.")
else(ENABLE_GPERFTOOLS)
set(CMAKE_BUILD_TYPE "Release")
message(STATUS "Build type not specified: defaulting to Release.")
endif(ENABLE_GPERFTOOLS)
endif(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "")
@ -182,6 +220,7 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
endif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.7)
endif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
################################################################################
# Googletest - http://code.google.com/p/googletest/
################################################################################
@ -203,7 +242,6 @@ endif(GTEST_DIR)
################################################################################
# Boost - http://www.boost.org
################################################################################
if(UNIX AND EXISTS "/usr/lib64")
list(APPEND BOOST_LIBRARYDIR "/usr/lib64") # Fedora 64-bit fix
endif(UNIX AND EXISTS "/usr/lib64")
@ -231,9 +269,7 @@ endif(NOT Boost_FOUND)
################################################################################
# GNU Radio - http://gnuradio.org/redmine/projects/gnuradio/wiki
################################################################################
find_package(Gnuradio)
if(NOT GNURADIO_RUNTIME_FOUND)
message(STATUS "CMake cannot find GNU Radio >= 3.7")
if(OS_IS_LINUX)
@ -281,6 +317,40 @@ if(NOT GNURADIO_TRELLIS_FOUND)
endif()
###############################################################################
# Volk_gnsssdr module
#In order to use volk_gnsssr module it is necessary to add:
# 1) include_directories(..${VOLK_GNSSSDR_INCLUDE_DIRS}..)
# 2) target_link_libraries(..${VOLK_GNSSSDR_LIBRARIES}..)
###############################################################################
if(ENABLE_VOLK_GNSSSDR)
message(STATUS "The volk_gnsssdr module with custom protokernels coded by gnss-sdr will be compiled.")
message(STATUS "You can disable it with 'cmake -DENABLE_VOLK_GNSSSDR=OFF ../'" )
else(ENABLE_VOLK_GNSSSDR)
message(STATUS "The volk_gnsssdr module with custom protokernels coded by gnss-sdr is not enabled. Some configurations that use custom protokernels will not work." )
message(STATUS "Enable it with 'cmake -D ENABLE_VOLK_GNSSSDR=ON ../'." )
endif(ENABLE_VOLK_GNSSSDR)
if(ENABLE_VOLK_GNSSSDR)
set(VOLK_GNSSSDR_BASE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/src/algorithms/libs/volk_gnsssdr)
add_subdirectory(${VOLK_GNSSSDR_BASE_PATH})
set(VOLK_GNSSSDR_INCLUDE_DIRS
${VOLK_GNSSSDR_BASE_PATH}/include
${CMAKE_CURRENT_BINARY_DIR}/src/algorithms/libs/volk_gnsssdr/include
)
set(VOLK_GNSSSDR_LIBRARIES
#Path to libs of volk_gnsssdr target: ${VOLK_GNSSSDR_BASE_PATH}/lib/Debug/libvolk_gnsssdr.dylib
volk_gnsssdr
)
message(" * INCLUDES: ${VOLK_GNSSSDR_INCLUDE_DIRS} ")
message(" * LIBS: ${VOLK_GNSSSDR_LIBRARIES} ")
message("-- END OF: Setup volk_gnsssdr as a subproject.")
endif(ENABLE_VOLK_GNSSSDR)
################################################################################
# gflags - http://code.google.com/p/gflags/
@ -356,7 +426,6 @@ endif(NOT GFlags_FOUND OR LOCAL_GLOG)
################################################################################
# glog - http://code.google.com/p/google-glog/
################################################################################
find_package(GLOG)
set(glog_RELEASE 0.3.3)
if (NOT GLOG_FOUND OR LOCAL_GFLAGS)
@ -458,97 +527,14 @@ endif(NOT GLOG_FOUND OR LOCAL_GFLAGS)
################################################################################
# GPerftools - http://code.google.com/p/gperftools/
################################################################################
set(GCC_GPERFTOOLS_FLAGS "")
find_package(Gperftools)
if ( NOT GPERFTOOLS_FOUND )
message(STATUS "The optional library GPerftools has not been found.")
else( NOT GPERFTOOLS_FOUND )
message (STATUS "GPerftools library found." )
link_libraries(${GPERFTOOLS_PROFILER} ${GPERFTOOLS_TCMALLOC})
endif( NOT GPERFTOOLS_FOUND )
list(APPEND CMAKE_CXX_FLAGS ${GCC_GPERFTOOLS_FLAGS})
################################################################################
# Doxygen - http://www.stack.nl/~dimitri/doxygen/index.html
################################################################################
find_package(Doxygen)
if(DOXYGEN_FOUND)
message(STATUS "Doxygen found.")
message(STATUS "You can build the documentation with 'make doc'." )
message(STATUS "When done, point your browser to ${CMAKE_SOURCE_DIR}/html/index.html")
set(HAVE_DOT ${DOXYGEN_DOT_FOUND})
file(TO_NATIVE_PATH ${CMAKE_SOURCE_DIR} top_srcdir)
file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} top_builddir)
find_package(LATEX)
if (PDFLATEX_COMPILER)
set(GENERATE_PDF_DOCUMENTATION "YES")
set(GNSSSDR_USE_MATHJAX "NO")
else(PDFLATEX_COMPILER)
set(GENERATE_PDF_DOCUMENTATION "NO")
set(GNSSSDR_USE_MATHJAX "YES")
endif(PDFLATEX_COMPILER)
configure_file(${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile.in
${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile
@ONLY
)
add_custom_target(doc
${DOXYGEN_EXECUTABLE} ${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
COMMENT "Generating API documentation with Doxygen." VERBATIM
)
if(LATEX_COMPILER)
message(STATUS "'make pdfmanual' will generate a manual at ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf")
add_custom_target(pdfmanual
COMMAND ${CMAKE_MAKE_PROGRAM}
COMMAND ${CMAKE_COMMAND} -E copy refman.pdf ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf
COMMAND ${CMAKE_MAKE_PROGRAM} clean
DEPENDS doc
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/docs/latex
COMMENT "Generating PDF manual with Doxygen." VERBATIM
)
endif(LATEX_COMPILER)
message(STATUS "'make doc-clean' will clean the documentation.")
add_custom_target(doc-clean
COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_SOURCE_DIR}/docs/html
COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_SOURCE_DIR}/docs/latex
COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf
COMMENT "Cleaning documentation." VERBATIM
)
else(DOXYGEN_FOUND)
message(STATUS " Doxygen has not been found in your system.")
message(STATUS " You can get nice code documentation by using it!")
message(STATUS " Get it from http://www.stack.nl/~dimitri/doxygen/index.html")
if(OS_IS_LINUX)
if(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
message(" or simply by doing 'sudo yum install doxygen-latex'.")
else(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
message(" or simply by doing 'sudo apt-get install doxygen-latex'.")
endif(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
endif(OS_IS_LINUX)
if(OS_IS_MACOSX)
message(STATUS " or simply by doing 'sudo port install doxygen +latex'.")
endif(OS_IS_MACOSX)
endif(DOXYGEN_FOUND)
################################################################################
# Armadillo - http://arma.sourceforge.net/
################################################################################
if(OS_IS_LINUX)
#############################################
#############################################################################
# Check that LAPACK is found in the system
#############################################
# LAPACK is required for matrix decompositions (eg. SVD) and matrix inverse.
#############################################################################
find_library(LAPACK lapack)
if(NOT LAPACK)
message(" The LAPACK library has not been found.")
@ -562,9 +548,11 @@ if(OS_IS_LINUX)
endif(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
message(FATAL_ERROR "LAPACK is required to build gnss-sdr")
endif(NOT LAPACK)
#############################################
#############################################################################
# Check that BLAS is found in the system
#############################################
# BLAS is used for matrix multiplication.
# Without BLAS, matrix multiplication will still work, but might be slower.
#############################################################################
find_library(BLAS blas)
if(NOT BLAS)
message(" The BLAS library has not been found.")
@ -641,31 +629,22 @@ if(NOT ARMADILLO_FOUND)
endif(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
message(FATAL_ERROR "The patch command is required to download and build armadillo")
endif(NOT PATCH_EXECUTABLE)
set(armadillo_RELEASE 4.300.9)
set(armadillo_MD5 "d51d1beb2a335f3002702d112c4814f3")
set(armadillo_RELEASE 4.400.0)
set(armadillo_MD5 "616744dbc96af1c5d6d32c6c69f6fe94")
if(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/download/armadillo-${armadillo_RELEASE}/armadillo-${armadillo_RELEASE}.tar.gz)
set(ARMADILLO_PATCH_FILE ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_no.patch)
file(WRITE ${ARMADILLO_PATCH_FILE} "")
set(ARMADILLO_PATCH_FILE2 ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_no2.patch)
file(WRITE ${ARMADILLO_PATCH_FILE2} "")
else(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/download/armadillo-${armadillo_RELEASE}/armadillo-${armadillo_RELEASE}.tar.gz)
set(ARMADILLO_PATCH_FILE ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_staticlib.patch)
set(ARMADILLO_PATCH_FILE2 ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_enable_lapack.patch)
set(ARMADILLO_PATCH_FILE ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_enable_lapack.patch)
file(WRITE ${ARMADILLO_PATCH_FILE}
"30c30
< set(ARMA_USE_LAPACK false)
---
> set(ARMA_USE_LAPACK true)
312c312
< add_library( armadillo SHARED \${PROJECT_SOURCE_DIR}/src/wrapper.cpp )
---
> add_library( armadillo STATIC \${PROJECT_SOURCE_DIR}/src/wrapper.cpp )
")
file(WRITE ${ARMADILLO_PATCH_FILE2}
"12c12
< // #define ARMA_USE_LAPACK
---
> #define ARMA_USE_LAPACK
> #define ARMA_USE_LAPACK
19c19
< // #define ARMA_USE_BLAS
---
> #define ARMA_USE_BLAS
")
endif(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/download/armadillo-${armadillo_RELEASE}/armadillo-${armadillo_RELEASE}.tar.gz)
ExternalProject_Add(
@ -673,9 +652,9 @@ if(NOT ARMADILLO_FOUND)
PREFIX ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}
URL http://sourceforge.net/projects/arma/files/armadillo-${armadillo_RELEASE}.tar.gz
DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/download/armadillo-${armadillo_RELEASE}
URL_MD5 ${armadillo_MD5}
PATCH_COMMAND patch -N <BINARY_DIR>/CMakeLists.txt ${ARMADILLO_PATCH_FILE} && patch -N <BINARY_DIR>/include/armadillo_bits/config.hpp ${ARMADILLO_PATCH_FILE2}
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
URL_MD5 ${armadillo_MD5}
PATCH_COMMAND patch -N <BINARY_DIR>/include/armadillo_bits/config.hpp ${ARMADILLO_PATCH_FILE}
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DBUILD_SHARED_LIBS=OFF
BUILD_IN_SOURCE 1
BUILD_COMMAND make
UPDATE_COMMAND ""
@ -686,7 +665,14 @@ if(NOT ARMADILLO_FOUND)
ExternalProject_Get_Property(armadillo-${armadillo_RELEASE} binary_dir)
set(ARMADILLO_INCLUDE_DIRS ${binary_dir}/include )
find_library(LAPACK NAMES lapack HINTS /usr/lib /usr/local/lib /usr/lib64)
set(ARMADILLO_LIBRARIES ${LAPACK} ${GFORTRAN} ${binary_dir}/${CMAKE_FIND_LIBRARY_PREFIXES}armadillo.a)
if(OS_IS_MACOSX)
find_library(BLAS blas)
endif(OS_IS_MACOSX)
find_package(OpenBLAS)
if(OPENBLAS_FOUND)
set(BLAS ${OPENBLAS})
endif(OPENBLAS_FOUND)
set(ARMADILLO_LIBRARIES ${BLAS} ${LAPACK} ${GFORTRAN} ${binary_dir}/${CMAKE_FIND_LIBRARY_PREFIXES}armadillo.a)
set(LOCAL_ARMADILLO true CACHE STRING "Armadillo downloaded and built automatically" FORCE)
# Save a copy at the thirdparty folder
file(COPY ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}
@ -700,27 +686,6 @@ endif(NOT ARMADILLO_FOUND)
###############################################################################
# OpenCL
###############################################################################
find_package(OpenCL)
if($ENV{DISABLE_OPENCL})
set(DISABLE_OPENCL TRUE)
endif($ENV{DISABLE_OPENCL})
if(DISABLE_OPENCL)
set(OPENCL_FOUND FALSE)
else(DISABLE_OPENCL)
if(OPENCL_FOUND)
message(STATUS "OpenCL has been found and will be used by some processing blocks")
message(STATUS "You can disable OpenCL use by doing 'cmake -DDISABLE_OPENCL=1 ../' ")
endif(OPENCL_FOUND)
endif(DISABLE_OPENCL)
if(NOT OPENCL_FOUND)
message(STATUS "Processing blocks using OpenCL will not be built.")
endif(NOT OPENCL_FOUND)
################################################################################
# OpenSSL - http://www.openssl.org
################################################################################
@ -742,41 +707,173 @@ if(NOT OPENSSL_FOUND)
endif(NOT OPENSSL_FOUND)
################################################################################
# Doxygen - http://www.stack.nl/~dimitri/doxygen/index.html (OPTIONAL)
################################################################################
find_package(Doxygen)
if(DOXYGEN_FOUND)
message(STATUS "Doxygen found.")
message(STATUS "You can build the documentation with 'make doc'." )
message(STATUS "When done, point your browser to ${CMAKE_SOURCE_DIR}/html/index.html")
set(HAVE_DOT ${DOXYGEN_DOT_FOUND})
file(TO_NATIVE_PATH ${CMAKE_SOURCE_DIR} top_srcdir)
file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} top_builddir)
find_package(LATEX)
if (PDFLATEX_COMPILER)
set(GENERATE_PDF_DOCUMENTATION "YES")
set(GNSSSDR_USE_MATHJAX "NO")
else(PDFLATEX_COMPILER)
set(GENERATE_PDF_DOCUMENTATION "NO")
set(GNSSSDR_USE_MATHJAX "YES")
endif(PDFLATEX_COMPILER)
configure_file(${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile.in
${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile
@ONLY
)
add_custom_target(doc
${DOXYGEN_EXECUTABLE} ${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
COMMENT "Generating API documentation with Doxygen." VERBATIM
)
if(LATEX_COMPILER)
message(STATUS "'make pdfmanual' will generate a manual at ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf")
add_custom_target(pdfmanual
COMMAND ${CMAKE_MAKE_PROGRAM}
COMMAND ${CMAKE_COMMAND} -E copy refman.pdf ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf
COMMAND ${CMAKE_MAKE_PROGRAM} clean
DEPENDS doc
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/docs/latex
COMMENT "Generating PDF manual with Doxygen." VERBATIM
)
endif(LATEX_COMPILER)
message(STATUS "'make doc-clean' will clean the documentation.")
add_custom_target(doc-clean
COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_SOURCE_DIR}/docs/html
COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_SOURCE_DIR}/docs/latex
COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf
COMMENT "Cleaning documentation." VERBATIM
)
else(DOXYGEN_FOUND)
message(STATUS " Doxygen has not been found in your system.")
message(STATUS " You can get nice code documentation by using it!")
message(STATUS " Get it from http://www.stack.nl/~dimitri/doxygen/index.html")
if(OS_IS_LINUX)
if(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
message(" or simply by doing 'sudo yum install doxygen-latex'.")
else(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
message(" or simply by doing 'sudo apt-get install doxygen-latex'.")
endif(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat")
endif(OS_IS_LINUX)
if(OS_IS_MACOSX)
message(STATUS " or simply by doing 'sudo port install doxygen +latex'.")
endif(OS_IS_MACOSX)
endif(DOXYGEN_FOUND)
###############################################################################
# OpenCL (OPTIONAL)
###############################################################################
if(ENABLE_OPENCL)
find_package(OpenCL)
if($ENV{DISABLE_OPENCL})
set(DISABLE_OPENCL TRUE)
endif($ENV{DISABLE_OPENCL})
if(DISABLE_OPENCL)
set(OPENCL_FOUND FALSE)
else(DISABLE_OPENCL)
if(OPENCL_FOUND)
message(STATUS "OpenCL has been found and will be used by some processing blocks")
message(STATUS "You can disable OpenCL use by doing 'cmake -DENABLE_OPENCL=OFF ../' ")
endif(OPENCL_FOUND)
endif(DISABLE_OPENCL)
if(ENABLE_GENERIC_ARCH)
set(OPENCL_FOUND FALSE)
message(STATUS "ENABLE_GENERIC_ARCH is set to ON so the use of OpenCL has been disabled.")
endif(ENABLE_GENERIC_ARCH)
if(NOT OPENCL_FOUND)
message(STATUS "Processing blocks using OpenCL will not be built.")
endif(NOT OPENCL_FOUND)
else(ENABLE_OPENCL)
set(OPENCL_FOUND FALSE)
endif(ENABLE_OPENCL)
################################################################################
# GPerftools - http://code.google.com/p/gperftools/ (OPTIONAL)
################################################################################
if(ENABLE_GPERFTOOLS)
find_package(Gperftools)
if ( NOT GPERFTOOLS_FOUND )
message(STATUS "Although ENABLE_GPERFTOOLS has been set to ON, GPerftools has not been found.")
message(STATUS "Binaries will be compiled without 'tcmalloc' and 'profiler' libraries.")
message(STATUS "You can install GPerftools from http://code.google.com/p/gperftools/")
else( NOT GPERFTOOLS_FOUND )
message(STATUS "GPerftools libraries found." )
message(STATUS "Binaries will be compiled with 'tcmalloc' and 'profiler' libraries.")
endif( NOT GPERFTOOLS_FOUND )
endif(ENABLE_GPERFTOOLS)
################################################################################
# Setup of optional drivers
################################################################################
if( $ENV{GN3S_DRIVER} )
message(STATUS "GN3S_DRIVER variable found." )
# copy firmware to install folder
# Build project gr-gn3s
else( $ENV{GN3S_DRIVER} )
if( GN3S_DRIVER )
message(STATUS "GN3S driver will be compiled")
else( GNSS_DRIVER )
message(STATUS "GN3S_DRIVER is not defined." )
message(STATUS "Define it with 'export GN3S_DRIVER=1' to add support for the GN3S dongle." )
endif( GN3S_DRIVER )
endif($ENV{GN3S_DRIVER} )
if( $ENV{RAW_ARRAY_DRIVER} )
message(STATUS "RAW_ARRAY_DRIVER variable found." )
if($ENV{GN3S_DRIVER})
message(STATUS "GN3S_DRIVER environment variable found." )
set(ENABLE_GN3S ON)
endif($ENV{GN3S_DRIVER})
if(GN3S_DRIVER)
set(ENABLE_GN3S ON)
endif(GN3S_DRIVER)
if(ENABLE_GN3S)
message(STATUS "The GN3S driver will be compiled.")
message(STATUS "You can disable it with 'cmake -DENABLE_GN3S=OFF ../'" )
else(ENABLE_GN3S)
message(STATUS "The (optional and experimental) GN3S driver is not enabled." )
message(STATUS "Enable it with 'cmake -DENABLE_GN3S=ON ../' to add support for the GN3S dongle." )
endif(ENABLE_GN3S)
if($ENV{RAW_ARRAY_DRIVER})
message(STATUS "RAW_ARRAY_DRIVER environment variable found." )
set(ENABLE_ARRAY ON)
endif($ENV{RAW_ARRAY_DRIVER})
if(RAW_ARRAY_DRIVER)
set(ENABLE_ARRAY ON)
endif(RAW_ARRAY_DRIVER)
if(ENABLE_ARRAY)
message(STATUS "CTTC's Antenna Array front-end driver will be compiled." )
message(STATUS "You can disable it with 'cmake -DENABLE_ARRAY=OFF ../'" )
# copy firmware to install folder
# Build project gr-dbfcttc
else( $ENV{RAW_ARRAY_DRIVER} )
if( RAW_ARRAY_DRIVER )
message(STATUS "RAW_ARRAY_DRIVER driver will be compiled")
else( RAW_ARRAY_DRIVER )
message(STATUS "RAW_ARRAY_DRIVER is not defined." )
message(STATUS "Define it with 'export RAW_ARRAY_DRIVER=1' to add support for the CTTC experimental array front-end." )
endif( RAW_ARRAY_DRIVER )
endif($ENV{RAW_ARRAY_DRIVER} )
else(ENABLE_ARRAY)
message(STATUS "The (optional) CTTC's Antenna Array front-end driver is not enabled." )
message(STATUS "Enable it with 'cmake -DENABLE_ARRAY=ON ../' to add support for the CTTC experimental array front-end." )
endif(ENABLE_ARRAY)
if( $ENV{RTLSDR_DRIVER} )
message(STATUS "RTLSDR_DRIVER variable found." )
if($ENV{RTLSDR_DRIVER})
message(STATUS "RTLSDR_DRIVER environment variable found." )
set(ENABLE_RTLSDR ON)
endif($ENV{RTLSDR_DRIVER})
if(RAW_ARRAY_DRIVER)
set(ENABLE_RTLSDR ON)
endif(RAW_ARRAY_DRIVER)
if(ENABLE_RTLSDR)
message(STATUS "The driver for RTL-based dongles will be compiled." )
message(STATUS "You can disable it with 'cmake -DENABLE_RTLSDR=OFF ../'" )
# find libosmosdr (done in src/algorithms/signal_sources/adapters)
# find gr-osmosdr (done in src/algorithms/signal_sources/adapters)
endif($ENV{RTLSDR_DRIVER} )
else(ENABLE_RTLSDR)
message(STATUS "The (optional) driver for RTL-based dongles is not enabled." )
message(STATUS "Enable it with 'cmake -DENABLE_RTLSDR=ON ../' to add support for Realtek's RTL2832U-based USB dongles." )
endif(ENABLE_RTLSDR)
########################################################################
@ -802,7 +899,11 @@ if(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
if(OS_IS_MACOSX)
set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -march=corei7 -mfpmath=sse")
else(OS_IS_MACOSX)
set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -march=native -mfpmath=sse")
if(ENABLE_GENERIC_ARCH)
set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -mtune=generic")
else(ENABLE_GENERIC_ARCH)
set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -march=native -mfpmath=sse")
endif(ENABLE_GENERIC_ARCH)
endif(OS_IS_MACOSX)
endif(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
@ -811,13 +912,18 @@ if(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
add_definitions(-fvisibility=hidden)
endif()
# Set GPerftools related flags if it is available
# See http://gperftools.googlecode.com/svn/trunk/README
if(GPERFTOOLS_FOUND)
if(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free")
endif(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
endif(GPERFTOOLS_FOUND)
if(ENABLE_GPERFTOOLS)
# Set GPerftools related flags if it is available
# See http://gperftools.googlecode.com/svn/trunk/README
if(GPERFTOOLS_FOUND)
if(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free")
endif(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -fno-builtin")
endif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
endif(GPERFTOOLS_FOUND)
endif(ENABLE_GPERFTOOLS)
list(APPEND CMAKE_CXX_FLAGS ${MY_CXX_FLAGS})

View File

@ -0,0 +1,183 @@
#
# Copyright 2011 Free Software Foundation, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
# Project setup
########################################################################
cmake_minimum_required(VERSION 2.6)
if(NOT DEFINED CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "Choose build type: None Debug Release RelWithDebInfo MinSizeRel")
project(volk_gnsssdr)
enable_language(CXX)
enable_language(C)
enable_testing()
set(VERSION 0.1)
set(LIBVER 0.0.0)
set(CMAKE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) #allows this to be a sub-project
set(CMAKE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) #allows this to be a sub-project
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) #location for custom "Modules"
########################################################################
# Environment setup
########################################################################
IF(NOT DEFINED BOOST_ROOT)
SET(BOOST_ROOT ${CMAKE_INSTALL_PREFIX})
ENDIF()
IF(NOT DEFINED CROSSCOMPILE_MULTILIB)
SET(CROSSCOMPILE_MULTILIB "")
ENDIF()
SET(CROSSCOMPILE_MULTILIB ${CROSSCOMPILE_MULTILIB} CACHE STRING "Define \"true\" if you have and want to use multiple C development libs installed for cross compile")
########################################################################
# Dependencies setup
########################################################################
include(GrPython) #sets PYTHON_EXECUTABLE and PYTHON_DASH_B
VOLK_PYTHON_CHECK_MODULE("python >= 2.5" sys "sys.version.split()[0] >= '2.5'" PYTHON_MIN_VER_FOUND)
VOLK_PYTHON_CHECK_MODULE("Cheetah >= 2.0.0" Cheetah "Cheetah.Version >= '2.0.0'" CHEETAH_FOUND)
if(NOT PYTHON_MIN_VER_FOUND)
message(FATAL_ERROR "Python 2.5 or greater required to build VOLK")
endif()
if(NOT CHEETAH_FOUND)
message(FATAL_ERROR "Cheetah templates required to build VOLK")
endif()
if(MSVC)
if (NOT DEFINED BOOST_ALL_DYN_LINK)
set(BOOST_ALL_DYN_LINK TRUE)
endif()
set(BOOST_ALL_DYN_LINK "${BOOST_ALL_DYN_LINK}" CACHE BOOL "boost enable dynamic linking")
if(BOOST_ALL_DYN_LINK)
add_definitions(-DBOOST_ALL_DYN_LINK) #setup boost auto-linking in msvc
else(BOOST_ALL_DYN_LINK)
unset(BOOST_REQUIRED_COMPONENTS) #empty components list for static link
endif(BOOST_ALL_DYN_LINK)
endif(MSVC)
include(VolkBoost)
if(NOT Boost_FOUND)
message(FATAL_ERROR "VOLK Requires boost to build")
endif()
option(ENABLE_ORC "Enable Orc" True)
if(ENABLE_ORC)
find_package(ORC)
else(ENABLE_ORC)
message(STATUS "Disabling use of ORC")
endif(ENABLE_ORC)
########################################################################
# Setup the package config file
########################################################################
#set variables found in the pc.in file
set(prefix ${CMAKE_INSTALL_PREFIX})
set(exec_prefix "\${prefix}")
set(libdir "\${exec_prefix}/lib${LIB_SUFFIX}")
set(includedir "\${prefix}/include")
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr.pc.in
${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr.pc
@ONLY)
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr.pc
DESTINATION lib${LIB_SUFFIX}/pkgconfig
COMPONENT "volk_gnsssdr_devel"
)
########################################################################
# Install all headers in the include directories
########################################################################
set(VOLK_RUNTIME_DIR bin)
set(VOLK_LIBRARY_DIR lib${LIB_SUFFIX})
set(VOLK_INCLUDE_DIR include)
install(
DIRECTORY ${CMAKE_SOURCE_DIR}/kernels/volk_gnsssdr
DESTINATION include COMPONENT "volk_gnsssdr_devel"
FILES_MATCHING PATTERN "*.h"
)
install(FILES
${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_prefs.h
${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_complex.h
${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_common.h
${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr.h
${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_cpu.h
${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_config_fixed.h
${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_typedefs.h
${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_malloc.h
DESTINATION include/volk_gnsssdr
COMPONENT "volk_gnsssdr_devel"
)
########################################################################
# Install cmake search routine for external use
########################################################################
if(NOT CMAKE_MODULES_DIR)
set(CMAKE_MODULES_DIR lib${LIB_SUFFIX}/cmake)
endif(NOT CMAKE_MODULES_DIR)
install(
FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/VolkConfig.cmake
DESTINATION ${CMAKE_MODULES_DIR}/volk_gnsssdr
COMPONENT "volk_gnsssdr_devel"
)
########################################################################
# On Apple only, set install name and use rpath correctly, if not already set
########################################################################
if(APPLE)
if(NOT CMAKE_INSTALL_NAME_DIR)
set(CMAKE_INSTALL_NAME_DIR
${CMAKE_INSTALL_PREFIX}/${GR_LIBRARY_DIR} CACHE
PATH "Library Install Name Destination Directory" FORCE)
endif(NOT CMAKE_INSTALL_NAME_DIR)
if(NOT CMAKE_INSTALL_RPATH)
set(CMAKE_INSTALL_RPATH
${CMAKE_INSTALL_PREFIX}/${GR_LIBRARY_DIR} CACHE
PATH "Library Install RPath" FORCE)
endif(NOT CMAKE_INSTALL_RPATH)
if(NOT CMAKE_BUILD_WITH_INSTALL_RPATH)
set(CMAKE_BUILD_WITH_INSTALL_RPATH ON CACHE
BOOL "Do Build Using Library Install RPath" FORCE)
endif(NOT CMAKE_BUILD_WITH_INSTALL_RPATH)
endif(APPLE)
########################################################################
# Setup the library
########################################################################
add_subdirectory(lib)
########################################################################
# And the utility apps
########################################################################
add_subdirectory(apps)
add_subdirectory(python/volk_gnsssdr_modtool)
########################################################################
# Print summary
########################################################################
message(STATUS "Using install prefix: ${CMAKE_INSTALL_PREFIX}")

View File

@ -0,0 +1,61 @@
#
# Copyright 2011-2013 Free Software Foundation, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
# Setup profiler
########################################################################
if(Boost_FOUND)
if(MSVC)
include_directories(${CMAKE_SOURCE_DIR}/cmake/msvc)
endif(MSVC)
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_SOURCE_DIR}/include
${CMAKE_BINARY_DIR}/include
${CMAKE_SOURCE_DIR}/lib
${CMAKE_BINARY_DIR}/lib
${Boost_INCLUDE_DIRS}
)
# MAKE volk_gnsssdr_profile
add_executable(volk_gnsssdr_profile
${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr_profile.cc
${CMAKE_SOURCE_DIR}/lib/qa_utils.cc
)
target_link_libraries(volk_gnsssdr_profile volk_gnsssdr ${Boost_LIBRARIES})
install(
TARGETS volk_gnsssdr_profile
DESTINATION bin
COMPONENT "volk_gnsssdr"
)
# MAKE volk_gnsssdr-config-info
add_executable(volk_gnsssdr-config-info volk_gnsssdr-config-info.cc)
target_link_libraries(volk_gnsssdr-config-info volk_gnsssdr ${Boost_LIBRARIES})
install(
TARGETS volk_gnsssdr-config-info
DESTINATION bin
COMPONENT "volk_gnsssdr"
)
endif(Boost_FOUND)

View File

@ -0,0 +1,96 @@
/* -*- c++ -*- */
/*
* Copyright 2013 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/
#if HAVE_CONFIG_H
#include <config.h>
#endif
#include <volk_gnsssdr/constants.h>
#include "volk_gnsssdr/volk_gnsssdr.h"
#include <boost/program_options.hpp>
#include <iostream>
namespace po = boost::program_options;
int
main(int argc, char **argv)
{
po::options_description desc("Program options: volk_gnsssdr-config-info [options]");
po::variables_map vm;
desc.add_options()
("help,h", "print help message")
("prefix", "print VOLK installation prefix")
("builddate", "print VOLK build date (RFC2822 format)")
("cc", "print VOLK C compiler version")
("cflags", "print VOLK CFLAGS")
("all-machines", "print VOLK machines built into library")
("avail-machines", "print VOLK machines the current platform can use")
("machine", "print the VOLK machine that will be used")
("version,v", "print VOLK version")
;
try {
po::store(po::parse_command_line(argc, argv, desc), vm);
po::notify(vm);
}
catch (po::error& error){
std::cerr << "Error: " << error.what() << std::endl << std::endl;
std::cerr << desc << std::endl;
return 1;
}
if(vm.size() == 0 || vm.count("help")) {
std::cout << desc << std::endl;
return 1;
}
if(vm.count("prefix"))
std::cout << volk_gnsssdr_prefix() << std::endl;
if(vm.count("builddate"))
std::cout << volk_gnsssdr_build_date() << std::endl;
if(vm.count("version"))
std::cout << volk_gnsssdr_version() << std::endl;
if(vm.count("cc"))
std::cout << volk_gnsssdr_c_compiler() << std::endl;
if(vm.count("cflags"))
std::cout << volk_gnsssdr_compiler_flags() << std::endl;
// stick an extra ';' to make output of this and avail-machines the
// same structure for easier parsing
if(vm.count("all-machines"))
std::cout << volk_gnsssdr_available_machines() << ";" << std::endl;
if(vm.count("avail-machines")) {
volk_gnsssdr_list_machines();
}
if(vm.count("machine")) {
std::cout << volk_gnsssdr_get_machine() << std::endl;
}
return 0;
}

View File

@ -0,0 +1,163 @@
/* -*- c++ -*- */
/*
* Copyright 2012-2014 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/
#include "qa_utils.h"
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <volk_gnsssdr/volk_gnsssdr_prefs.h>
#include <ciso646>
#include <vector>
#include <boost/foreach.hpp>
#include <boost/filesystem.hpp>
#include <boost/program_options.hpp>
#include <iostream>
#include <fstream>
#include <sys/stat.h>
#include <sys/types.h>
namespace fs = boost::filesystem;
int main(int argc, char *argv[]) {
// Adding program options
boost::program_options::options_description desc("Options");
desc.add_options()
("help,h", "Print help messages")
("benchmark,b",
boost::program_options::value<bool>()->default_value( false )
->implicit_value( true ),
"Run all kernels (benchmark mode)")
("tests-regex,R",
boost::program_options::value<std::string>(),
"Run tests matching regular expression.")
;
// Handle the options that were given
boost::program_options::variables_map vm;
bool benchmark_mode;
std::string kernel_regex;
bool store_results = true;
try {
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm);
boost::program_options::notify(vm);
benchmark_mode = vm.count("benchmark")?vm["benchmark"].as<bool>():false;
if ( vm.count("tests-regex" ) ) {
kernel_regex = vm["tests-regex"].as<std::string>();
store_results = false;
std::cout << "Warning: using a regexp will not save results to a config" << std::endl;
}
else {
kernel_regex = ".*";
store_results = true;
}
} catch (boost::program_options::error& error) {
std::cerr << "Error: " << error.what() << std::endl << std::endl;
std::cerr << desc << std::endl;
return 1;
}
/** --help option
*/
if ( vm.count("help") )
{
std::cout << "The VOLK profiler." << std::endl
<< desc << std::endl;
return 0;
}
// Run tests
std::vector<std::string> results;
//VOLK_PROFILE(volk_gnsssdr_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_16i_branch_4_state_8, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_16i_max_star_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_16i_max_star_horizontal_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_16i_permute_and_scalar_add, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_32fc_s32f_x2_power_spectral_density_32f, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_32f_s32f_32f_fm_detect_32f, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_32u_popcnt, 0, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_64u_popcnt, 0, 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
//VOLK_PROFILE(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, lv_32fc_t(1.0, 0.5), 204602, 1000, &results, benchmark_mode, kernel_regex);
// Until we can update the config on a kernel by kernel basis
// do not overwrite volk_gnsssdr_config when using a regex.
//GNSS-SDR PROTO-KERNELS
//lv_32fc_t sfv = lv_cmake((float)1, (float)2);
//VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, sfv, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_16i_s32f_convert_32f, 1e-4, 32768.0, 204602, 10000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32f_accumulator_s32f, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8i_accumulator_s8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32f_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8i_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8i_max_s8i, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32f_x2_add_32f, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8i_x2_add_8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_conjugate_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_conjugate_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_magnitude_squared_32f, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_magnitude_squared_8i, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_x2_dot_prod_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x2_dot_prod_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_x2_multiply_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x2_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8u_x2_multiply_8u, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
if(store_results) {
char path[1024];
volk_gnsssdr_get_config_path(path);
const fs::path config_path(path);
if (not fs::exists(config_path.branch_path()))
{
std::cout << "Creating " << config_path.branch_path() << "..." << std::endl;
fs::create_directories(config_path.branch_path());
}
std::cout << "Writing " << config_path << "..." << std::endl;
std::ofstream config(config_path.string().c_str());
if(!config.is_open()) { //either we don't have write access or we don't have the dir yet
std::cout << "Error opening file " << config_path << std::endl;
}
config << "\
#this file is generated by volk_gnsssdr_profile.\n\
#the function name is followed by the preferred architecture.\n\
";
BOOST_FOREACH(std::string result, results) {
config << result << std::endl;
}
config.close();
}
else {
std::cout << "Warning: config not generated" << std::endl;
}
}

View File

@ -0,0 +1,138 @@
# CMAKE_PARSE_ARGUMENTS(<prefix> <options> <one_value_keywords> <multi_value_keywords> args...)
#
# CMAKE_PARSE_ARGUMENTS() is intended to be used in macros or functions for
# parsing the arguments given to that macro or function.
# It processes the arguments and defines a set of variables which hold the
# values of the respective options.
#
# The <options> argument contains all options for the respective macro,
# i.e. keywords which can be used when calling the macro without any value
# following, like e.g. the OPTIONAL keyword of the install() command.
#
# The <one_value_keywords> argument contains all keywords for this macro
# which are followed by one value, like e.g. DESTINATION keyword of the
# install() command.
#
# The <multi_value_keywords> argument contains all keywords for this macro
# which can be followed by more than one value, like e.g. the TARGETS or
# FILES keywords of the install() command.
#
# When done, CMAKE_PARSE_ARGUMENTS() will have defined for each of the
# keywords listed in <options>, <one_value_keywords> and
# <multi_value_keywords> a variable composed of the given <prefix>
# followed by "_" and the name of the respective keyword.
# These variables will then hold the respective value from the argument list.
# For the <options> keywords this will be TRUE or FALSE.
#
# All remaining arguments are collected in a variable
# <prefix>_UNPARSED_ARGUMENTS, this can be checked afterwards to see whether
# your macro was called with unrecognized parameters.
#
# As an example here a my_install() macro, which takes similar arguments as the
# real install() command:
#
# function(MY_INSTALL)
# set(options OPTIONAL FAST)
# set(oneValueArgs DESTINATION RENAME)
# set(multiValueArgs TARGETS CONFIGURATIONS)
# cmake_parse_arguments(MY_INSTALL "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )
# ...
#
# Assume my_install() has been called like this:
# my_install(TARGETS foo bar DESTINATION bin OPTIONAL blub)
#
# After the cmake_parse_arguments() call the macro will have set the following
# variables:
# MY_INSTALL_OPTIONAL = TRUE
# MY_INSTALL_FAST = FALSE (this option was not used when calling my_install()
# MY_INSTALL_DESTINATION = "bin"
# MY_INSTALL_RENAME = "" (was not used)
# MY_INSTALL_TARGETS = "foo;bar"
# MY_INSTALL_CONFIGURATIONS = "" (was not used)
# MY_INSTALL_UNPARSED_ARGUMENTS = "blub" (no value expected after "OPTIONAL"
#
# You can the continue and process these variables.
#
# Keywords terminate lists of values, e.g. if directly after a one_value_keyword
# another recognized keyword follows, this is interpreted as the beginning of
# the new option.
# E.g. my_install(TARGETS foo DESTINATION OPTIONAL) would result in
# MY_INSTALL_DESTINATION set to "OPTIONAL", but MY_INSTALL_DESTINATION would
# be empty and MY_INSTALL_OPTIONAL would be set to TRUE therefor.
#=============================================================================
# Copyright 2010 Alexander Neundorf <neundorf@kde.org>
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
if(__CMAKE_PARSE_ARGUMENTS_INCLUDED)
return()
endif()
set(__CMAKE_PARSE_ARGUMENTS_INCLUDED TRUE)
function(CMAKE_PARSE_ARGUMENTS prefix _optionNames _singleArgNames _multiArgNames)
# first set all result variables to empty/FALSE
foreach(arg_name ${_singleArgNames} ${_multiArgNames})
set(${prefix}_${arg_name})
endforeach(arg_name)
foreach(option ${_optionNames})
set(${prefix}_${option} FALSE)
endforeach(option)
set(${prefix}_UNPARSED_ARGUMENTS)
set(insideValues FALSE)
set(currentArgName)
# now iterate over all arguments and fill the result variables
foreach(currentArg ${ARGN})
list(FIND _optionNames "${currentArg}" optionIndex) # ... then this marks the end of the arguments belonging to this keyword
list(FIND _singleArgNames "${currentArg}" singleArgIndex) # ... then this marks the end of the arguments belonging to this keyword
list(FIND _multiArgNames "${currentArg}" multiArgIndex) # ... then this marks the end of the arguments belonging to this keyword
if(${optionIndex} EQUAL -1 AND ${singleArgIndex} EQUAL -1 AND ${multiArgIndex} EQUAL -1)
if(insideValues)
if("${insideValues}" STREQUAL "SINGLE")
set(${prefix}_${currentArgName} ${currentArg})
set(insideValues FALSE)
elseif("${insideValues}" STREQUAL "MULTI")
list(APPEND ${prefix}_${currentArgName} ${currentArg})
endif()
else(insideValues)
list(APPEND ${prefix}_UNPARSED_ARGUMENTS ${currentArg})
endif(insideValues)
else()
if(NOT ${optionIndex} EQUAL -1)
set(${prefix}_${currentArg} TRUE)
set(insideValues FALSE)
elseif(NOT ${singleArgIndex} EQUAL -1)
set(currentArgName ${currentArg})
set(${prefix}_${currentArgName})
set(insideValues "SINGLE")
elseif(NOT ${multiArgIndex} EQUAL -1)
set(currentArgName ${currentArg})
set(${prefix}_${currentArgName})
set(insideValues "MULTI")
endif()
endif()
endforeach(currentArg)
# propagate the result variables to the caller:
foreach(arg_name ${_singleArgNames} ${_multiArgNames} ${_optionNames})
set(${prefix}_${arg_name} ${${prefix}_${arg_name}} PARENT_SCOPE)
endforeach(arg_name)
set(${prefix}_UNPARSED_ARGUMENTS ${${prefix}_UNPARSED_ARGUMENTS} PARENT_SCOPE)
endfunction(CMAKE_PARSE_ARGUMENTS _options _singleArgs _multiArgs)

View File

@ -0,0 +1,36 @@
FIND_PACKAGE(PkgConfig)
PKG_CHECK_MODULES(PC_ORC "orc-0.4 > 0.4.11")
FIND_PROGRAM(ORCC_EXECUTABLE orcc
HINTS ${PC_ORC_TOOLSDIR}
PATHS ${ORC_ROOT}/bin ${CMAKE_INSTALL_PREFIX}/bin)
FIND_PATH(ORC_INCLUDE_DIR NAMES orc/orc.h
HINTS ${PC_ORC_INCLUDEDIR}
PATHS ${ORC_ROOT}/include/orc-0.4 ${CMAKE_INSTALL_PREFIX}/include/orc-0.4)
FIND_PATH(ORC_LIBRARY_DIR NAMES ${CMAKE_SHARED_LIBRARY_PREFIX}orc-0.4${CMAKE_SHARED_LIBRARY_SUFFIX}
HINTS ${PC_ORC_LIBDIR}
PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} ${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX})
FIND_LIBRARY(ORC_LIB orc-0.4
HINTS ${PC_ORC_LIBRARY_DIRS}
PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} ${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX})
LIST(APPEND ORC_LIBRARY
${ORC_LIB}
)
SET(ORC_INCLUDE_DIRS ${ORC_INCLUDE_DIR})
SET(ORC_LIBRARIES ${ORC_LIBRARY})
SET(ORC_LIBRARY_DIRS ${ORC_LIBRARY_DIR})
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(ORC "orc files" ORC_LIBRARY ORC_INCLUDE_DIR ORCC_EXECUTABLE)
mark_as_advanced(ORC_INCLUDE_DIR ORC_LIBRARY ORCC_EXECUTABLE)

View File

@ -0,0 +1,234 @@
# Copyright 2010-2011,2013 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
# GNU Radio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GNU Radio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Radio; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street,
# Boston, MA 02110-1301, USA.
if(DEFINED __INCLUDED_VOLK_PYTHON_CMAKE)
return()
endif()
set(__INCLUDED_VOLK_PYTHON_CMAKE TRUE)
########################################################################
# Setup the python interpreter:
# This allows the user to specify a specific interpreter,
# or finds the interpreter via the built-in cmake module.
########################################################################
#this allows the user to override PYTHON_EXECUTABLE
if(PYTHON_EXECUTABLE)
set(PYTHONINTERP_FOUND TRUE)
#otherwise if not set, try to automatically find it
else(PYTHON_EXECUTABLE)
#use the built-in find script
find_package(PythonInterp 2)
#and if that fails use the find program routine
if(NOT PYTHONINTERP_FOUND)
find_program(PYTHON_EXECUTABLE NAMES python python2 python2.7 python2.6 python2.5)
if(PYTHON_EXECUTABLE)
set(PYTHONINTERP_FOUND TRUE)
endif(PYTHON_EXECUTABLE)
endif(NOT PYTHONINTERP_FOUND)
endif(PYTHON_EXECUTABLE)
#make the path to the executable appear in the cmake gui
set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE} CACHE FILEPATH "python interpreter")
#make sure we can use -B with python (introduced in 2.6)
if(PYTHON_EXECUTABLE)
execute_process(
COMMAND ${PYTHON_EXECUTABLE} -B -c ""
OUTPUT_QUIET ERROR_QUIET
RESULT_VARIABLE PYTHON_HAS_DASH_B_RESULT
)
if(PYTHON_HAS_DASH_B_RESULT EQUAL 0)
set(PYTHON_DASH_B "-B")
endif()
endif(PYTHON_EXECUTABLE)
########################################################################
# Check for the existence of a python module:
# - desc a string description of the check
# - mod the name of the module to import
# - cmd an additional command to run
# - have the result variable to set
########################################################################
macro(VOLK_PYTHON_CHECK_MODULE desc mod cmd have)
message(STATUS "")
message(STATUS "Python checking for ${desc}")
execute_process(
COMMAND ${PYTHON_EXECUTABLE} -c "
#########################################
try: import ${mod}
except:
try: ${mod}
except: exit(-1)
try: assert ${cmd}
except: exit(-1)
#########################################"
RESULT_VARIABLE ${have}
)
if(${have} EQUAL 0)
message(STATUS "Python checking for ${desc} - found")
set(${have} TRUE)
else(${have} EQUAL 0)
message(STATUS "Python checking for ${desc} - not found")
set(${have} FALSE)
endif(${have} EQUAL 0)
endmacro(VOLK_PYTHON_CHECK_MODULE)
########################################################################
# Sets the python installation directory VOLK_PYTHON_DIR
########################################################################
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "
from distutils import sysconfig
print sysconfig.get_python_lib(plat_specific=True, prefix='')
" OUTPUT_VARIABLE VOLK_PYTHON_DIR OUTPUT_STRIP_TRAILING_WHITESPACE
)
file(TO_CMAKE_PATH ${VOLK_PYTHON_DIR} VOLK_PYTHON_DIR)
########################################################################
# Create an always-built target with a unique name
# Usage: VOLK_UNIQUE_TARGET(<description> <dependencies list>)
########################################################################
function(VOLK_UNIQUE_TARGET desc)
file(RELATIVE_PATH reldir ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR})
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import re, hashlib
unique = hashlib.md5('${reldir}${ARGN}').hexdigest()[:5]
print(re.sub('\\W', '_', '${desc} ${reldir} ' + unique))"
OUTPUT_VARIABLE _target OUTPUT_STRIP_TRAILING_WHITESPACE)
add_custom_target(${_target} ALL DEPENDS ${ARGN})
endfunction(VOLK_UNIQUE_TARGET)
########################################################################
# Install python sources (also builds and installs byte-compiled python)
########################################################################
function(VOLK_PYTHON_INSTALL)
include(CMakeParseArgumentsCopy)
CMAKE_PARSE_ARGUMENTS(VOLK_PYTHON_INSTALL "" "DESTINATION;COMPONENT" "FILES;PROGRAMS" ${ARGN})
####################################################################
if(VOLK_PYTHON_INSTALL_FILES)
####################################################################
install(${ARGN}) #installs regular python files
#create a list of all generated files
unset(pysrcfiles)
unset(pycfiles)
unset(pyofiles)
foreach(pyfile ${VOLK_PYTHON_INSTALL_FILES})
get_filename_component(pyfile ${pyfile} ABSOLUTE)
list(APPEND pysrcfiles ${pyfile})
#determine if this file is in the source or binary directory
file(RELATIVE_PATH source_rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${pyfile})
string(LENGTH "${source_rel_path}" source_rel_path_len)
file(RELATIVE_PATH binary_rel_path ${CMAKE_CURRENT_BINARY_DIR} ${pyfile})
string(LENGTH "${binary_rel_path}" binary_rel_path_len)
#and set the generated path appropriately
if(${source_rel_path_len} GREATER ${binary_rel_path_len})
set(pygenfile ${CMAKE_CURRENT_BINARY_DIR}/${binary_rel_path})
else()
set(pygenfile ${CMAKE_CURRENT_BINARY_DIR}/${source_rel_path})
endif()
list(APPEND pycfiles ${pygenfile}c)
list(APPEND pyofiles ${pygenfile}o)
#ensure generation path exists
get_filename_component(pygen_path ${pygenfile} PATH)
file(MAKE_DIRECTORY ${pygen_path})
endforeach(pyfile)
#the command to generate the pyc files
add_custom_command(
DEPENDS ${pysrcfiles} OUTPUT ${pycfiles}
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_BINARY_DIR}/python_compile_helper.py ${pysrcfiles} ${pycfiles}
)
#the command to generate the pyo files
add_custom_command(
DEPENDS ${pysrcfiles} OUTPUT ${pyofiles}
COMMAND ${PYTHON_EXECUTABLE} -O ${CMAKE_BINARY_DIR}/python_compile_helper.py ${pysrcfiles} ${pyofiles}
)
#create install rule and add generated files to target list
set(python_install_gen_targets ${pycfiles} ${pyofiles})
install(FILES ${python_install_gen_targets}
DESTINATION ${VOLK_PYTHON_INSTALL_DESTINATION}
COMPONENT ${VOLK_PYTHON_INSTALL_COMPONENT}
)
####################################################################
elseif(VOLK_PYTHON_INSTALL_PROGRAMS)
####################################################################
file(TO_NATIVE_PATH ${PYTHON_EXECUTABLE} pyexe_native)
if (CMAKE_CROSSCOMPILING)
set(pyexe_native "/usr/bin/env python")
endif()
foreach(pyfile ${VOLK_PYTHON_INSTALL_PROGRAMS})
get_filename_component(pyfile_name ${pyfile} NAME)
get_filename_component(pyfile ${pyfile} ABSOLUTE)
string(REPLACE "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" pyexefile "${pyfile}.exe")
list(APPEND python_install_gen_targets ${pyexefile})
get_filename_component(pyexefile_path ${pyexefile} PATH)
file(MAKE_DIRECTORY ${pyexefile_path})
add_custom_command(
OUTPUT ${pyexefile} DEPENDS ${pyfile}
COMMAND ${PYTHON_EXECUTABLE} -c
"open('${pyexefile}','w').write('\#!${pyexe_native}\\n'+open('${pyfile}').read())"
COMMENT "Shebangin ${pyfile_name}"
VERBATIM
)
#on windows, python files need an extension to execute
get_filename_component(pyfile_ext ${pyfile} EXT)
if(WIN32 AND NOT pyfile_ext)
set(pyfile_name "${pyfile_name}.py")
endif()
install(PROGRAMS ${pyexefile} RENAME ${pyfile_name}
DESTINATION ${VOLK_PYTHON_INSTALL_DESTINATION}
COMPONENT ${VOLK_PYTHON_INSTALL_COMPONENT}
)
endforeach(pyfile)
endif()
VOLK_UNIQUE_TARGET("pygen" ${python_install_gen_targets})
endfunction(VOLK_PYTHON_INSTALL)
########################################################################
# Write the python helper script that generates byte code files
########################################################################
file(WRITE ${CMAKE_BINARY_DIR}/python_compile_helper.py "
import sys, py_compile
files = sys.argv[1:]
srcs, gens = files[:len(files)/2], files[len(files)/2:]
for src, gen in zip(srcs, gens):
py_compile.compile(file=src, cfile=gen, doraise=True)
")

View File

@ -0,0 +1,98 @@
# Copyright 2010-2011 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
# GNU Radio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GNU Radio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Radio; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street,
# Boston, MA 02110-1301, USA.
if(DEFINED __INCLUDED_VOLK_BOOST_CMAKE)
return()
endif()
set(__INCLUDED_VOLK_BOOST_CMAKE TRUE)
########################################################################
# Setup Boost and handle some system specific things
########################################################################
set(BOOST_REQUIRED_COMPONENTS
filesystem
system
unit_test_framework
program_options
)
if(UNIX AND NOT BOOST_ROOT AND EXISTS "/usr/lib64")
list(APPEND BOOST_LIBRARYDIR "/usr/lib64") #fedora 64-bit fix
endif(UNIX AND NOT BOOST_ROOT AND EXISTS "/usr/lib64")
if(MSVC)
set(BOOST_REQUIRED_COMPONENTS ${BOOST_REQUIRED_COMPONENTS} chrono)
if (NOT DEFINED BOOST_ALL_DYN_LINK)
set(BOOST_ALL_DYN_LINK TRUE)
endif()
set(BOOST_ALL_DYN_LINK "${BOOST_ALL_DYN_LINK}" CACHE BOOL "boost enable dynamic linking")
if(BOOST_ALL_DYN_LINK)
add_definitions(-DBOOST_ALL_DYN_LINK) #setup boost auto-linking in msvc
else(BOOST_ALL_DYN_LINK)
unset(BOOST_REQUIRED_COMPONENTS) #empty components list for static link
endif(BOOST_ALL_DYN_LINK)
endif(MSVC)
find_package(Boost "1.35" COMPONENTS ${BOOST_REQUIRED_COMPONENTS})
# This does not allow us to disable specific versions. It is used
# internally by cmake to know the formation newer versions. As newer
# Boost version beyond what is shown here are produced, we must extend
# this list. To disable Boost versions, see below.
set(Boost_ADDITIONAL_VERSIONS
"1.35.0" "1.35" "1.36.0" "1.36" "1.37.0" "1.37" "1.38.0" "1.38" "1.39.0" "1.39"
"1.40.0" "1.40" "1.41.0" "1.41" "1.42.0" "1.42" "1.43.0" "1.43" "1.44.0" "1.44"
"1.45.0" "1.45" "1.46.0" "1.46" "1.47.0" "1.47" "1.48.0" "1.48" "1.49.0" "1.49"
"1.50.0" "1.50" "1.51.0" "1.51" "1.52.0" "1.52" "1.53.0" "1.53" "1.54.0" "1.54"
"1.55.0" "1.55" "1.56.0" "1.56" "1.57.0" "1.57" "1.58.0" "1.58" "1.59.0" "1.59"
"1.60.0" "1.60" "1.61.0" "1.61" "1.62.0" "1.62" "1.63.0" "1.63" "1.64.0" "1.64"
"1.65.0" "1.65" "1.66.0" "1.66" "1.67.0" "1.67" "1.68.0" "1.68" "1.69.0" "1.69"
)
# Boost 1.52 disabled, see https://svn.boost.org/trac/boost/ticket/7669
# Similar problems with Boost 1.46 and 1.47.
OPTION(ENABLE_BAD_BOOST "Enable known bad versions of Boost" OFF)
if(ENABLE_BAD_BOOST)
MESSAGE(STATUS "Enabling use of known bad versions of Boost.")
endif(ENABLE_BAD_BOOST)
# For any unsuitable Boost version, add the version number below in
# the following format: XXYYZZ
# Where:
# XX is the major version ('10' for version 1)
# YY is the minor version number ('46' for 1.46)
# ZZ is the patcher version number (typically just '00')
set(Boost_NOGO_VERSIONS
104600 104601 104700 105200
)
foreach(ver ${Boost_NOGO_VERSIONS})
if(${Boost_VERSION} EQUAL ${ver})
if(NOT ENABLE_BAD_BOOST)
MESSAGE(STATUS "WARNING: Found a known bad version of Boost (v${Boost_VERSION}). Disabling.")
set(Boost_FOUND FALSE)
else(NOT ENABLE_BAD_BOOST)
MESSAGE(STATUS "WARNING: Found a known bad version of Boost (v${Boost_VERSION}). Continuing anyway.")
set(Boost_FOUND TRUE)
endif(NOT ENABLE_BAD_BOOST)
endif(${Boost_VERSION} EQUAL ${ver})
endforeach(ver)

View File

@ -0,0 +1,26 @@
INCLUDE(FindPkgConfig)
PKG_CHECK_MODULES(PC_VOLK volk_gnsssdr)
FIND_PATH(
VOLK_INCLUDE_DIRS
NAMES volk_gnsssdr/volk_gnsssdr.h
HINTS $ENV{VOLK_DIR}/include
${PC_VOLK_INCLUDEDIR}
PATHS /usr/local/include
/usr/include
)
FIND_LIBRARY(
VOLK_LIBRARIES
NAMES volk_gnsssdr
HINTS $ENV{VOLK_DIR}/lib
${PC_VOLK_LIBDIR}
PATHS /usr/local/lib
/usr/local/lib64
/usr/lib
/usr/lib64
)
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(VOLK DEFAULT_MSG VOLK_LIBRARIES VOLK_INCLUDE_DIRS)
MARK_AS_ADVANCED(VOLK_LIBRARIES VOLK_INCLUDE_DIRS)

View File

@ -0,0 +1,58 @@
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MSC_CONFIG_H_ // [
#define _MSC_CONFIG_H_
////////////////////////////////////////////////////////////////////////
// enable inline functions for C code
////////////////////////////////////////////////////////////////////////
#ifndef __cplusplus
# define inline __inline
#endif
////////////////////////////////////////////////////////////////////////
// signed size_t
////////////////////////////////////////////////////////////////////////
#include <stddef.h>
typedef ptrdiff_t ssize_t;
////////////////////////////////////////////////////////////////////////
// rint functions
////////////////////////////////////////////////////////////////////////
#include <math.h>
static inline long lrint(double x){return (long)(x > 0.0 ? x + 0.5 : x - 0.5);}
static inline long lrintf(float x){return (long)(x > 0.0f ? x + 0.5f : x - 0.5f);}
static inline long long llrint(double x){return (long long)(x > 0.0 ? x + 0.5 : x - 0.5);}
static inline long long llrintf(float x){return (long long)(x > 0.0f ? x + 0.5f : x - 0.5f);}
static inline double rint(double x){return (x > 0.0)? floor(x + 0.5) : ceil(x - 0.5);}
static inline float rintf(float x){return (x > 0.0f)? floorf(x + 0.5f) : ceilf(x - 0.5f);}
////////////////////////////////////////////////////////////////////////
// math constants
////////////////////////////////////////////////////////////////////////
#define INFINITY HUGE_VAL
# define M_E 2.7182818284590452354 /* e */
# define M_LOG2E 1.4426950408889634074 /* log_2 e */
# define M_LOG10E 0.43429448190325182765 /* log_10 e */
# define M_LN2 0.69314718055994530942 /* log_e 2 */
# define M_LN10 2.30258509299404568402 /* log_e 10 */
# define M_PI 3.14159265358979323846 /* pi */
# define M_PI_2 1.57079632679489661923 /* pi/2 */
# define M_PI_4 0.78539816339744830962 /* pi/4 */
# define M_1_PI 0.31830988618379067154 /* 1/pi */
# define M_2_PI 0.63661977236758134308 /* 2/pi */
# define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */
# define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
# define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
////////////////////////////////////////////////////////////////////////
// random and srandom
////////////////////////////////////////////////////////////////////////
#include <stdlib.h>
static inline long int random (void) { return rand(); }
static inline void srandom (unsigned int seed) { srand(seed); }
#endif // _MSC_CONFIG_H_ ]

View File

@ -0,0 +1,301 @@
// ISO C9x compliant inttypes.h for Microsoft Visual Studio
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
//
// Copyright (c) 2006 Alexander Chemeris
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. The name of the author may be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MSC_INTTYPES_H_ // [
#define _MSC_INTTYPES_H_
#if _MSC_VER > 1000
#pragma once
#endif
#include <stdint.h>
// 7.8 Format conversion of integer types
typedef struct {
intmax_t quot;
intmax_t rem;
} imaxdiv_t;
// 7.8.1 Macros for format specifiers
// The fprintf macros for signed integers are:
#define PRId8 "d"
#define PRIi8 "i"
#define PRIdLEAST8 "d"
#define PRIiLEAST8 "i"
#define PRIdFAST8 "d"
#define PRIiFAST8 "i"
#define PRId16 "hd"
#define PRIi16 "hi"
#define PRIdLEAST16 "hd"
#define PRIiLEAST16 "hi"
#define PRIdFAST16 "hd"
#define PRIiFAST16 "hi"
#define PRId32 "I32d"
#define PRIi32 "I32i"
#define PRIdLEAST32 "I32d"
#define PRIiLEAST32 "I32i"
#define PRIdFAST32 "I32d"
#define PRIiFAST32 "I32i"
#define PRId64 "I64d"
#define PRIi64 "I64i"
#define PRIdLEAST64 "I64d"
#define PRIiLEAST64 "I64i"
#define PRIdFAST64 "I64d"
#define PRIiFAST64 "I64i"
#define PRIdMAX "I64d"
#define PRIiMAX "I64i"
#define PRIdPTR "Id"
#define PRIiPTR "Ii"
// The fprintf macros for unsigned integers are:
#define PRIo8 "o"
#define PRIu8 "u"
#define PRIx8 "x"
#define PRIX8 "X"
#define PRIoLEAST8 "o"
#define PRIuLEAST8 "u"
#define PRIxLEAST8 "x"
#define PRIXLEAST8 "X"
#define PRIoFAST8 "o"
#define PRIuFAST8 "u"
#define PRIxFAST8 "x"
#define PRIXFAST8 "X"
#define PRIo16 "ho"
#define PRIu16 "hu"
#define PRIx16 "hx"
#define PRIX16 "hX"
#define PRIoLEAST16 "ho"
#define PRIuLEAST16 "hu"
#define PRIxLEAST16 "hx"
#define PRIXLEAST16 "hX"
#define PRIoFAST16 "ho"
#define PRIuFAST16 "hu"
#define PRIxFAST16 "hx"
#define PRIXFAST16 "hX"
#define PRIo32 "I32o"
#define PRIu32 "I32u"
#define PRIx32 "I32x"
#define PRIX32 "I32X"
#define PRIoLEAST32 "I32o"
#define PRIuLEAST32 "I32u"
#define PRIxLEAST32 "I32x"
#define PRIXLEAST32 "I32X"
#define PRIoFAST32 "I32o"
#define PRIuFAST32 "I32u"
#define PRIxFAST32 "I32x"
#define PRIXFAST32 "I32X"
#define PRIo64 "I64o"
#define PRIu64 "I64u"
#define PRIx64 "I64x"
#define PRIX64 "I64X"
#define PRIoLEAST64 "I64o"
#define PRIuLEAST64 "I64u"
#define PRIxLEAST64 "I64x"
#define PRIXLEAST64 "I64X"
#define PRIoFAST64 "I64o"
#define PRIuFAST64 "I64u"
#define PRIxFAST64 "I64x"
#define PRIXFAST64 "I64X"
#define PRIoMAX "I64o"
#define PRIuMAX "I64u"
#define PRIxMAX "I64x"
#define PRIXMAX "I64X"
#define PRIoPTR "Io"
#define PRIuPTR "Iu"
#define PRIxPTR "Ix"
#define PRIXPTR "IX"
// The fscanf macros for signed integers are:
#define SCNd8 "d"
#define SCNi8 "i"
#define SCNdLEAST8 "d"
#define SCNiLEAST8 "i"
#define SCNdFAST8 "d"
#define SCNiFAST8 "i"
#define SCNd16 "hd"
#define SCNi16 "hi"
#define SCNdLEAST16 "hd"
#define SCNiLEAST16 "hi"
#define SCNdFAST16 "hd"
#define SCNiFAST16 "hi"
#define SCNd32 "ld"
#define SCNi32 "li"
#define SCNdLEAST32 "ld"
#define SCNiLEAST32 "li"
#define SCNdFAST32 "ld"
#define SCNiFAST32 "li"
#define SCNd64 "I64d"
#define SCNi64 "I64i"
#define SCNdLEAST64 "I64d"
#define SCNiLEAST64 "I64i"
#define SCNdFAST64 "I64d"
#define SCNiFAST64 "I64i"
#define SCNdMAX "I64d"
#define SCNiMAX "I64i"
#ifdef _WIN64 // [
# define SCNdPTR "I64d"
# define SCNiPTR "I64i"
#else // _WIN64 ][
# define SCNdPTR "ld"
# define SCNiPTR "li"
#endif // _WIN64 ]
// The fscanf macros for unsigned integers are:
#define SCNo8 "o"
#define SCNu8 "u"
#define SCNx8 "x"
#define SCNX8 "X"
#define SCNoLEAST8 "o"
#define SCNuLEAST8 "u"
#define SCNxLEAST8 "x"
#define SCNXLEAST8 "X"
#define SCNoFAST8 "o"
#define SCNuFAST8 "u"
#define SCNxFAST8 "x"
#define SCNXFAST8 "X"
#define SCNo16 "ho"
#define SCNu16 "hu"
#define SCNx16 "hx"
#define SCNX16 "hX"
#define SCNoLEAST16 "ho"
#define SCNuLEAST16 "hu"
#define SCNxLEAST16 "hx"
#define SCNXLEAST16 "hX"
#define SCNoFAST16 "ho"
#define SCNuFAST16 "hu"
#define SCNxFAST16 "hx"
#define SCNXFAST16 "hX"
#define SCNo32 "lo"
#define SCNu32 "lu"
#define SCNx32 "lx"
#define SCNX32 "lX"
#define SCNoLEAST32 "lo"
#define SCNuLEAST32 "lu"
#define SCNxLEAST32 "lx"
#define SCNXLEAST32 "lX"
#define SCNoFAST32 "lo"
#define SCNuFAST32 "lu"
#define SCNxFAST32 "lx"
#define SCNXFAST32 "lX"
#define SCNo64 "I64o"
#define SCNu64 "I64u"
#define SCNx64 "I64x"
#define SCNX64 "I64X"
#define SCNoLEAST64 "I64o"
#define SCNuLEAST64 "I64u"
#define SCNxLEAST64 "I64x"
#define SCNXLEAST64 "I64X"
#define SCNoFAST64 "I64o"
#define SCNuFAST64 "I64u"
#define SCNxFAST64 "I64x"
#define SCNXFAST64 "I64X"
#define SCNoMAX "I64o"
#define SCNuMAX "I64u"
#define SCNxMAX "I64x"
#define SCNXMAX "I64X"
#ifdef _WIN64 // [
# define SCNoPTR "I64o"
# define SCNuPTR "I64u"
# define SCNxPTR "I64x"
# define SCNXPTR "I64X"
#else // _WIN64 ][
# define SCNoPTR "lo"
# define SCNuPTR "lu"
# define SCNxPTR "lx"
# define SCNXPTR "lX"
#endif // _WIN64 ]
// 7.8.2 Functions for greatest-width integer types
// 7.8.2.1 The imaxabs function
#define imaxabs _abs64
// 7.8.2.2 The imaxdiv function
// This is modified version of div() function from Microsoft's div.c found
// in %MSVC.NET%\crt\src\div.c
#ifdef STATIC_IMAXDIV // [
static
#else // STATIC_IMAXDIV ][
_inline
#endif // STATIC_IMAXDIV ]
imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
{
imaxdiv_t result;
result.quot = numer / denom;
result.rem = numer % denom;
if (numer < 0 && result.rem > 0) {
// did division wrong; must fix up
++result.quot;
result.rem -= denom;
}
return result;
}
// 7.8.2.3 The strtoimax and strtoumax functions
#define strtoimax _strtoi64
#define strtoumax _strtoui64
// 7.8.2.4 The wcstoimax and wcstoumax functions
#define wcstoimax _wcstoi64
#define wcstoumax _wcstoui64
#endif // _MSC_INTTYPES_H_ ]

View File

@ -0,0 +1,45 @@
/*
* Copyright (C) 2005, 2006 Apple Computer, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef STDBOOL_WIN32_H
#define STDBOOL_WIN32_H
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef __cplusplus
typedef unsigned char bool;
#define true 1
#define false 0
#ifndef CASSERT
#define CASSERT(exp, name) typedef int dummy##name [(exp) ? 1 : -1];
#endif
CASSERT(sizeof(bool) == 1, bool_is_one_byte)
CASSERT(true, true_is_true)
CASSERT(!false, false_is_false)
#endif
#endif

View File

@ -0,0 +1,251 @@
// ISO C9x compliant stdint.h for Microsoft Visual Studio
// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
//
// Copyright (c) 2006-2008 Alexander Chemeris
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. The name of the author may be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MSC_STDINT_H_ // [
#define _MSC_STDINT_H_
#if _MSC_VER > 1000
#pragma once
#endif
#include <limits.h>
// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
// or compiler give many errors like this:
// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
#ifdef __cplusplus
extern "C" {
#endif
# include <wchar.h>
#ifdef __cplusplus
}
#endif
// Define _W64 macros to mark types changing their size, like intptr_t.
#ifndef _W64
# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
# define _W64 __w64
# else
# define _W64
# endif
#endif
// 7.18.1 Integer types
// 7.18.1.1 Exact-width integer types
// Visual Studio 6 and Embedded Visual C++ 4 doesn't
// realize that, e.g. char has the same size as __int8
// so we give up on __intX for them.
#if (_MSC_VER < 1300)
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
#else
typedef signed __int8 int8_t;
typedef signed __int16 int16_t;
typedef signed __int32 int32_t;
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
#endif
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
// 7.18.1.2 Minimum-width integer types
typedef int8_t int_least8_t;
typedef int16_t int_least16_t;
typedef int32_t int_least32_t;
typedef int64_t int_least64_t;
typedef uint8_t uint_least8_t;
typedef uint16_t uint_least16_t;
typedef uint32_t uint_least32_t;
typedef uint64_t uint_least64_t;
// 7.18.1.3 Fastest minimum-width integer types
typedef int8_t int_fast8_t;
typedef int16_t int_fast16_t;
typedef int32_t int_fast32_t;
typedef int64_t int_fast64_t;
typedef uint8_t uint_fast8_t;
typedef uint16_t uint_fast16_t;
typedef uint32_t uint_fast32_t;
typedef uint64_t uint_fast64_t;
// 7.18.1.4 Integer types capable of holding object pointers
#ifdef _WIN64 // [
typedef signed __int64 intptr_t;
typedef unsigned __int64 uintptr_t;
#else // _WIN64 ][
typedef _W64 signed int intptr_t;
typedef _W64 unsigned int uintptr_t;
#endif // _WIN64 ]
// 7.18.1.5 Greatest-width integer types
typedef int64_t intmax_t;
typedef uint64_t uintmax_t;
// 7.18.2 Limits of specified-width integer types
#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
// 7.18.2.1 Limits of exact-width integer types
#define INT8_MIN ((int8_t)_I8_MIN)
#define INT8_MAX _I8_MAX
#define INT16_MIN ((int16_t)_I16_MIN)
#define INT16_MAX _I16_MAX
#define INT32_MIN ((int32_t)_I32_MIN)
#define INT32_MAX _I32_MAX
#define INT64_MIN ((int64_t)_I64_MIN)
#define INT64_MAX _I64_MAX
#define UINT8_MAX _UI8_MAX
#define UINT16_MAX _UI16_MAX
#define UINT32_MAX _UI32_MAX
#define UINT64_MAX _UI64_MAX
// 7.18.2.2 Limits of minimum-width integer types
#define INT_LEAST8_MIN INT8_MIN
#define INT_LEAST8_MAX INT8_MAX
#define INT_LEAST16_MIN INT16_MIN
#define INT_LEAST16_MAX INT16_MAX
#define INT_LEAST32_MIN INT32_MIN
#define INT_LEAST32_MAX INT32_MAX
#define INT_LEAST64_MIN INT64_MIN
#define INT_LEAST64_MAX INT64_MAX
#define UINT_LEAST8_MAX UINT8_MAX
#define UINT_LEAST16_MAX UINT16_MAX
#define UINT_LEAST32_MAX UINT32_MAX
#define UINT_LEAST64_MAX UINT64_MAX
// 7.18.2.3 Limits of fastest minimum-width integer types
#define INT_FAST8_MIN INT8_MIN
#define INT_FAST8_MAX INT8_MAX
#define INT_FAST16_MIN INT16_MIN
#define INT_FAST16_MAX INT16_MAX
#define INT_FAST32_MIN INT32_MIN
#define INT_FAST32_MAX INT32_MAX
#define INT_FAST64_MIN INT64_MIN
#define INT_FAST64_MAX INT64_MAX
#define UINT_FAST8_MAX UINT8_MAX
#define UINT_FAST16_MAX UINT16_MAX
#define UINT_FAST32_MAX UINT32_MAX
#define UINT_FAST64_MAX UINT64_MAX
// 7.18.2.4 Limits of integer types capable of holding object pointers
#ifdef _WIN64 // [
# define INTPTR_MIN INT64_MIN
# define INTPTR_MAX INT64_MAX
# define UINTPTR_MAX UINT64_MAX
#else // _WIN64 ][
# define INTPTR_MIN INT32_MIN
# define INTPTR_MAX INT32_MAX
# define UINTPTR_MAX UINT32_MAX
#endif // _WIN64 ]
// 7.18.2.5 Limits of greatest-width integer types
#define INTMAX_MIN INT64_MIN
#define INTMAX_MAX INT64_MAX
#define UINTMAX_MAX UINT64_MAX
// 7.18.3 Limits of other integer types
#ifdef _WIN64 // [
# define PTRDIFF_MIN _I64_MIN
# define PTRDIFF_MAX _I64_MAX
#else // _WIN64 ][
# define PTRDIFF_MIN _I32_MIN
# define PTRDIFF_MAX _I32_MAX
#endif // _WIN64 ]
#define SIG_ATOMIC_MIN INT_MIN
#define SIG_ATOMIC_MAX INT_MAX
#ifndef SIZE_MAX // [
# ifdef _WIN64 // [
# define SIZE_MAX _UI64_MAX
# else // _WIN64 ][
# define SIZE_MAX _UI32_MAX
# endif // _WIN64 ]
#endif // SIZE_MAX ]
// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
#ifndef WCHAR_MIN // [
# define WCHAR_MIN 0
#endif // WCHAR_MIN ]
#ifndef WCHAR_MAX // [
# define WCHAR_MAX _UI16_MAX
#endif // WCHAR_MAX ]
#define WINT_MIN 0
#define WINT_MAX _UI16_MAX
#endif // __STDC_LIMIT_MACROS ]
// 7.18.4 Limits of other integer types
#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
// 7.18.4.1 Macros for minimum-width integer constants
#define INT8_C(val) val##i8
#define INT16_C(val) val##i16
#define INT32_C(val) val##i32
#define INT64_C(val) val##i64
#define UINT8_C(val) val##ui8
#define UINT16_C(val) val##ui16
#define UINT32_C(val) val##ui32
#define UINT64_C(val) val##ui64
// 7.18.4.2 Macros for greatest-width integer constants
#ifndef INTMAX_C
#define INTMAX_C INT64_C
#endif
#ifndef UINTMAX_C
#define UINTMAX_C UINT64_C
#endif
#endif // __STDC_CONSTANT_MACROS ]
#endif // _MSC_STDINT_H_ ]

View File

@ -0,0 +1,204 @@
<!-- archs appear in order of significance for blind, de-facto version ordering -->
<grammar>
<arch name="generic"> <!-- name is required-->
</arch>
<arch name="altivec">
<flag compiler="gnu">-maltivec</flag>
<alignment>16</alignment>
<check name="has_ppc"></check>
</arch>
<arch name="softfp">
<flag compiler="gnu">-mfloat-abi=softfp</flag>
</arch>
<arch name="hardfp">
<flag compiler="gnu">-mfloat-abi=hard</flag>
</arch>
<arch name="neon">
<flag compiler="gnu">-mfpu=neon</flag>
<flag compiler="gnu">-funsafe-math-optimizations</flag>
<alignment>16</alignment>
<check name="has_neon"></check>
</arch>
<arch name="32">
<flag compiler="gnu">-m32</flag>
</arch>
<arch name="64">
<check name="check_extended_cpuid">
<param>0x80000001</param>
</check>
<check name="cpuid_x86_bit"> <!-- checks to see if a bit is set -->
<param>3</param> <!-- eax, ebx, ecx, [edx] -->
<param>0x80000001</param> <!-- cpuid operation -->
<param>29</param> <!-- bit shift -->
</check>
<flag compiler="gnu">-m64</flag>
<flag compiler="clang">-m64</flag>
</arch>
<arch name="3dnow">
<check name="cpuid_x86_bit">
<param>3</param>
<param>0x80000001</param>
<param>31</param>
</check>
<flag compiler="gnu">-m3dnow</flag>
<flag compiler="clang">-m3dnow</flag>
<alignment>8</alignment>
</arch>
<arch name="abm">
<check name="cpuid_x86_bit">
<param>3</param>
<param>0x80000001</param>
<param>5</param>
</check>
<flag compiler="gnu">-msse4.2</flag>
<flag compiler="clang">-msse4.2</flag>
<alignment>16</alignment>
</arch>
<arch name="popcount">
<check name="cpuid_x86_bit">
<param>2</param>
<param>0x00000001</param>
<param>23</param>
</check>
<flag compiler="gnu">-mpopcnt</flag>
<flag compiler="clang">-mpopcnt</flag>
<flag compiler="msvc">/arch:AVX</flag>
</arch>
<arch name="mmx">
<check name="cpuid_x86_bit">
<param>3</param>
<param>0x00000001</param>
<param>23</param>
</check>
<flag compiler="gnu">-mmmx</flag>
<flag compiler="clang">-mmmx</flag>
<flag compiler="msvc">/arch:SSE</flag>
<alignment>8</alignment>
</arch>
<arch name="sse">
<check name="cpuid_x86_bit">
<param>3</param>
<param>0x00000001</param>
<param>25</param>
</check>
<flag compiler="gnu">-msse</flag>
<flag compiler="clang">-msse</flag>
<flag compiler="msvc">/arch:SSE</flag>
<environment>_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);</environment>
<include>xmmintrin.h</include>
<alignment>16</alignment>
</arch>
<arch name="sse2">
<check name="cpuid_x86_bit">
<param>3</param>
<param>0x00000001</param>
<param>26</param>
</check>
<flag compiler="gnu">-msse2</flag>
<flag compiler="clang">-msse2</flag>
<flag compiler="msvc">/arch:SSE2</flag>
<alignment>16</alignment>
</arch>
<arch name="orc">
</arch>
<!-- it's here for overrule stuff. -->
<arch name="norc">
</arch>
<arch name="sse3">
<check name="cpuid_x86_bit">
<param>2</param>
<param>0x00000001</param>
<param>0</param>
</check>
<flag compiler="gnu">-msse3</flag>
<flag compiler="clang">-msse3</flag>
<flag compiler="msvc">/arch:AVX</flag>
<environment>_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);</environment>
<include>pmmintrin.h</include>
<alignment>16</alignment>
</arch>
<arch name="ssse3">
<check name="cpuid_x86_bit">
<param>2</param>
<param>0x00000001</param>
<param>9</param>
</check>
<flag compiler="gnu">-mssse3</flag>
<flag compiler="clang">-mssse3</flag>
<flag compiler="msvc">/arch:AVX</flag>
<alignment>16</alignment>
</arch>
<arch name="sse4_a">
<check name="cpuid_x86_bit">
<param>2</param>
<param>0x80000001</param>
<param>6</param>
</check>
<flag compiler="gnu">-msse4a</flag>
<flag compiler="clang">-msse4a</flag>
<alignment>16</alignment>
</arch>
<arch name="sse4_1">
<check name="cpuid_x86_bit">
<param>2</param>
<param>0x00000001</param>
<param>19</param>
</check>
<flag compiler="gnu">-msse4.1</flag>
<flag compiler="clang">-msse4.1</flag>
<flag compiler="msvc">/arch:AVX</flag>
<alignment>16</alignment>
</arch>
<arch name="sse4_2">
<check name="cpuid_x86_bit">
<param>2</param>
<param>0x00000001</param>
<param>20</param>
</check>
<flag compiler="gnu">-msse4.2</flag>
<flag compiler="clang">-msse4.2</flag>
<flag compiler="msvc">/arch:AVX</flag>
<alignment>16</alignment>
</arch>
<arch name="avx">
<check name="cpuid_x86_bit">
<param>2</param>
<param>0x00000001</param>
<param>28</param>
</check>
<!-- check to make sure that xgetbv is enabled in OS -->
<check name="cpuid_x86_bit">
<param>2</param>
<param>0x00000001</param>
<param>27</param>
</check>
<!-- check to see that the OS has enabled AVX -->
<check name="get_avx_enabled"></check>
<flag compiler="gnu">-mavx</flag>
<flag compiler="clang">-mavx</flag>
<flag compiler="msvc">/arch:AVX</flag>
<alignment>32</alignment>
</arch>
</grammar>

View File

@ -0,0 +1,55 @@
<grammar>
<machine name="generic">
<archs>generic orc|</archs>
</machine>
<!--
<machine name="mmx">
<archs>generic 32|64 mmx orc|</archs>
</machine>
<machine name="sse">
<archs>generic 32|64| mmx| sse orc|</archs>
</machine>
-->
<machine name="neon">
<archs>generic neon softfp|hardfp orc|</archs>
</machine>
<!-- trailing | bar means generate without either for MSVC -->
<machine name="sse2">
<archs>generic 32|64| mmx| sse sse2 orc|</archs>
</machine>
<machine name="sse3">
<archs>generic 32|64 mmx sse sse2 sse3 orc|</archs>
</machine>
<machine name="ssse3">
<archs>generic 32|64 mmx sse sse2 sse3 ssse3 orc|</archs>
</machine>
<machine name="sse4_a">
<archs>generic 32|64 mmx sse sse2 sse3 sse4_a popcount orc|</archs>
</machine>
<machine name="sse4_1">
<archs>generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1 orc|</archs>
</machine>
<machine name="sse4_2">
<archs>generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount orc|</archs>
</machine>
<!-- trailing | bar means generate without either for MSVC -->
<machine name="avx">
<archs>generic 32|64| mmx| sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount avx orc|</archs>
</machine>
<machine name="altivec">
<archs>generic altivec</archs>
</machine>
</grammar>

View File

@ -0,0 +1,85 @@
#
# Copyright 2012 Free Software Foundation, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
archs = list()
arch_dict = dict()
class arch_class:
def __init__(self, flags, checks, **kwargs):
for key, cast, failval in (
('name', str, None),
('environment', str, None),
('include', str, None),
('alignment', int, 1)
):
try: setattr(self, key, cast(kwargs[key]))
except: setattr(self, key, failval)
self.checks = checks
assert(self.name)
self._flags = flags
def is_supported(self, compiler):
if not self._flags.keys(): return True
return compiler in self._flags.keys()
def get_flags(self, compiler):
try: return self._flags[compiler]
except KeyError: return list()
def __repr__(self): return self.name
def register_arch(**kwargs):
arch = arch_class(**kwargs)
archs.append(arch)
arch_dict[arch.name] = arch
########################################################################
# register the arches
########################################################################
#TODO skip the XML and put it here
from xml.dom import minidom
import os
gendir = os.path.dirname(__file__)
archs_xml = minidom.parse(os.path.join(gendir, 'archs.xml')).getElementsByTagName('arch')
for arch_xml in archs_xml:
kwargs = dict()
for attr in arch_xml.attributes.keys():
kwargs[attr] = arch_xml.attributes[attr].value
for node in arch_xml.childNodes:
try:
name = node.tagName
val = arch_xml.getElementsByTagName(name)[0].firstChild.data
kwargs[name] = val
except: pass
checks = list()
for check_xml in arch_xml.getElementsByTagName("check"):
name = check_xml.attributes["name"].value
params = list()
for param_xml in check_xml.getElementsByTagName("param"):
params.append(param_xml.firstChild.data)
checks.append([name, params])
flags = dict()
for flag_xml in arch_xml.getElementsByTagName("flag"):
name = flag_xml.attributes["compiler"].value
if not flags.has_key(name): flags[name] = list()
flags[name].append(flag_xml.firstChild.data)
#force kwargs keys to be of type str, not unicode for py25
kwargs = dict((str(k), v) for k, v in kwargs.iteritems())
register_arch(flags=flags, checks=checks, **kwargs)
if __name__ == '__main__':
print archs

View File

@ -0,0 +1,58 @@
#!/usr/bin/env python
#
# Copyright 2012 Free Software Foundation, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import optparse
import volk_gnsssdr_arch_defs
import volk_gnsssdr_machine_defs
def do_arch_flags_list(compiler):
output = list()
for arch in volk_gnsssdr_arch_defs.archs:
if not arch.is_supported(compiler): continue
fields = [arch.name] + arch.get_flags(compiler)
output.append(','.join(fields))
print ';'.join(output)
def do_machines_list(arch_names):
output = list()
for machine in volk_gnsssdr_machine_defs.machines:
machine_arch_set = set(machine.arch_names)
if set(arch_names).intersection(machine_arch_set) == machine_arch_set:
output.append(machine.name)
print ';'.join(output)
def do_machine_flags_list(compiler, machine_name):
output = list()
machine = volk_gnsssdr_machine_defs.machine_dict[machine_name]
for arch in machine.archs:
output.extend(arch.get_flags(compiler))
print ' '.join(output)
def main():
parser = optparse.OptionParser()
parser.add_option('--mode', type='string')
parser.add_option('--compiler', type='string')
parser.add_option('--archs', type='string')
parser.add_option('--machine', type='string')
(opts, args) = parser.parse_args()
if opts.mode == 'arch_flags': return do_arch_flags_list(opts.compiler.lower())
if opts.mode == 'machines': return do_machines_list(opts.archs.split(';'))
if opts.mode == 'machine_flags': return do_machine_flags_list(opts.compiler.lower(), opts.machine)
if __name__ == '__main__': main()

View File

@ -0,0 +1,209 @@
#
# Copyright 2011-2012 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
# GNU Radio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GNU Radio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Radio; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street,
# Boston, MA 02110-1301, USA.
#
import os
import re
import sys
import glob
########################################################################
# Strip comments from a c/cpp file.
# Input is code string, output is code string without comments.
# http://stackoverflow.com/questions/241327/python-snippet-to-remove-c-and-c-comments
########################################################################
def comment_remover(text):
def replacer(match):
s = match.group(0)
if s.startswith('/'):
return ""
else:
return s
pattern = re.compile(
r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
re.DOTALL | re.MULTILINE
)
return re.sub(pattern, replacer, text)
########################################################################
# Split code into nested sections according to ifdef preprocessor macros
########################################################################
def split_into_nested_ifdef_sections(code):
sections = list()
section = ''
header = 'text'
in_section_depth = 0
for i, line in enumerate(code.splitlines()):
m = re.match('^(\s*)#(\s*)(\w+)(.*)$', line)
line_is = 'normal'
if m:
p0, p1, fcn, stuff = m.groups()
if fcn in ('if', 'ifndef', 'ifdef'): line_is = 'if'
if fcn in ('else', 'elif'): line_is = 'else'
if fcn in ('endif',): line_is = 'end'
if line_is == 'if': in_section_depth += 1
if line_is == 'end': in_section_depth -= 1
if in_section_depth == 1 and line_is == 'if':
sections.append((header, section))
section = ''
header = line
continue
if in_section_depth == 1 and line_is == 'else':
sections.append((header, section))
section = ''
header = line
continue
if in_section_depth == 0 and line_is == 'end':
sections.append((header, section))
section = ''
header = 'text'
continue
section += line + '\n'
sections.append((header, section)) #and pack remainder into sections
sections = [sec for sec in sections if sec[1].strip()] #filter empty sections
#recurse into non-text sections to fill subsections
for i, (header, section) in enumerate(sections):
if header == 'text': continue
sections[i] = (header, split_into_nested_ifdef_sections(section))
return sections
########################################################################
# Recursive print of sections to test code above
########################################################################
def print_sections(sections, indent = ' '):
for header, body in sections:
if header == 'text':
print indent, ('\n'+indent).join(body.splitlines())
continue
print indent.replace(' ', '-') + '>', header
print_sections(body, indent + ' ')
########################################################################
# Flatten a section to just body text
########################################################################
def flatten_section_text(sections):
output = ''
for hdr, bdy in sections:
if hdr != 'text': output += flatten_section_text(bdy)
else: output += bdy
return output
########################################################################
# Extract kernel info from section, represent as an implementation
########################################################################
class impl_class:
def __init__(self, kern_name, header, body):
#extract LV_HAVE_*
self.deps = set(map(str.lower, re.findall('LV_HAVE_(\w+)', header)))
#extract function suffix and args
body = flatten_section_text(body)
try:
fcn_matcher = re.compile('^.*(%s\\w*)\\s*\\((.*)$'%kern_name, re.DOTALL | re.MULTILINE)
body = body.split('{')[0].rsplit(')', 1)[0] #get the part before the open ){ bracket
m = fcn_matcher.match(body)
impl_name, the_rest = m.groups()
self.name = impl_name.replace(kern_name+'_', '')
self.args = list()
fcn_args = the_rest.split(',')
for fcn_arg in fcn_args:
arg_matcher = re.compile('^\s*(.*\\W)\s*(\w+)\s*$', re.DOTALL | re.MULTILINE)
m = arg_matcher.match(fcn_arg)
arg_type, arg_name = m.groups()
self.args.append((arg_type, arg_name))
except Exception as ex:
raise Exception, 'I cant parse the function prototype from: %s in %s\n%s'%(kern_name, body, ex)
assert self.name
self.is_aligned = self.name.startswith('a_')
def __repr__(self):
return self.name
########################################################################
# Get sets of LV_HAVE_* from the code
########################################################################
def extract_lv_haves(code):
haves = list()
for line in code.splitlines():
if not line.strip().startswith('#'): continue
have_set = set(map(str.lower, re.findall('LV_HAVE_(\w+)', line)))
if have_set: haves.append(have_set)
return haves
########################################################################
# Represent a processing kernel, parse from file
########################################################################
class kernel_class:
def __init__(self, kernel_file):
self.name = os.path.splitext(os.path.basename(kernel_file))[0]
self.pname = self.name.replace('volk_gnsssdr_', 'p_')
code = open(kernel_file, 'r').read()
code = comment_remover(code)
sections = split_into_nested_ifdef_sections(code)
self._impls = list()
for header, section in sections:
if 'ifndef' not in header.lower(): continue
for sub_hdr, body in section:
if 'if' not in sub_hdr.lower(): continue
if 'LV_HAVE_' not in sub_hdr: continue
self._impls.append(impl_class(
kern_name=self.name, header=sub_hdr, body=body,
))
assert(self._impls)
self.has_dispatcher = False
for impl in self._impls:
if impl.name == 'dispatcher':
self._impls.remove(impl)
self.has_dispatcher = True
break
self.args = self._impls[0].args
self.arglist_types = ', '.join([a[0] for a in self.args])
self.arglist_full = ', '.join(['%s %s'%a for a in self.args])
self.arglist_names = ', '.join([a[1] for a in self.args])
def get_impls(self, archs):
archs = set(archs)
impls = list()
for impl in self._impls:
if impl.deps.intersection(archs) == impl.deps:
impls.append(impl)
return impls
def __repr__(self):
return self.name
########################################################################
# Extract information from the VOLK kernels
########################################################################
__file__ = os.path.abspath(__file__)
srcdir = os.path.dirname(os.path.dirname(__file__))
kernel_files = glob.glob(os.path.join(srcdir, "kernels", "volk_gnsssdr", "*.h"))
kernels = map(kernel_class, kernel_files)
if __name__ == '__main__':
print kernels

View File

@ -0,0 +1,74 @@
#
# Copyright 2012 Free Software Foundation, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
from volk_gnsssdr_arch_defs import arch_dict
machines = list()
machine_dict = dict()
class machine_class:
def __init__(self, name, archs):
self.name = name
self.archs = list()
self.arch_names = list()
for arch_name in archs:
if not arch_name: continue
arch = arch_dict[arch_name]
self.archs.append(arch)
self.arch_names.append(arch_name)
self.alignment = max(map(lambda a: a.alignment, self.archs))
def __repr__(self): return self.name
def register_machine(name, archs):
for i, arch_name in enumerate(archs):
if '|' in arch_name: #handle special arch names with the '|'
for arch_sub in arch_name.split('|'):
if arch_sub:
register_machine(name+'_'+arch_sub, archs[:i] + [arch_sub] + archs[i+1:])
else:
register_machine(name, archs[:i] + archs[i+1:])
return
machine = machine_class(name=name, archs=archs)
machines.append(machine)
machine_dict[machine.name] = machine
########################################################################
# register the machines
########################################################################
#TODO skip the XML and put it here
from xml.dom import minidom
import os
gendir = os.path.dirname(__file__)
machines_xml = minidom.parse(os.path.join(gendir, 'machines.xml')).getElementsByTagName('machine')
for machine_xml in machines_xml:
kwargs = dict()
for attr in machine_xml.attributes.keys():
kwargs[attr] = machine_xml.attributes[attr].value
for node in machine_xml.childNodes:
try:
name = node.tagName
val = machine_xml.getElementsByTagName(name)[0].firstChild.data
kwargs[name] = val
except: pass
kwargs['archs'] = kwargs['archs'].split()
#force kwargs keys to be of type str, not unicode for py25
kwargs = dict((str(k), v) for k, v in kwargs.iteritems())
register_machine(**kwargs)
if __name__ == '__main__':
print machines

View File

@ -0,0 +1,74 @@
#!/usr/bin/env python
#
# Copyright 2012 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
# GNU Radio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GNU Radio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Radio; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street,
# Boston, MA 02110-1301, USA.
#
import os
import re
import sys
import optparse
import volk_gnsssdr_arch_defs
import volk_gnsssdr_machine_defs
import volk_gnsssdr_kernel_defs
from Cheetah import Template
def __escape_pre_processor(code):
out = list()
for line in code.splitlines():
m = re.match('^(\s*)#(\s*)(\w+)(.*)$', line)
if m:
p0, p1, fcn, stuff = m.groups()
conly = fcn in ('include', 'define', 'ifdef', 'ifndef', 'endif', 'elif', 'pragma')
both = fcn in ('if', 'else')
istmpl = '$' in stuff
if 'defined' in stuff: istmpl = False
if conly or (both and not istmpl):
line = '%s\\#%s%s%s'%(p0, p1, fcn, stuff)
out.append(line)
return '\n'.join(out)
def __parse_tmpl(_tmpl, **kwargs):
defs = {
'archs': volk_gnsssdr_arch_defs.archs,
'arch_dict': volk_gnsssdr_arch_defs.arch_dict,
'machines': volk_gnsssdr_machine_defs.machines,
'machine_dict': volk_gnsssdr_machine_defs.machine_dict,
'kernels': volk_gnsssdr_kernel_defs.kernels,
}
defs.update(kwargs)
_tmpl = __escape_pre_processor(_tmpl)
_tmpl = """
/* this file was generated by volk_gnsssdr template utils, do not edit! */
""" + _tmpl
return str(Template.Template(_tmpl, defs))
def main():
parser = optparse.OptionParser()
parser.add_option('--input', type='string')
parser.add_option('--output', type='string')
(opts, args) = parser.parse_args()
output = __parse_tmpl(open(opts.input).read(), args=args)
if opts.output: open(opts.output, 'w').write(output)
else: print output
if __name__ == '__main__': main()

View File

@ -0,0 +1,39 @@
/* -*- c++ -*- */
/*
* Copyright 2006,2009,2013 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/
#ifndef INCLUDED_VOLK_CONSTANTS_H
#define INCLUDED_VOLK_CONSTANTS_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
__VOLK_DECL_BEGIN
VOLK_API char* volk_gnsssdr_prefix();
VOLK_API char* volk_gnsssdr_build_date();
VOLK_API char* volk_gnsssdr_version();
VOLK_API char* volk_gnsssdr_c_compiler();
VOLK_API char* volk_gnsssdr_compiler_flags();
VOLK_API char* volk_gnsssdr_available_machines();
__VOLK_DECL_END
#endif /* INCLUDED_VOLK_CONSTANTS_H */

View File

@ -0,0 +1,96 @@
#ifndef INCLUDED_LIBVOLK_COMMON_H
#define INCLUDED_LIBVOLK_COMMON_H
////////////////////////////////////////////////////////////////////////
// Cross-platform attribute macros
////////////////////////////////////////////////////////////////////////
#if defined __GNUC__
# define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x)))
# define __VOLK_ATTR_UNUSED __attribute__((unused))
# define __VOLK_ATTR_INLINE __attribute__((always_inline))
# define __VOLK_ATTR_DEPRECATED __attribute__((deprecated))
# if __GNUC__ >= 4
# define __VOLK_ATTR_EXPORT __attribute__((visibility("default")))
# define __VOLK_ATTR_IMPORT __attribute__((visibility("default")))
# else
# define __VOLK_ATTR_EXPORT
# define __VOLK_ATTR_IMPORT
# endif
#elif _MSC_VER
# define __VOLK_ATTR_ALIGNED(x) __declspec(align(x))
# define __VOLK_ATTR_UNUSED
# define __VOLK_ATTR_INLINE __forceinline
# define __VOLK_ATTR_DEPRECATED __declspec(deprecated)
# define __VOLK_ATTR_EXPORT __declspec(dllexport)
# define __VOLK_ATTR_IMPORT __declspec(dllimport)
#else
# define __VOLK_ATTR_ALIGNED(x)
# define __VOLK_ATTR_UNUSED
# define __VOLK_ATTR_INLINE
# define __VOLK_ATTR_DEPRECATED
# define __VOLK_ATTR_EXPORT
# define __VOLK_ATTR_IMPORT
#endif
////////////////////////////////////////////////////////////////////////
// Ignore annoying warnings in MSVC
////////////////////////////////////////////////////////////////////////
#if defined(_MSC_VER)
# pragma warning(disable: 4244) //'conversion' conversion from 'type1' to 'type2', possible loss of data
# pragma warning(disable: 4305) //'identifier' : truncation from 'type1' to 'type2'
#endif
////////////////////////////////////////////////////////////////////////
// C-linkage declaration macros
// FIXME: due to the usage of complex.h, require gcc for c-linkage
////////////////////////////////////////////////////////////////////////
#if defined(__cplusplus) && (__GNUC__)
# define __VOLK_DECL_BEGIN extern "C" {
# define __VOLK_DECL_END }
#else
# define __VOLK_DECL_BEGIN
# define __VOLK_DECL_END
#endif
////////////////////////////////////////////////////////////////////////
// Define VOLK_API for library symbols
// http://gcc.gnu.org/wiki/Visibility
////////////////////////////////////////////////////////////////////////
#ifdef volk_gnsssdr_EXPORTS
# define VOLK_API __VOLK_ATTR_EXPORT
#else
# define VOLK_API __VOLK_ATTR_IMPORT
#endif
////////////////////////////////////////////////////////////////////////
// The bit128 union used by some
////////////////////////////////////////////////////////////////////////
#include <inttypes.h>
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
#endif
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
#endif
union bit128{
uint16_t i16[8];
uint32_t i[4];
float f[4];
double d[2];
#ifdef LV_HAVE_SSE
__m128 float_vec;
#endif
#ifdef LV_HAVE_SSE2
__m128i int_vec;
__m128d double_vec;
#endif
};
#define bit128_p(x) ((union bit128 *)(x))
#endif /*INCLUDED_LIBVOLK_COMMON_H*/

View File

@ -0,0 +1,86 @@
#ifndef INCLUDE_VOLK_COMPLEX_H
#define INCLUDE_VOLK_COMPLEX_H
/*!
* \brief Provide typedefs and operators for all complex types in C and C++.
*
* The typedefs encompass all signed integer and floating point types.
* Each operator function is intended to work across all data types.
* Under C++, these operators are defined as inline templates.
* Under C, these operators are defined as preprocessor macros.
* The use of macros makes the operators agnostic to the type.
*
* The following operator functions are defined:
* - lv_cmake - make a complex type from components
* - lv_creal - get the real part of the complex number
* - lv_cimag - get the imaginary part of the complex number
* - lv_conj - take the conjugate of the complex number
*/
#ifdef __cplusplus
#include <complex>
#include <stdint.h>
typedef std::complex<int8_t> lv_8sc_t;
typedef std::complex<int16_t> lv_16sc_t;
typedef std::complex<int32_t> lv_32sc_t;
typedef std::complex<int64_t> lv_64sc_t;
typedef std::complex<float> lv_32fc_t;
typedef std::complex<double> lv_64fc_t;
template <typename T> inline std::complex<T> lv_cmake(const T &r, const T &i){
return std::complex<T>(r, i);
}
template <typename T> inline typename T::value_type lv_creal(const T &x){
return x.real();
}
template <typename T> inline typename T::value_type lv_cimag(const T &x){
return x.imag();
}
template <typename T> inline T lv_conj(const T &x){
return std::conj(x);
}
#else /* __cplusplus */
#include <complex.h>
typedef char complex lv_8sc_t;
typedef short complex lv_16sc_t;
typedef long complex lv_32sc_t;
typedef long long complex lv_64sc_t;
typedef float complex lv_32fc_t;
typedef double complex lv_64fc_t;
#define lv_cmake(r, i) ((r) + _Complex_I*(i))
// When GNUC is available, use the complex extensions.
// The extensions always return the correct value type.
// http://gcc.gnu.org/onlinedocs/gcc/Complex.html
#ifdef __GNUC__
#define lv_creal(x) (__real__(x))
#define lv_cimag(x) (__imag__(x))
#define lv_conj(x) (~(x))
// When not available, use the c99 complex function family,
// which always returns double regardless of the input type.
#else /* __GNUC__ */
#define lv_creal(x) (creal(x))
#define lv_cimag(x) (cimag(x))
#define lv_conj(x) (conj(x))
#endif /* __GNUC__ */
#endif /* __cplusplus */
#endif /* INCLUDE_VOLK_COMPLEX_H */

View File

@ -0,0 +1,66 @@
/* -*- c -*- */
/*
* Copyright 2014 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/
#ifndef INCLUDED_VOLK_MALLOC_H
#define INCLUDED_VOLK_MALLOC_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <stdlib.h>
__VOLK_DECL_BEGIN
/*!
* \brief Allocate \p size bytes of data aligned to \p alignment.
*
* \details
* Because we don't have a standard method to allocate buffers in
* memory that are guaranteed to be on an alignment, VOLK handles this
* itself. The volk_gnsssdr_malloc function behaves like malloc in that it
* returns a pointer to the allocated memory. However, it also takes
* in an alignment specfication, which is usually something like 16 or
* 32 to ensure that the aligned memory is located on a particular
* byte boundary for use with SIMD.
*
* Internally, the volk_gnsssdr_malloc first checks if the compiler is C11
* compliant and uses the new aligned_alloc method. If not, it checks
* if the system is POSIX compliant and uses posix_memalign. If that
* fails, volk_gnsssdr_malloc handles the memory allocation and alignment
* internally.
*
* Because of the ways in which volk_gnsssdr_malloc may allocate memory, it is
* important to always free volk_gnsssdr_malloc pointers using volk_gnsssdr_free.
*
* \param size The number of bytes to allocate.
* \param alignment The byte alignment of the allocated memory.
* \return pointer to aligned memory.
*/
VOLK_API void *volk_gnsssdr_malloc(size_t size, size_t alignment);
/*!
* \brief Free's memory allocated by volk_gnsssdr_malloc.
* \param aptr The aligned pointer allocaed by volk_gnsssdr_malloc.
*/
VOLK_API void volk_gnsssdr_free(void *aptr);
__VOLK_DECL_END
#endif /* INCLUDED_VOLK_MALLOC_H */

View File

@ -0,0 +1,28 @@
#ifndef INCLUDED_VOLK_PREFS_H
#define INCLUDED_VOLK_PREFS_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <stdlib.h>
__VOLK_DECL_BEGIN
typedef struct volk_gnsssdr_arch_pref
{
char name[128]; //name of the kernel
char impl_a[128]; //best aligned impl
char impl_u[128]; //best unaligned impl
} volk_gnsssdr_arch_pref_t;
////////////////////////////////////////////////////////////////////////
// get path to volk_gnsssdr_config profiling info
////////////////////////////////////////////////////////////////////////
VOLK_API void volk_gnsssdr_get_config_path(char *);
////////////////////////////////////////////////////////////////////////
// load prefs into global prefs struct
////////////////////////////////////////////////////////////////////////
VOLK_API size_t volk_gnsssdr_load_preferences(volk_gnsssdr_arch_pref_t **);
__VOLK_DECL_END
#endif //INCLUDED_VOLK_PREFS_H

View File

@ -0,0 +1,67 @@
########################################################################
# How to create custom kernel dispatchers
########################################################################
A kernel dispatcher is kernel implementation that calls other kernel implementations.
By default, a dispatcher is generated by the build system for every kernel such that:
* the best aligned implemention is called when all pointer arguments are aligned,
* and otherwise the best unaligned implementation is called.
The author of a VOLK kernel may create a custom dispatcher,
to be called in place of the automatically generated one.
A custom dispatcher may be useful to handle head and tail cases,
or to implement different alignment and bounds checking logic.
########################################################################
# Code for an example dispatcher w/ tail case
########################################################################
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#ifdef LV_HAVE_DISPATCHER
static inline void volk_gnsssdr_32f_x2_add_32f_dispatcher(float* cVector, const float* aVector, const float* bVector, unsigned int num_points)
{
const unsigned int num_points_r = num_points%4;
const unsigned int num_points_x = num_points - num_points_r;
if (volk_gnsssdr_is_aligned(VOLK_OR_PTR(cVector, VOLK_OR_PTR(aVector, bVector))))
{
volk_gnsssdr_32f_x2_add_32f_a(cVector, aVector, bVector, num_points_x);
}
else
{
volk_gnsssdr_32f_x2_add_32f_u(cVector, aVector, bVector, num_points_x);
}
volk_gnsssdr_32f_x2_add_32f_g(cVector+num_points_x, aVector+num_points_x, bVector+num_points_x, num_points_r);
}
#endif //LV_HAVE_DISPATCHER
########################################################################
# Code for an example dispatcher w/ tail case and accumulator
########################################################################
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#ifdef LV_HAVE_DISPATCHER
static inline void volk_gnsssdr_32f_x2_dot_prod_32f_dispatcher(float * result, const float * input, const float * taps, unsigned int num_points)
{
const unsigned int num_points_r = num_points%16;
const unsigned int num_points_x = num_points - num_points_r;
if (volk_gnsssdr_is_aligned(VOLK_OR_PTR(input, taps)))
{
volk_gnsssdr_32f_x2_dot_prod_32f_a(result, input, taps, num_points_x);
}
else
{
volk_gnsssdr_32f_x2_dot_prod_32f_u(result, input, taps, num_points_x);
}
float result_tail = 0;
volk_gnsssdr_32f_x2_dot_prod_32f_g(&result_tail, input+num_points_x, taps+num_points_x, num_points_r);
*result += result_tail;
}
#endif //LV_HAVE_DISPATCHER

View File

@ -0,0 +1,241 @@
#ifndef INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H
#define INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
\param inputVector The 16 bit input data buffer
\param outputVector The floating point output data buffer
\param scalar The value divided against each point in the output buffer
\param num_points The number of data values to be converted
\note Output buffer does NOT need to be properly aligned
*/
static inline void volk_gnsssdr_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
unsigned int number = 0;
const unsigned int eighthPoints = num_points / 8;
float* outputVectorPtr = outputVector;
__m128 invScalar = _mm_set_ps1(1.0/scalar);
int16_t* inputPtr = (int16_t*)inputVector;
__m128i inputVal;
__m128i inputVal2;
__m128 ret;
for(;number < eighthPoints; number++){
// Load the 8 values
inputVal = _mm_loadu_si128((__m128i*)inputPtr);
// Shift the input data to the right by 64 bits ( 8 bytes )
inputVal2 = _mm_srli_si128(inputVal, 8);
// Convert the lower 4 values into 32 bit words
inputVal = _mm_cvtepi16_epi32(inputVal);
inputVal2 = _mm_cvtepi16_epi32(inputVal2);
ret = _mm_cvtepi32_ps(inputVal);
ret = _mm_mul_ps(ret, invScalar);
_mm_storeu_ps(outputVectorPtr, ret);
outputVectorPtr += 4;
ret = _mm_cvtepi32_ps(inputVal2);
ret = _mm_mul_ps(ret, invScalar);
_mm_storeu_ps(outputVectorPtr, ret);
outputVectorPtr += 4;
inputPtr += 8;
}
number = eighthPoints * 8;
for(; number < num_points; number++){
outputVector[number] =((float)(inputVector[number])) / scalar;
}
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
\brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
\param inputVector The 16 bit input data buffer
\param outputVector The floating point output data buffer
\param scalar The value divided against each point in the output buffer
\param num_points The number of data values to be converted
\note Output buffer does NOT need to be properly aligned
*/
static inline void volk_gnsssdr_16i_s32f_convert_32f_u_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
unsigned int number = 0;
const unsigned int quarterPoints = num_points / 4;
float* outputVectorPtr = outputVector;
__m128 invScalar = _mm_set_ps1(1.0/scalar);
int16_t* inputPtr = (int16_t*)inputVector;
__m128 ret;
for(;number < quarterPoints; number++){
ret = _mm_set_ps((float)(inputPtr[3]), (float)(inputPtr[2]), (float)(inputPtr[1]), (float)(inputPtr[0]));
ret = _mm_mul_ps(ret, invScalar);
_mm_storeu_ps(outputVectorPtr, ret);
inputPtr += 4;
outputVectorPtr += 4;
}
number = quarterPoints * 4;
for(; number < num_points; number++){
outputVector[number] = (float)(inputVector[number]) / scalar;
}
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
\param inputVector The 16 bit input data buffer
\param outputVector The floating point output data buffer
\param scalar The value divided against each point in the output buffer
\param num_points The number of data values to be converted
\note Output buffer does NOT need to be properly aligned
*/
static inline void volk_gnsssdr_16i_s32f_convert_32f_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
float* outputVectorPtr = outputVector;
const int16_t* inputVectorPtr = inputVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++){
*outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar;
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H */
#ifndef INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H
#define INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
\param inputVector The 16 bit input data buffer
\param outputVector The floating point output data buffer
\param scalar The value divided against each point in the output buffer
\param num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16i_s32f_convert_32f_a_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
unsigned int number = 0;
const unsigned int eighthPoints = num_points / 8;
float* outputVectorPtr = outputVector;
__m128 invScalar = _mm_set_ps1(1.0/scalar);
int16_t* inputPtr = (int16_t*)inputVector;
__m128i inputVal;
__m128i inputVal2;
__m128 ret;
for(;number < eighthPoints; number++){
// Load the 8 values
inputVal = _mm_loadu_si128((__m128i*)inputPtr);
// Shift the input data to the right by 64 bits ( 8 bytes )
inputVal2 = _mm_srli_si128(inputVal, 8);
// Convert the lower 4 values into 32 bit words
inputVal = _mm_cvtepi16_epi32(inputVal);
inputVal2 = _mm_cvtepi16_epi32(inputVal2);
ret = _mm_cvtepi32_ps(inputVal);
ret = _mm_mul_ps(ret, invScalar);
_mm_storeu_ps(outputVectorPtr, ret);
outputVectorPtr += 4;
ret = _mm_cvtepi32_ps(inputVal2);
ret = _mm_mul_ps(ret, invScalar);
_mm_storeu_ps(outputVectorPtr, ret);
outputVectorPtr += 4;
inputPtr += 8;
}
number = eighthPoints * 8;
for(; number < num_points; number++){
outputVector[number] =((float)(inputVector[number])) / scalar;
}
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
\brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
\param inputVector The 16 bit input data buffer
\param outputVector The floating point output data buffer
\param scalar The value divided against each point in the output buffer
\param num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16i_s32f_convert_32f_a_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
unsigned int number = 0;
const unsigned int quarterPoints = num_points / 4;
float* outputVectorPtr = outputVector;
__m128 invScalar = _mm_set_ps1(1.0/scalar);
int16_t* inputPtr = (int16_t*)inputVector;
__m128 ret;
for(;number < quarterPoints; number++){
ret = _mm_set_ps((float)(inputPtr[3]), (float)(inputPtr[2]), (float)(inputPtr[1]), (float)(inputPtr[0]));
ret = _mm_mul_ps(ret, invScalar);
_mm_storeu_ps(outputVectorPtr, ret);
inputPtr += 4;
outputVectorPtr += 4;
}
number = quarterPoints * 4;
for(; number < num_points; number++){
outputVector[number] = (float)(inputVector[number]) / scalar;
}
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value
\param inputVector The 16 bit input data buffer
\param outputVector The floating point output data buffer
\param scalar The value divided against each point in the output buffer
\param num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16i_s32f_convert_32f_a_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){
float* outputVectorPtr = outputVector;
const int16_t* inputVectorPtr = inputVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++){
*outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar;
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H */

View File

@ -0,0 +1,68 @@
#ifndef INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H
#define INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_32f_accumulator_s32f_a_sse(float* result, const float* inputBuffer, unsigned int num_points){
float returnValue = 0;
unsigned int number = 0;
const unsigned int quarterPoints = num_points / 4;
const float* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(16) float tempBuffer[4];
__m128 accumulator = _mm_setzero_ps();
__m128 aVal = _mm_setzero_ps();
for(;number < quarterPoints; number++){
aVal = _mm_load_ps(aPtr);
accumulator = _mm_add_ps(accumulator, aVal);
aPtr += 4;
}
_mm_store_ps(tempBuffer,accumulator); // Store the results back into the C container
returnValue = tempBuffer[0];
returnValue += tempBuffer[1];
returnValue += tempBuffer[2];
returnValue += tempBuffer[3];
number = quarterPoints * 4;
for(;number < num_points; number++){
returnValue += (*aPtr++);
}
*result = returnValue;
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_32f_accumulator_s32f_generic(float* result, const float* inputBuffer, unsigned int num_points){
const float* aPtr = inputBuffer;
unsigned int number = 0;
float returnValue = 0;
for(;number < num_points; number++){
returnValue += (*aPtr++);
}
*result = returnValue;
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H */

View File

@ -0,0 +1,149 @@
#ifndef INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H
#define INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE4_1
#include<smmintrin.h>
static inline void volk_gnsssdr_32f_index_max_16u_a_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) {
if(num_points > 0){
unsigned int number = 0;
const unsigned int quarterPoints = num_points / 4;
float* inputPtr = (float*)src0;
__m128 indexIncrementValues = _mm_set1_ps(4);
__m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
float max = src0[0];
float index = 0;
__m128 maxValues = _mm_set1_ps(max);
__m128 maxValuesIndex = _mm_setzero_ps();
__m128 compareResults;
__m128 currentValues;
__VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
__VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
for(;number < quarterPoints; number++){
currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
compareResults = _mm_cmpgt_ps(maxValues, currentValues);
maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
maxValues = _mm_blendv_ps(currentValues, maxValues, compareResults);
}
// Calculate the largest value from the remaining 4 points
_mm_store_ps(maxValuesBuffer, maxValues);
_mm_store_ps(maxIndexesBuffer, maxValuesIndex);
for(number = 0; number < 4; number++){
if(maxValuesBuffer[number] > max){
index = maxIndexesBuffer[number];
max = maxValuesBuffer[number];
}
}
number = quarterPoints * 4;
for(;number < num_points; number++){
if(src0[number] > max){
index = number;
max = src0[number];
}
}
target[0] = (unsigned int)index;
}
}
#endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_SSE
#include<xmmintrin.h>
static inline void volk_gnsssdr_32f_index_max_16u_a_sse(unsigned int* target, const float* src0, unsigned int num_points) {
if(num_points > 0){
unsigned int number = 0;
const unsigned int quarterPoints = num_points / 4;
float* inputPtr = (float*)src0;
__m128 indexIncrementValues = _mm_set1_ps(4);
__m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
float max = src0[0];
float index = 0;
__m128 maxValues = _mm_set1_ps(max);
__m128 maxValuesIndex = _mm_setzero_ps();
__m128 compareResults;
__m128 currentValues;
__VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
__VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
for(;number < quarterPoints; number++){
currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
compareResults = _mm_cmpgt_ps(maxValues, currentValues);
maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex) , _mm_andnot_ps(compareResults, currentIndexes));
maxValues = _mm_or_ps(_mm_and_ps(compareResults, maxValues) , _mm_andnot_ps(compareResults, currentValues));
}
// Calculate the largest value from the remaining 4 points
_mm_store_ps(maxValuesBuffer, maxValues);
_mm_store_ps(maxIndexesBuffer, maxValuesIndex);
for(number = 0; number < 4; number++){
if(maxValuesBuffer[number] > max){
index = maxIndexesBuffer[number];
max = maxValuesBuffer[number];
}
}
number = quarterPoints * 4;
for(;number < num_points; number++){
if(src0[number] > max){
index = number;
max = src0[number];
}
}
target[0] = (unsigned int)index;
}
}
#endif /*LV_HAVE_SSE*/
#ifdef LV_HAVE_GENERIC
static inline void volk_gnsssdr_32f_index_max_16u_generic(unsigned int* target, const float* src0, unsigned int num_points) {
if(num_points > 0){
float max = src0[0];
unsigned int index = 0;
unsigned int i = 1;
for(; i < num_points; ++i) {
if(src0[i] > max){
index = i;
max = src0[i];
}
}
target[0] = index;
}
}
#endif /*LV_HAVE_GENERIC*/
#endif /*INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H*/

View File

@ -0,0 +1,147 @@
#ifndef INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H
#define INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
\brief Adds the two input vectors and store their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be added
\param bVector One of the vectors to be added
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_32f_x2_add_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
unsigned int number = 0;
const unsigned int quarterPoints = num_points / 4;
float* cPtr = cVector;
const float* aPtr = aVector;
const float* bPtr= bVector;
__m128 aVal, bVal, cVal;
for(;number < quarterPoints; number++){
aVal = _mm_loadu_ps(aPtr);
bVal = _mm_loadu_ps(bPtr);
cVal = _mm_add_ps(aVal, bVal);
_mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
aPtr += 4;
bPtr += 4;
cPtr += 4;
}
number = quarterPoints * 4;
for(;number < num_points; number++){
*cPtr++ = (*aPtr++) + (*bPtr++);
}
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Adds the two input vectors and store their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be added
\param bVector One of the vectors to be added
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_32f_x2_add_32f_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
float* cPtr = cVector;
const float* aPtr = aVector;
const float* bPtr= bVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++){
*cPtr++ = (*aPtr++) + (*bPtr++);
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H */
#ifndef INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H
#define INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
\brief Adds the two input vectors and store their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be added
\param bVector One of the vectors to be added
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_32f_x2_add_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
unsigned int number = 0;
const unsigned int quarterPoints = num_points / 4;
float* cPtr = cVector;
const float* aPtr = aVector;
const float* bPtr= bVector;
__m128 aVal, bVal, cVal;
for(;number < quarterPoints; number++){
aVal = _mm_load_ps(aPtr);
bVal = _mm_load_ps(bPtr);
cVal = _mm_add_ps(aVal, bVal);
_mm_store_ps(cPtr,cVal); // Store the results back into the C container
aPtr += 4;
bPtr += 4;
cPtr += 4;
}
number = quarterPoints * 4;
for(;number < num_points; number++){
*cPtr++ = (*aPtr++) + (*bPtr++);
}
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Adds the two input vectors and store their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be added
\param bVector One of the vectors to be added
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_32f_x2_add_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
float* cPtr = cVector;
const float* aPtr = aVector;
const float* bPtr= bVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++){
*cPtr++ = (*aPtr++) + (*bPtr++);
}
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
\brief Adds the two input vectors and store their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be added
\param bVector One of the vectors to be added
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
extern void volk_gnsssdr_32f_x2_add_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
static inline void volk_gnsssdr_32f_x2_add_32f_u_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
volk_gnsssdr_32f_x2_add_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H */

View File

@ -0,0 +1,127 @@
#ifndef INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H
#define INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Takes the conjugate of a complex vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of complex values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
unsigned int number = 0;
const unsigned int halfPoints = num_points / 2;
__m128 x;
lv_32fc_t* c = cVector;
const lv_32fc_t* a = aVector;
__m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
for(;number < halfPoints; number++){
x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
x = _mm_xor_ps(x, conjugator); // conjugate register
_mm_storeu_ps((float*)c,x); // Store the results back into the C container
a += 2;
c += 2;
}
if((num_points % 2) != 0) {
*c = lv_conj(*a);
}
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Takes the conjugate of a complex vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of complex values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_32fc_conjugate_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
lv_32fc_t* cPtr = cVector;
const lv_32fc_t* aPtr = aVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++){
*cPtr++ = lv_conj(*aPtr++);
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H */
#ifndef INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H
#define INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Takes the conjugate of a complex vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of complex values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
unsigned int number = 0;
const unsigned int halfPoints = num_points / 2;
__m128 x;
lv_32fc_t* c = cVector;
const lv_32fc_t* a = aVector;
__m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
for(;number < halfPoints; number++){
x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
x = _mm_xor_ps(x, conjugator); // conjugate register
_mm_store_ps((float*)c,x); // Store the results back into the C container
a += 2;
c += 2;
}
if((num_points % 2) != 0) {
*c = lv_conj(*a);
}
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Takes the conjugate of a complex vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of complex values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){
lv_32fc_t* cPtr = cVector;
const lv_32fc_t* aPtr = aVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++){
*cPtr++ = lv_conj(*aPtr++);
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H */

View File

@ -0,0 +1,228 @@
#ifndef INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_u_H
#define INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_u_H
#include <inttypes.h>
#include <stdio.h>
#include <math.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
static inline void volk_gnsssdr_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
unsigned int number = 0;
const unsigned int quarterPoints = num_points / 4;
const float* complexVectorPtr = (float*)complexVector;
float* magnitudeVectorPtr = magnitudeVector;
__m128 cplxValue1, cplxValue2, result;
for(;number < quarterPoints; number++){
cplxValue1 = _mm_loadu_ps(complexVectorPtr);
complexVectorPtr += 4;
cplxValue2 = _mm_loadu_ps(complexVectorPtr);
complexVectorPtr += 4;
cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
_mm_storeu_ps(magnitudeVectorPtr, result);
magnitudeVectorPtr += 4;
}
number = quarterPoints * 4;
for(; number < num_points; number++){
float val1Real = *complexVectorPtr++;
float val1Imag = *complexVectorPtr++;
*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
}
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
\brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
static inline void volk_gnsssdr_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
unsigned int number = 0;
const unsigned int quarterPoints = num_points / 4;
const float* complexVectorPtr = (float*)complexVector;
float* magnitudeVectorPtr = magnitudeVector;
__m128 cplxValue1, cplxValue2, iValue, qValue, result;
for(;number < quarterPoints; number++){
cplxValue1 = _mm_loadu_ps(complexVectorPtr);
complexVectorPtr += 4;
cplxValue2 = _mm_loadu_ps(complexVectorPtr);
complexVectorPtr += 4;
// Arrange in i1i2i3i4 format
iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
// Arrange in q1q2q3q4 format
qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
iValue = _mm_mul_ps(iValue, iValue); // Square the I values
qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
_mm_storeu_ps(magnitudeVectorPtr, result);
magnitudeVectorPtr += 4;
}
number = quarterPoints * 4;
for(; number < num_points; number++){
float val1Real = *complexVectorPtr++;
float val1Imag = *complexVectorPtr++;
*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
}
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
static inline void volk_gnsssdr_32fc_magnitude_squared_32f_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
const float* complexVectorPtr = (float*)complexVector;
float* magnitudeVectorPtr = magnitudeVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++){
const float real = *complexVectorPtr++;
const float imag = *complexVectorPtr++;
*magnitudeVectorPtr++ = (real*real) + (imag*imag);
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_32f_u_H */
#ifndef INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_a_H
#define INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_a_H
#include <inttypes.h>
#include <stdio.h>
#include <math.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
unsigned int number = 0;
const unsigned int quarterPoints = num_points / 4;
const float* complexVectorPtr = (float*)complexVector;
float* magnitudeVectorPtr = magnitudeVector;
__m128 cplxValue1, cplxValue2, result;
for(;number < quarterPoints; number++){
cplxValue1 = _mm_load_ps(complexVectorPtr);
complexVectorPtr += 4;
cplxValue2 = _mm_load_ps(complexVectorPtr);
complexVectorPtr += 4;
cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
_mm_store_ps(magnitudeVectorPtr, result);
magnitudeVectorPtr += 4;
}
number = quarterPoints * 4;
for(; number < num_points; number++){
float val1Real = *complexVectorPtr++;
float val1Imag = *complexVectorPtr++;
*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
}
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
\brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
unsigned int number = 0;
const unsigned int quarterPoints = num_points / 4;
const float* complexVectorPtr = (float*)complexVector;
float* magnitudeVectorPtr = magnitudeVector;
__m128 cplxValue1, cplxValue2, iValue, qValue, result;
for(;number < quarterPoints; number++){
cplxValue1 = _mm_load_ps(complexVectorPtr);
complexVectorPtr += 4;
cplxValue2 = _mm_load_ps(complexVectorPtr);
complexVectorPtr += 4;
// Arrange in i1i2i3i4 format
iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
// Arrange in q1q2q3q4 format
qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
iValue = _mm_mul_ps(iValue, iValue); // Square the I values
qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
_mm_store_ps(magnitudeVectorPtr, result);
magnitudeVectorPtr += 4;
}
number = quarterPoints * 4;
for(; number < num_points; number++){
float val1Real = *complexVectorPtr++;
float val1Imag = *complexVectorPtr++;
*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
}
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
const float* complexVectorPtr = (float*)complexVector;
float* magnitudeVectorPtr = magnitudeVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++){
const float real = *complexVectorPtr++;
const float imag = *complexVectorPtr++;
*magnitudeVectorPtr++ = (real*real) + (imag*imag);
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_32f_a_H */

View File

@ -0,0 +1,178 @@
#ifndef INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_u_H
#define INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Multiplies the input vector by a scalar and stores the results in the third vector
\param cVector The vector where the results will be stored
\param aVector The vector to be multiplied
\param scalar The complex scalar to multiply aVector
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
unsigned int number = 0;
const unsigned int halfPoints = num_points / 2;
__m128 x, yl, yh, z, tmp1, tmp2;
lv_32fc_t* c = cVector;
const lv_32fc_t* a = aVector;
// Set up constant scalar vector
yl = _mm_set_ps1(lv_creal(scalar));
yh = _mm_set_ps1(lv_cimag(scalar));
for(;number < halfPoints; number++){
x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
_mm_storeu_ps((float*)c,z); // Store the results back into the C container
a += 2;
c += 2;
}
if((num_points % 2) != 0) {
*c = (*a) * scalar;
}
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the input vector by a scalar and stores the results in the third vector
\param cVector The vector where the results will be stored
\param aVector The vector to be multiplied
\param scalar The complex scalar to multiply aVector
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
lv_32fc_t* cPtr = cVector;
const lv_32fc_t* aPtr = aVector;
unsigned int number = num_points;
// unwrap loop
while (number >= 8){
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
number -= 8;
}
// clean up any remaining
while (number-- > 0)
*cPtr++ = *aPtr++ * scalar;
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H */
#ifndef INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_a_H
#define INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
unsigned int number = 0;
const unsigned int halfPoints = num_points / 2;
__m128 x, yl, yh, z, tmp1, tmp2;
lv_32fc_t* c = cVector;
const lv_32fc_t* a = aVector;
// Set up constant scalar vector
yl = _mm_set_ps1(lv_creal(scalar));
yh = _mm_set_ps1(lv_cimag(scalar));
for(;number < halfPoints; number++){
x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
_mm_store_ps((float*)c,z); // Store the results back into the C container
a += 2;
c += 2;
}
if((num_points % 2) != 0) {
*c = (*a) * scalar;
}
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
lv_32fc_t* cPtr = cVector;
const lv_32fc_t* aPtr = aVector;
unsigned int number = num_points;
// unwrap loop
while (number >= 8){
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
number -= 8;
}
// clean up any remaining
while (number-- > 0)
*cPtr++ = *aPtr++ * scalar;
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H */

View File

@ -0,0 +1,763 @@
#ifndef INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H
#define INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <stdio.h>
#include <string.h>
#ifdef LV_HAVE_GENERIC
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
float * res = (float*) result;
float * in = (float*) input;
float * tp = (float*) taps;
unsigned int n_2_ccomplex_blocks = num_points/2;
unsigned int isodd = num_points & 1;
float sum0[2] = {0,0};
float sum1[2] = {0,0};
unsigned int i = 0;
for(i = 0; i < n_2_ccomplex_blocks; ++i) {
sum0[0] += in[0] * tp[0] - in[1] * tp[1];
sum0[1] += in[0] * tp[1] + in[1] * tp[0];
sum1[0] += in[2] * tp[2] - in[3] * tp[3];
sum1[1] += in[2] * tp[3] + in[3] * tp[2];
in += 4;
tp += 4;
}
res[0] = sum0[0] + sum1[0];
res[1] = sum0[1] + sum1[1];
// Cleanup if we had an odd number of points
for(i = 0; i < isodd; ++i) {
*result += input[num_points - 1] * taps[num_points - 1];
}
}
#endif /*LV_HAVE_GENERIC*/
#if LV_HAVE_SSE && LV_HAVE_64
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
const unsigned int num_bytes = num_points*8;
unsigned int isodd = num_points & 1;
asm
(
"# ccomplex_dotprod_generic (float* result, const float *input,\n\t"
"# const float *taps, unsigned num_bytes)\n\t"
"# float sum0 = 0;\n\t"
"# float sum1 = 0;\n\t"
"# float sum2 = 0;\n\t"
"# float sum3 = 0;\n\t"
"# do {\n\t"
"# sum0 += input[0] * taps[0] - input[1] * taps[1];\n\t"
"# sum1 += input[0] * taps[1] + input[1] * taps[0];\n\t"
"# sum2 += input[2] * taps[2] - input[3] * taps[3];\n\t"
"# sum3 += input[2] * taps[3] + input[3] * taps[2];\n\t"
"# input += 4;\n\t"
"# taps += 4; \n\t"
"# } while (--n_2_ccomplex_blocks != 0);\n\t"
"# result[0] = sum0 + sum2;\n\t"
"# result[1] = sum1 + sum3;\n\t"
"# TODO: prefetch and better scheduling\n\t"
" xor %%r9, %%r9\n\t"
" xor %%r10, %%r10\n\t"
" movq %%rcx, %%rax\n\t"
" movq %%rcx, %%r8\n\t"
" movq %[rsi], %%r9\n\t"
" movq %[rdx], %%r10\n\t"
" xorps %%xmm6, %%xmm6 # zero accumulators\n\t"
" movups 0(%%r9), %%xmm0\n\t"
" xorps %%xmm7, %%xmm7 # zero accumulators\n\t"
" movups 0(%%r10), %%xmm2\n\t"
" shr $5, %%rax # rax = n_2_ccomplex_blocks / 2\n\t"
" shr $4, %%r8\n\t"
" jmp .%=L1_test\n\t"
" # 4 taps / loop\n\t"
" # something like ?? cycles / loop\n\t"
".%=Loop1: \n\t"
"# complex prod: C += A * B, w/ temp Z & Y (or B), xmmPN=$0x8000000080000000\n\t"
"# movups (%%r9), %%xmmA\n\t"
"# movups (%%r10), %%xmmB\n\t"
"# movups %%xmmA, %%xmmZ\n\t"
"# shufps $0xb1, %%xmmZ, %%xmmZ # swap internals\n\t"
"# mulps %%xmmB, %%xmmA\n\t"
"# mulps %%xmmZ, %%xmmB\n\t"
"# # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
"# xorps %%xmmPN, %%xmmA\n\t"
"# movups %%xmmA, %%xmmZ\n\t"
"# unpcklps %%xmmB, %%xmmA\n\t"
"# unpckhps %%xmmB, %%xmmZ\n\t"
"# movups %%xmmZ, %%xmmY\n\t"
"# shufps $0x44, %%xmmA, %%xmmZ # b01000100\n\t"
"# shufps $0xee, %%xmmY, %%xmmA # b11101110\n\t"
"# addps %%xmmZ, %%xmmA\n\t"
"# addps %%xmmA, %%xmmC\n\t"
"# A=xmm0, B=xmm2, Z=xmm4\n\t"
"# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
" movups 16(%%r9), %%xmm1\n\t"
" movups %%xmm0, %%xmm4\n\t"
" mulps %%xmm2, %%xmm0\n\t"
" shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
" movups 16(%%r10), %%xmm3\n\t"
" movups %%xmm1, %%xmm5\n\t"
" addps %%xmm0, %%xmm6\n\t"
" mulps %%xmm3, %%xmm1\n\t"
" shufps $0xb1, %%xmm5, %%xmm5 # swap internals\n\t"
" addps %%xmm1, %%xmm6\n\t"
" mulps %%xmm4, %%xmm2\n\t"
" movups 32(%%r9), %%xmm0\n\t"
" addps %%xmm2, %%xmm7\n\t"
" mulps %%xmm5, %%xmm3\n\t"
" add $32, %%r9\n\t"
" movups 32(%%r10), %%xmm2\n\t"
" addps %%xmm3, %%xmm7\n\t"
" add $32, %%r10\n\t"
".%=L1_test:\n\t"
" dec %%rax\n\t"
" jge .%=Loop1\n\t"
" # We've handled the bulk of multiplies up to here.\n\t"
" # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
" # If so, we've got 2 more taps to do.\n\t"
" and $1, %%r8\n\t"
" je .%=Leven\n\t"
" # The count was odd, do 2 more taps.\n\t"
" # Note that we've already got mm0/mm2 preloaded\n\t"
" # from the main loop.\n\t"
" movups %%xmm0, %%xmm4\n\t"
" mulps %%xmm2, %%xmm0\n\t"
" shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
" addps %%xmm0, %%xmm6\n\t"
" mulps %%xmm4, %%xmm2\n\t"
" addps %%xmm2, %%xmm7\n\t"
".%=Leven:\n\t"
" # neg inversor\n\t"
" xorps %%xmm1, %%xmm1\n\t"
" mov $0x80000000, %%r9\n\t"
" movd %%r9, %%xmm1\n\t"
" shufps $0x11, %%xmm1, %%xmm1 # b00010001 # 0 -0 0 -0\n\t"
" # pfpnacc\n\t"
" xorps %%xmm1, %%xmm6\n\t"
" movups %%xmm6, %%xmm2\n\t"
" unpcklps %%xmm7, %%xmm6\n\t"
" unpckhps %%xmm7, %%xmm2\n\t"
" movups %%xmm2, %%xmm3\n\t"
" shufps $0x44, %%xmm6, %%xmm2 # b01000100\n\t"
" shufps $0xee, %%xmm3, %%xmm6 # b11101110\n\t"
" addps %%xmm2, %%xmm6\n\t"
" # xmm6 = r1 i2 r3 i4\n\t"
" movhlps %%xmm6, %%xmm4 # xmm4 = r3 i4 ?? ??\n\t"
" addps %%xmm4, %%xmm6 # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
" movlps %%xmm6, (%[rdi]) # store low 2x32 bits (complex) to memory\n\t"
:
:[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" (result)
:"rax", "r8", "r9", "r10"
);
if(isodd) {
*result += input[num_points - 1] * taps[num_points - 1];
}
return;
}
#endif /* LV_HAVE_SSE && LV_HAVE_64 */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
lv_32fc_t dotProduct;
memset(&dotProduct, 0x0, 2*sizeof(float));
unsigned int number = 0;
const unsigned int halfPoints = num_points/2;
unsigned int isodd = num_points & 1;
__m128 x, y, yl, yh, z, tmp1, tmp2, dotProdVal;
const lv_32fc_t* a = input;
const lv_32fc_t* b = taps;
dotProdVal = _mm_setzero_ps();
for(;number < halfPoints; number++){
x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
dotProdVal = _mm_add_ps(dotProdVal, z); // Add the complex multiplication results together
a += 2;
b += 2;
}
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];
_mm_storeu_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector
dotProduct += ( dotProductVector[0] + dotProductVector[1] );
if(isodd) {
dotProduct += input[num_points - 1] * taps[num_points - 1];
}
*result = dotProduct;
}
#endif /*LV_HAVE_SSE3*/
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
unsigned int i = 0;
const unsigned int qtr_points = num_points/4;
const unsigned int isodd = num_points & 3;
__m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1;
float *p_input, *p_taps;
__m64 *p_result;
p_result = (__m64*)result;
p_input = (float*)input;
p_taps = (float*)taps;
static const __m128i neg = {0x000000000000000080000000};
real0 = _mm_setzero_ps();
real1 = _mm_setzero_ps();
im0 = _mm_setzero_ps();
im1 = _mm_setzero_ps();
for(; i < qtr_points; ++i) {
xmm0 = _mm_loadu_ps(p_input);
xmm1 = _mm_loadu_ps(p_taps);
p_input += 4;
p_taps += 4;
xmm2 = _mm_loadu_ps(p_input);
xmm3 = _mm_loadu_ps(p_taps);
p_input += 4;
p_taps += 4;
xmm4 = _mm_unpackhi_ps(xmm0, xmm2);
xmm5 = _mm_unpackhi_ps(xmm1, xmm3);
xmm0 = _mm_unpacklo_ps(xmm0, xmm2);
xmm2 = _mm_unpacklo_ps(xmm1, xmm3);
//imaginary vector from input
xmm1 = _mm_unpackhi_ps(xmm0, xmm4);
//real vector from input
xmm3 = _mm_unpacklo_ps(xmm0, xmm4);
//imaginary vector from taps
xmm0 = _mm_unpackhi_ps(xmm2, xmm5);
//real vector from taps
xmm2 = _mm_unpacklo_ps(xmm2, xmm5);
xmm4 = _mm_dp_ps(xmm3, xmm2, 0xf1);
xmm5 = _mm_dp_ps(xmm1, xmm0, 0xf1);
xmm6 = _mm_dp_ps(xmm3, xmm0, 0xf2);
xmm7 = _mm_dp_ps(xmm1, xmm2, 0xf2);
real0 = _mm_add_ps(xmm4, real0);
real1 = _mm_add_ps(xmm5, real1);
im0 = _mm_add_ps(xmm6, im0);
im1 = _mm_add_ps(xmm7, im1);
}
real1 = _mm_xor_ps(real1, bit128_p(&neg)->float_vec);
im0 = _mm_add_ps(im0, im1);
real0 = _mm_add_ps(real0, real1);
im0 = _mm_add_ps(im0, real0);
_mm_storel_pi(p_result, im0);
for(i = num_points-isodd; i < num_points; i++) {
*result += input[i] * taps[i];
}
}
#endif /*LV_HAVE_SSE4_1*/
#endif /*INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H*/
#ifndef INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H
#define INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <stdio.h>
#include <string.h>
#ifdef LV_HAVE_GENERIC
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
const unsigned int num_bytes = num_points*8;
float * res = (float*) result;
float * in = (float*) input;
float * tp = (float*) taps;
unsigned int n_2_ccomplex_blocks = num_bytes >> 4;
unsigned int isodd = num_points & 1;
float sum0[2] = {0,0};
float sum1[2] = {0,0};
unsigned int i = 0;
for(i = 0; i < n_2_ccomplex_blocks; ++i) {
sum0[0] += in[0] * tp[0] - in[1] * tp[1];
sum0[1] += in[0] * tp[1] + in[1] * tp[0];
sum1[0] += in[2] * tp[2] - in[3] * tp[3];
sum1[1] += in[2] * tp[3] + in[3] * tp[2];
in += 4;
tp += 4;
}
res[0] = sum0[0] + sum1[0];
res[1] = sum0[1] + sum1[1];
for(i = 0; i < isodd; ++i) {
*result += input[num_points - 1] * taps[num_points - 1];
}
}
#endif /*LV_HAVE_GENERIC*/
#if LV_HAVE_SSE && LV_HAVE_64
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
const unsigned int num_bytes = num_points*8;
unsigned int isodd = num_points & 1;
asm
(
"# ccomplex_dotprod_generic (float* result, const float *input,\n\t"
"# const float *taps, unsigned num_bytes)\n\t"
"# float sum0 = 0;\n\t"
"# float sum1 = 0;\n\t"
"# float sum2 = 0;\n\t"
"# float sum3 = 0;\n\t"
"# do {\n\t"
"# sum0 += input[0] * taps[0] - input[1] * taps[1];\n\t"
"# sum1 += input[0] * taps[1] + input[1] * taps[0];\n\t"
"# sum2 += input[2] * taps[2] - input[3] * taps[3];\n\t"
"# sum3 += input[2] * taps[3] + input[3] * taps[2];\n\t"
"# input += 4;\n\t"
"# taps += 4; \n\t"
"# } while (--n_2_ccomplex_blocks != 0);\n\t"
"# result[0] = sum0 + sum2;\n\t"
"# result[1] = sum1 + sum3;\n\t"
"# TODO: prefetch and better scheduling\n\t"
" xor %%r9, %%r9\n\t"
" xor %%r10, %%r10\n\t"
" movq %%rcx, %%rax\n\t"
" movq %%rcx, %%r8\n\t"
" movq %[rsi], %%r9\n\t"
" movq %[rdx], %%r10\n\t"
" xorps %%xmm6, %%xmm6 # zero accumulators\n\t"
" movaps 0(%%r9), %%xmm0\n\t"
" xorps %%xmm7, %%xmm7 # zero accumulators\n\t"
" movaps 0(%%r10), %%xmm2\n\t"
" shr $5, %%rax # rax = n_2_ccomplex_blocks / 2\n\t"
" shr $4, %%r8\n\t"
" jmp .%=L1_test\n\t"
" # 4 taps / loop\n\t"
" # something like ?? cycles / loop\n\t"
".%=Loop1: \n\t"
"# complex prod: C += A * B, w/ temp Z & Y (or B), xmmPN=$0x8000000080000000\n\t"
"# movaps (%%r9), %%xmmA\n\t"
"# movaps (%%r10), %%xmmB\n\t"
"# movaps %%xmmA, %%xmmZ\n\t"
"# shufps $0xb1, %%xmmZ, %%xmmZ # swap internals\n\t"
"# mulps %%xmmB, %%xmmA\n\t"
"# mulps %%xmmZ, %%xmmB\n\t"
"# # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
"# xorps %%xmmPN, %%xmmA\n\t"
"# movaps %%xmmA, %%xmmZ\n\t"
"# unpcklps %%xmmB, %%xmmA\n\t"
"# unpckhps %%xmmB, %%xmmZ\n\t"
"# movaps %%xmmZ, %%xmmY\n\t"
"# shufps $0x44, %%xmmA, %%xmmZ # b01000100\n\t"
"# shufps $0xee, %%xmmY, %%xmmA # b11101110\n\t"
"# addps %%xmmZ, %%xmmA\n\t"
"# addps %%xmmA, %%xmmC\n\t"
"# A=xmm0, B=xmm2, Z=xmm4\n\t"
"# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
" movaps 16(%%r9), %%xmm1\n\t"
" movaps %%xmm0, %%xmm4\n\t"
" mulps %%xmm2, %%xmm0\n\t"
" shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
" movaps 16(%%r10), %%xmm3\n\t"
" movaps %%xmm1, %%xmm5\n\t"
" addps %%xmm0, %%xmm6\n\t"
" mulps %%xmm3, %%xmm1\n\t"
" shufps $0xb1, %%xmm5, %%xmm5 # swap internals\n\t"
" addps %%xmm1, %%xmm6\n\t"
" mulps %%xmm4, %%xmm2\n\t"
" movaps 32(%%r9), %%xmm0\n\t"
" addps %%xmm2, %%xmm7\n\t"
" mulps %%xmm5, %%xmm3\n\t"
" add $32, %%r9\n\t"
" movaps 32(%%r10), %%xmm2\n\t"
" addps %%xmm3, %%xmm7\n\t"
" add $32, %%r10\n\t"
".%=L1_test:\n\t"
" dec %%rax\n\t"
" jge .%=Loop1\n\t"
" # We've handled the bulk of multiplies up to here.\n\t"
" # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
" # If so, we've got 2 more taps to do.\n\t"
" and $1, %%r8\n\t"
" je .%=Leven\n\t"
" # The count was odd, do 2 more taps.\n\t"
" # Note that we've already got mm0/mm2 preloaded\n\t"
" # from the main loop.\n\t"
" movaps %%xmm0, %%xmm4\n\t"
" mulps %%xmm2, %%xmm0\n\t"
" shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
" addps %%xmm0, %%xmm6\n\t"
" mulps %%xmm4, %%xmm2\n\t"
" addps %%xmm2, %%xmm7\n\t"
".%=Leven:\n\t"
" # neg inversor\n\t"
" xorps %%xmm1, %%xmm1\n\t"
" mov $0x80000000, %%r9\n\t"
" movd %%r9, %%xmm1\n\t"
" shufps $0x11, %%xmm1, %%xmm1 # b00010001 # 0 -0 0 -0\n\t"
" # pfpnacc\n\t"
" xorps %%xmm1, %%xmm6\n\t"
" movaps %%xmm6, %%xmm2\n\t"
" unpcklps %%xmm7, %%xmm6\n\t"
" unpckhps %%xmm7, %%xmm2\n\t"
" movaps %%xmm2, %%xmm3\n\t"
" shufps $0x44, %%xmm6, %%xmm2 # b01000100\n\t"
" shufps $0xee, %%xmm3, %%xmm6 # b11101110\n\t"
" addps %%xmm2, %%xmm6\n\t"
" # xmm6 = r1 i2 r3 i4\n\t"
" movhlps %%xmm6, %%xmm4 # xmm4 = r3 i4 ?? ??\n\t"
" addps %%xmm4, %%xmm6 # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
" movlps %%xmm6, (%[rdi]) # store low 2x32 bits (complex) to memory\n\t"
:
:[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" (result)
:"rax", "r8", "r9", "r10"
);
if(isodd) {
*result += input[num_points - 1] * taps[num_points - 1];
}
return;
}
#endif
#if LV_HAVE_SSE && LV_HAVE_32
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
volk_gnsssdr_32fc_x2_dot_prod_32fc_a_generic(result, input, taps, num_points);
#if 0
const unsigned int num_bytes = num_points*8;
unsigned int isodd = num_points & 1;
asm volatile
(
" #pushl %%ebp\n\t"
" #movl %%esp, %%ebp\n\t"
" movl 12(%%ebp), %%eax # input\n\t"
" movl 16(%%ebp), %%edx # taps\n\t"
" movl 20(%%ebp), %%ecx # n_bytes\n\t"
" xorps %%xmm6, %%xmm6 # zero accumulators\n\t"
" movaps 0(%%eax), %%xmm0\n\t"
" xorps %%xmm7, %%xmm7 # zero accumulators\n\t"
" movaps 0(%%edx), %%xmm2\n\t"
" shrl $5, %%ecx # ecx = n_2_ccomplex_blocks / 2\n\t"
" jmp .%=L1_test\n\t"
" # 4 taps / loop\n\t"
" # something like ?? cycles / loop\n\t"
".%=Loop1: \n\t"
"# complex prod: C += A * B, w/ temp Z & Y (or B), xmmPN=$0x8000000080000000\n\t"
"# movaps (%%eax), %%xmmA\n\t"
"# movaps (%%edx), %%xmmB\n\t"
"# movaps %%xmmA, %%xmmZ\n\t"
"# shufps $0xb1, %%xmmZ, %%xmmZ # swap internals\n\t"
"# mulps %%xmmB, %%xmmA\n\t"
"# mulps %%xmmZ, %%xmmB\n\t"
"# # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
"# xorps %%xmmPN, %%xmmA\n\t"
"# movaps %%xmmA, %%xmmZ\n\t"
"# unpcklps %%xmmB, %%xmmA\n\t"
"# unpckhps %%xmmB, %%xmmZ\n\t"
"# movaps %%xmmZ, %%xmmY\n\t"
"# shufps $0x44, %%xmmA, %%xmmZ # b01000100\n\t"
"# shufps $0xee, %%xmmY, %%xmmA # b11101110\n\t"
"# addps %%xmmZ, %%xmmA\n\t"
"# addps %%xmmA, %%xmmC\n\t"
"# A=xmm0, B=xmm2, Z=xmm4\n\t"
"# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
" movaps 16(%%eax), %%xmm1\n\t"
" movaps %%xmm0, %%xmm4\n\t"
" mulps %%xmm2, %%xmm0\n\t"
" shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
" movaps 16(%%edx), %%xmm3\n\t"
" movaps %%xmm1, %%xmm5\n\t"
" addps %%xmm0, %%xmm6\n\t"
" mulps %%xmm3, %%xmm1\n\t"
" shufps $0xb1, %%xmm5, %%xmm5 # swap internals\n\t"
" addps %%xmm1, %%xmm6\n\t"
" mulps %%xmm4, %%xmm2\n\t"
" movaps 32(%%eax), %%xmm0\n\t"
" addps %%xmm2, %%xmm7\n\t"
" mulps %%xmm5, %%xmm3\n\t"
" addl $32, %%eax\n\t"
" movaps 32(%%edx), %%xmm2\n\t"
" addps %%xmm3, %%xmm7\n\t"
" addl $32, %%edx\n\t"
".%=L1_test:\n\t"
" decl %%ecx\n\t"
" jge .%=Loop1\n\t"
" # We've handled the bulk of multiplies up to here.\n\t"
" # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
" # If so, we've got 2 more taps to do.\n\t"
" movl 20(%%ebp), %%ecx # n_2_ccomplex_blocks\n\t"
" shrl $4, %%ecx\n\t"
" andl $1, %%ecx\n\t"
" je .%=Leven\n\t"
" # The count was odd, do 2 more taps.\n\t"
" # Note that we've already got mm0/mm2 preloaded\n\t"
" # from the main loop.\n\t"
" movaps %%xmm0, %%xmm4\n\t"
" mulps %%xmm2, %%xmm0\n\t"
" shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t"
" addps %%xmm0, %%xmm6\n\t"
" mulps %%xmm4, %%xmm2\n\t"
" addps %%xmm2, %%xmm7\n\t"
".%=Leven:\n\t"
" # neg inversor\n\t"
" movl 8(%%ebp), %%eax \n\t"
" xorps %%xmm1, %%xmm1\n\t"
" movl $0x80000000, (%%eax)\n\t"
" movss (%%eax), %%xmm1\n\t"
" shufps $0x11, %%xmm1, %%xmm1 # b00010001 # 0 -0 0 -0\n\t"
" # pfpnacc\n\t"
" xorps %%xmm1, %%xmm6\n\t"
" movaps %%xmm6, %%xmm2\n\t"
" unpcklps %%xmm7, %%xmm6\n\t"
" unpckhps %%xmm7, %%xmm2\n\t"
" movaps %%xmm2, %%xmm3\n\t"
" shufps $0x44, %%xmm6, %%xmm2 # b01000100\n\t"
" shufps $0xee, %%xmm3, %%xmm6 # b11101110\n\t"
" addps %%xmm2, %%xmm6\n\t"
" # xmm6 = r1 i2 r3 i4\n\t"
" #movl 8(%%ebp), %%eax # @result\n\t"
" movhlps %%xmm6, %%xmm4 # xmm4 = r3 i4 ?? ??\n\t"
" addps %%xmm4, %%xmm6 # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
" movlps %%xmm6, (%%eax) # store low 2x32 bits (complex) to memory\n\t"
" #popl %%ebp\n\t"
:
:
: "eax", "ecx", "edx"
);
int getem = num_bytes % 16;
if(isodd) {
*result += (input[num_points - 1] * taps[num_points - 1]);
}
return;
#endif
}
#endif /*LV_HAVE_SSE*/
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
const unsigned int num_bytes = num_points*8;
unsigned int isodd = num_points & 1;
lv_32fc_t dotProduct;
memset(&dotProduct, 0x0, 2*sizeof(float));
unsigned int number = 0;
const unsigned int halfPoints = num_bytes >> 4;
__m128 x, y, yl, yh, z, tmp1, tmp2, dotProdVal;
const lv_32fc_t* a = input;
const lv_32fc_t* b = taps;
dotProdVal = _mm_setzero_ps();
for(;number < halfPoints; number++){
x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
dotProdVal = _mm_add_ps(dotProdVal, z); // Add the complex multiplication results together
a += 2;
b += 2;
}
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];
_mm_store_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector
dotProduct += ( dotProductVector[0] + dotProductVector[1] );
if(isodd) {
dotProduct += input[num_points - 1] * taps[num_points - 1];
}
*result = dotProduct;
}
#endif /*LV_HAVE_SSE3*/
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
unsigned int i = 0;
const unsigned int qtr_points = num_points/4;
const unsigned int isodd = num_points & 3;
__m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1;
float *p_input, *p_taps;
__m64 *p_result;
static const __m128i neg = {0x000000000000000080000000};
p_result = (__m64*)result;
p_input = (float*)input;
p_taps = (float*)taps;
real0 = _mm_setzero_ps();
real1 = _mm_setzero_ps();
im0 = _mm_setzero_ps();
im1 = _mm_setzero_ps();
for(; i < qtr_points; ++i) {
xmm0 = _mm_load_ps(p_input);
xmm1 = _mm_load_ps(p_taps);
p_input += 4;
p_taps += 4;
xmm2 = _mm_load_ps(p_input);
xmm3 = _mm_load_ps(p_taps);
p_input += 4;
p_taps += 4;
xmm4 = _mm_unpackhi_ps(xmm0, xmm2);
xmm5 = _mm_unpackhi_ps(xmm1, xmm3);
xmm0 = _mm_unpacklo_ps(xmm0, xmm2);
xmm2 = _mm_unpacklo_ps(xmm1, xmm3);
//imaginary vector from input
xmm1 = _mm_unpackhi_ps(xmm0, xmm4);
//real vector from input
xmm3 = _mm_unpacklo_ps(xmm0, xmm4);
//imaginary vector from taps
xmm0 = _mm_unpackhi_ps(xmm2, xmm5);
//real vector from taps
xmm2 = _mm_unpacklo_ps(xmm2, xmm5);
xmm4 = _mm_dp_ps(xmm3, xmm2, 0xf1);
xmm5 = _mm_dp_ps(xmm1, xmm0, 0xf1);
xmm6 = _mm_dp_ps(xmm3, xmm0, 0xf2);
xmm7 = _mm_dp_ps(xmm1, xmm2, 0xf2);
real0 = _mm_add_ps(xmm4, real0);
real1 = _mm_add_ps(xmm5, real1);
im0 = _mm_add_ps(xmm6, im0);
im1 = _mm_add_ps(xmm7, im1);
}
real1 = _mm_xor_ps(real1, bit128_p(&neg)->float_vec);
im0 = _mm_add_ps(im0, im1);
real0 = _mm_add_ps(real0, real1);
im0 = _mm_add_ps(im0, real0);
_mm_storel_pi(p_result, im0);
for(i = num_points-isodd; i < num_points; i++) {
*result += input[i] * taps[i];
}
}
#endif /*LV_HAVE_SSE4_1*/
#endif /*INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H*/

View File

@ -0,0 +1,170 @@
#ifndef INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H
#define INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_32fc_x2_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
unsigned int number = 0;
const unsigned int halfPoints = num_points / 2;
__m128 x, y, yl, yh, z, tmp1, tmp2;
lv_32fc_t* c = cVector;
const lv_32fc_t* a = aVector;
const lv_32fc_t* b = bVector;
for(;number < halfPoints; number++){
x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
_mm_storeu_ps((float*)c,z); // Store the results back into the C container
a += 2;
b += 2;
c += 2;
}
if((num_points % 2) != 0) {
*c = (*a) * (*b);
}
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_32fc_x2_multiply_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
lv_32fc_t* cPtr = cVector;
const lv_32fc_t* aPtr = aVector;
const lv_32fc_t* bPtr= bVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++){
*cPtr++ = (*aPtr++) * (*bPtr++);
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H */
#ifndef INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H
#define INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
unsigned int number = 0;
const unsigned int halfPoints = num_points / 2;
__m128 x, y, yl, yh, z, tmp1, tmp2;
lv_32fc_t* c = cVector;
const lv_32fc_t* a = aVector;
const lv_32fc_t* b = bVector;
for(;number < halfPoints; number++){
x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
_mm_store_ps((float*)c,z); // Store the results back into the C container
a += 2;
b += 2;
c += 2;
}
if((num_points % 2) != 0) {
*c = (*a) * (*b);
}
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_32fc_x2_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
lv_32fc_t* cPtr = cVector;
const lv_32fc_t* aPtr = aVector;
const lv_32fc_t* bPtr= bVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++){
*cPtr++ = (*aPtr++) * (*bPtr++);
}
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
extern void volk_gnsssdr_32fc_x2_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points);
static inline void volk_gnsssdr_32fc_x2_multiply_32fc_u_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
volk_gnsssdr_32fc_x2_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points);
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H */

View File

@ -0,0 +1,409 @@
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H
#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
/*!
* TODO: Code the SSE4 version and benchmark it
*/
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_sse3(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
{
unsigned int number = 0;
const unsigned int halfPoints = num_points / 2;
lv_32fc_t dotProduct_E;
memset(&dotProduct_E, 0x0, 2*sizeof(float));
lv_32fc_t dotProduct_P;
memset(&dotProduct_P, 0x0, 2*sizeof(float));
lv_32fc_t dotProduct_L;
memset(&dotProduct_L, 0x0, 2*sizeof(float));
// Aux vars
__m128 x, y, yl, yh, z, tmp1, tmp2, z_E, z_P, z_L;
z_E = _mm_setzero_ps();
z_P = _mm_setzero_ps();
z_L = _mm_setzero_ps();
//input and output vectors
//lv_32fc_t* _input_BB = input_BB;
const lv_32fc_t* _input = input;
const lv_32fc_t* _carrier = carrier;
const lv_32fc_t* _E_code = E_code;
const lv_32fc_t* _P_code = P_code;
const lv_32fc_t* _L_code = L_code;
for(;number < halfPoints; number++)
{
// carrier wipe-off (vector point-to-point product)
x = _mm_loadu_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm_loadu_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
//_mm_storeu_ps((float*)_input_BB,z); // Store the results back into the _input_BB container
// correlation E,P,L (3x vector scalar product)
// Early
//x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
x = z;
y = _mm_load_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together
// Prompt
//x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm_load_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together
// Late
//x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm_load_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together
/*pointer increment*/
_carrier += 2;
_input += 2;
//_input_BB += 2;
_E_code += 2;
_P_code += 2;
_L_code +=2;
}
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
//__VOLK_ATTR_ALIGNED(16) lv_32fc_t _input_BB;
_mm_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
_mm_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
_mm_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );
if((num_points % 2) != 0)
{
//_input_BB = (*_input) * (*_carrier);
dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
}
*E_out = dotProduct_E;
*P_out = dotProduct_P;
*L_out = dotProduct_L;
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
{
lv_32fc_t bb_signal_sample;
bb_signal_sample = lv_cmake(0, 0);
*E_out = 0;
*P_out = 0;
*L_out = 0;
// perform Early, Prompt and Late correlation
for(int i=0; i < num_points; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = input[i] * carrier[i];
// Now get early, late, and prompt values for each
*E_out += bb_signal_sample * E_code[i];
*P_out += bb_signal_sample * P_code[i];
*L_out += bb_signal_sample * L_code[i];
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H */
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H
#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_sse3(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
{
unsigned int number = 0;
const unsigned int halfPoints = num_points / 2;
lv_32fc_t dotProduct_E;
memset(&dotProduct_E, 0x0, 2*sizeof(float));
lv_32fc_t dotProduct_P;
memset(&dotProduct_P, 0x0, 2*sizeof(float));
lv_32fc_t dotProduct_L;
memset(&dotProduct_L, 0x0, 2*sizeof(float));
// Aux vars
__m128 x, y, yl, yh, z, tmp1, tmp2, z_E, z_P, z_L;
z_E = _mm_setzero_ps();
z_P = _mm_setzero_ps();
z_L = _mm_setzero_ps();
//input and output vectors
//lv_32fc_t* _input_BB = input_BB;
const lv_32fc_t* _input = input;
const lv_32fc_t* _carrier = carrier;
const lv_32fc_t* _E_code = E_code;
const lv_32fc_t* _P_code = P_code;
const lv_32fc_t* _L_code = L_code;
for(;number < halfPoints; number++)
{
// carrier wipe-off (vector point-to-point product)
x = _mm_load_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm_load_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
//_mm_storeu_ps((float*)_input_BB,z); // Store the results back into the _input_BB container
// correlation E,P,L (3x vector scalar product)
// Early
//x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
x = z;
y = _mm_load_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together
// Prompt
//x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm_load_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together
// Late
//x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm_load_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together
/*pointer increment*/
_carrier += 2;
_input += 2;
//_input_BB += 2;
_E_code += 2;
_P_code += 2;
_L_code +=2;
}
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
//__VOLK_ATTR_ALIGNED(16) lv_32fc_t _input_BB;
_mm_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
_mm_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
_mm_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );
if((num_points % 2) != 0)
{
//_input_BB = (*_input) * (*_carrier);
dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
}
*E_out = dotProduct_E;
*P_out = dotProduct_P;
*L_out = dotProduct_L;
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
{
lv_32fc_t bb_signal_sample;
bb_signal_sample = lv_cmake(0, 0);
*E_out = 0;
*P_out = 0;
*L_out = 0;
// perform Early, Prompt and Late correlation
for(int i=0; i < num_points; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = input[i] * carrier[i];
// Now get early, late, and prompt values for each
*E_out += bb_signal_sample * E_code[i];
*P_out += bb_signal_sample * P_code[i];
*L_out += bb_signal_sample * L_code[i];
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H */

View File

@ -0,0 +1,524 @@
/*!
* \file volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5
* \brief Volk protokernel: performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation with 64 bits vectors
* \authors <ul>
* <li>Javier Arribas, 2011. jarribas(at)cttc.es
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that performs the carrier wipe-off mixing and the
* VE, Early, Prompt, Late and VL correlation with 64 bits vectors (32 bits the
* real part and 32 bits the imaginary part):
* - The carrier wipe-off is done by multiplying the input signal by the
* carrier (multiplication of 64 bits vectors) It returns the input
* signal in base band (BB)
* - VE values are calculated by multiplying the input signal in BB by the
* VE code (multiplication of 64 bits vectors), accumulating the results
* - Early values are calculated by multiplying the input signal in BB by the
* early code (multiplication of 64 bits vectors), accumulating the results
* - Prompt values are calculated by multiplying the input signal in BB by the
* prompt code (multiplication of 64 bits vectors), accumulating the results
* - Late values are calculated by multiplying the input signal in BB by the
* late code (multiplication of 64 bits vectors), accumulating the results
* - VL values are calculated by multiplying the input signal in BB by the
* VL code (multiplication of 64 bits vectors), accumulating the results
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H
#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
/*!
* TODO: Code the SSE4 version and benchmark it
*/
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
\param input The input signal input
\param carrier The carrier signal input
\param VE_code VE PRN code replica input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param VL_code VL PRN code replica input
\param VE_out VE correlation output
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param VL_out VL correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_sse3(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
{
unsigned int number = 0;
const unsigned int halfPoints = num_points / 2;
lv_32fc_t dotProduct_VE;
memset(&dotProduct_VE, 0x0, 2*sizeof(float));
lv_32fc_t dotProduct_E;
memset(&dotProduct_E, 0x0, 2*sizeof(float));
lv_32fc_t dotProduct_P;
memset(&dotProduct_P, 0x0, 2*sizeof(float));
lv_32fc_t dotProduct_L;
memset(&dotProduct_L, 0x0, 2*sizeof(float));
lv_32fc_t dotProduct_VL;
memset(&dotProduct_VL, 0x0, 2*sizeof(float));
// Aux vars
__m128 x, y, yl, yh, z, tmp1, tmp2, z_VE, z_E, z_P, z_L, z_VL;
__m128 bb_signal_sample, bb_signal_sample_shuffled;
z_VE = _mm_setzero_ps();
z_E = _mm_setzero_ps();
z_P = _mm_setzero_ps();
z_L = _mm_setzero_ps();
z_VL = _mm_setzero_ps();
//input and output vectors
const lv_32fc_t* _input = input;
const lv_32fc_t* _carrier = carrier;
const lv_32fc_t* _VE_code = VE_code;
const lv_32fc_t* _E_code = E_code;
const lv_32fc_t* _P_code = P_code;
const lv_32fc_t* _L_code = L_code;
const lv_32fc_t* _VL_code = VL_code;
for(;number < halfPoints; number++)
{
// carrier wipe-off (vector point-to-point product)
x = _mm_loadu_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm_loadu_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
bb_signal_sample = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
bb_signal_sample_shuffled = _mm_shuffle_ps(bb_signal_sample,bb_signal_sample,0xB1); // Re-arrange bb_signal_sample to be ai,ar,bi,br
// correlation VE,E,P,L,VL (5x vector scalar product)
// VE
y = _mm_loadu_ps((float*)_VE_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_VE = _mm_add_ps(z_VE, z); // Add the complex multiplication results together
// Early
y = _mm_loadu_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together
// Prompt
y = _mm_loadu_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together
// Late
y = _mm_loadu_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together
// VL
//x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm_loadu_ps((float*)_VL_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_VL = _mm_add_ps(z_VL, z); // Add the complex multiplication results together
/*pointer increment*/
_carrier += 2;
_input += 2;
_VE_code += 2;
_E_code += 2;
_P_code += 2;
_L_code +=2;
_VL_code +=2;
}
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VE[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VL[2];
_mm_storeu_ps((float*)dotProductVector_VE,z_VE); // Store the results back into the dot product vector
_mm_storeu_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
_mm_storeu_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
_mm_storeu_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
_mm_storeu_ps((float*)dotProductVector_VL,z_VL); // Store the results back into the dot product vector
dotProduct_VE += ( dotProductVector_VE[0] + dotProductVector_VE[1] );
dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );
dotProduct_VL += ( dotProductVector_VL[0] + dotProductVector_VL[1] );
if((num_points % 2) != 0)
{
dotProduct_VE += (*_input) * (*_VE_code)*(*_carrier);
dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
dotProduct_VL += (*_input) * (*_VL_code)*(*_carrier);
}
*VE_out = dotProduct_VE;
*E_out = dotProduct_E;
*P_out = dotProduct_P;
*L_out = dotProduct_L;
*VL_out = dotProduct_VL;
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
\param input The input signal input
\param carrier The carrier signal input
\param VE_code VE PRN code replica input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param VL_code VL PRN code replica input
\param VE_out VE correlation output
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param VL_out VL correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
{
lv_32fc_t bb_signal_sample;
bb_signal_sample = lv_cmake(0, 0);
*VE_out = 0;
*E_out = 0;
*P_out = 0;
*L_out = 0;
*VL_out = 0;
// perform Early, Prompt and Late correlation
for(int i=0; i < num_points; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = input[i] * carrier[i];
// Now get early, late, and prompt values for each
*VE_out += bb_signal_sample * VE_code[i];
*E_out += bb_signal_sample * E_code[i];
*P_out += bb_signal_sample * P_code[i];
*L_out += bb_signal_sample * L_code[i];
*VL_out += bb_signal_sample * VL_code[i];
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H */
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H
#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
\param input The input signal input
\param carrier The carrier signal input
\param VE_code VE PRN code replica input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param VL_code VL PRN code replica input
\param VE_out VE correlation output
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param VL_out VL correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_sse3(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
{
unsigned int number = 0;
const unsigned int halfPoints = num_points / 2;
lv_32fc_t dotProduct_VE;
memset(&dotProduct_VE, 0x0, 2*sizeof(float));
lv_32fc_t dotProduct_E;
memset(&dotProduct_E, 0x0, 2*sizeof(float));
lv_32fc_t dotProduct_P;
memset(&dotProduct_P, 0x0, 2*sizeof(float));
lv_32fc_t dotProduct_L;
memset(&dotProduct_L, 0x0, 2*sizeof(float));
lv_32fc_t dotProduct_VL;
memset(&dotProduct_VL, 0x0, 2*sizeof(float));
// Aux vars
__m128 x, y, yl, yh, z, tmp1, tmp2, z_VE, z_E, z_P, z_L, z_VL;
__m128 bb_signal_sample, bb_signal_sample_shuffled;
z_VE = _mm_setzero_ps();
z_E = _mm_setzero_ps();
z_P = _mm_setzero_ps();
z_L = _mm_setzero_ps();
z_VL = _mm_setzero_ps();
//input and output vectors
const lv_32fc_t* _input = input;
const lv_32fc_t* _carrier = carrier;
const lv_32fc_t* _VE_code = VE_code;
const lv_32fc_t* _E_code = E_code;
const lv_32fc_t* _P_code = P_code;
const lv_32fc_t* _L_code = L_code;
const lv_32fc_t* _VL_code = VL_code;
for(;number < halfPoints; number++)
{
// carrier wipe-off (vector point-to-point product)
x = _mm_load_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm_load_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
bb_signal_sample = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
bb_signal_sample_shuffled = _mm_shuffle_ps(bb_signal_sample,bb_signal_sample,0xB1); // Re-arrange bb_signal_sample to be ai,ar,bi,br
// correlation VE,E,P,L,VL (5x vector scalar product)
// VE
y = _mm_load_ps((float*)_VE_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_VE = _mm_add_ps(z_VE, z); // Add the complex multiplication results together
// Early
y = _mm_load_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together
// Prompt
y = _mm_load_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together
// Late
y = _mm_load_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together
// VL
//x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi
y = _mm_load_ps((float*)_VL_code); // Load the cr + ci, dr + di as cr,ci,dr,di
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
z_VL = _mm_add_ps(z_VL, z); // Add the complex multiplication results together
/*pointer increment*/
_carrier += 2;
_input += 2;
_VE_code += 2;
_E_code += 2;
_P_code += 2;
_L_code +=2;
_VL_code +=2;
}
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VE[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VL[2];
_mm_store_ps((float*)dotProductVector_VE,z_VE); // Store the results back into the dot product vector
_mm_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
_mm_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
_mm_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
_mm_store_ps((float*)dotProductVector_VL,z_VL); // Store the results back into the dot product vector
dotProduct_VE += ( dotProductVector_VE[0] + dotProductVector_VE[1] );
dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );
dotProduct_VL += ( dotProductVector_VL[0] + dotProductVector_VL[1] );
if((num_points % 2) != 0)
{
dotProduct_VE += (*_input) * (*_VE_code)*(*_carrier);
dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
dotProduct_VL += (*_input) * (*_VL_code)*(*_carrier);
}
*VE_out = dotProduct_VE;
*E_out = dotProduct_E;
*P_out = dotProduct_P;
*L_out = dotProduct_L;
*VL_out = dotProduct_VL;
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
\param input The input signal input
\param carrier The carrier signal input
\param VE_code VE PRN code replica input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param VL_code VL PRN code replica input
\param VE_out VE correlation output
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param VL_out VL correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
{
lv_32fc_t bb_signal_sample;
bb_signal_sample = lv_cmake(0, 0);
*VE_out = 0;
*E_out = 0;
*P_out = 0;
*L_out = 0;
*VL_out = 0;
// perform Early, Prompt and Late correlation
for(int i=0; i < num_points; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = input[i] * carrier[i];
// Now get early, late, and prompt values for each
*VE_out += bb_signal_sample * VE_code[i];
*E_out += bb_signal_sample * E_code[i];
*P_out += bb_signal_sample * P_code[i];
*L_out += bb_signal_sample * L_code[i];
*VL_out += bb_signal_sample * VL_code[i];
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H */

View File

@ -0,0 +1,183 @@
/*!
* \file volk_gnsssdr_8i_accumulator_s8i.h
* \brief Volk protokernel: 8 bits (char) scalar accumulator
* \authors <ul>
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that implements an accumulator of char values
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H
#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE3
#include <xmmintrin.h>
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_8i_accumulator_s8i_u_sse3(char* result, const char* inputBuffer, unsigned int num_points){
char returnValue = 0;
const unsigned int sse_iters = num_points / 16;
const char* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(16) char tempBuffer[16];
__m128i accumulator = _mm_setzero_si128();
__m128i aVal = _mm_setzero_si128();
for(unsigned int number = 0; number < sse_iters; number++){
aVal = _mm_lddqu_si128((__m128i*)aPtr);
accumulator = _mm_add_epi8(accumulator, aVal);
aPtr += 16;
}
_mm_storeu_si128((__m128i*)tempBuffer,accumulator);
for(int i = 0; i<16; ++i){
returnValue += tempBuffer[i];
}
for(int i = 0; i<(num_points % 16); ++i){
returnValue += (*aPtr++);
}
*result = returnValue;
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_8i_accumulator_s8i_generic(char* result, const char* inputBuffer, unsigned int num_points){
const char* aPtr = inputBuffer;
char returnValue = 0;
for(unsigned int number = 0;number < num_points; number++){
returnValue += (*aPtr++);
}
*result = returnValue;
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H */
#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H
#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE3
#include <xmmintrin.h>
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const char* inputBuffer, unsigned int num_points){
char returnValue = 0;
const unsigned int sse_iters = num_points / 16;
const char* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(16) char tempBuffer[16];
__m128i accumulator = _mm_setzero_si128();
__m128i aVal = _mm_setzero_si128();
for(unsigned int number = 0; number < sse_iters; number++){
aVal = _mm_load_si128((__m128i*)aPtr);
accumulator = _mm_add_epi8(accumulator, aVal);
aPtr += 16;
}
_mm_store_si128((__m128i*)tempBuffer,accumulator);
for(int i = 0; i<16; ++i){
returnValue += tempBuffer[i];
}
for(int i = 0; i<(num_points % 16); ++i){
returnValue += (*aPtr++);
}
*result = returnValue;
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_8i_accumulator_s8i_a_generic(char* result, const char* inputBuffer, unsigned int num_points){
const char* aPtr = inputBuffer;
char returnValue = 0;
for(unsigned int number = 0;number < num_points; number++){
returnValue += (*aPtr++);
}
*result = returnValue;
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
extern void volk_gnsssdr_8i_accumulator_s8i_a_orc_impl(short* result, const char* inputBuffer, unsigned int num_points);
static inline void volk_gnsssdr_8i_accumulator_s8i_u_orc(char* result, const char* inputBuffer, unsigned int num_points){
short res = 0;
char* resc = (char*)&res;
resc++;
volk_gnsssdr_8i_accumulator_s8i_a_orc_impl(&res, inputBuffer, num_points);
*result = *resc;
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H */

View File

@ -0,0 +1,493 @@
/*!
* \file volk_gnsssdr_8i_index_max_16u.h
* \brief Volk protokernel: calculates the index of the maximum value in a group of 8 bits (char) scalars
* \authors <ul>
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that returns the index of the maximum value of a group of 8 bits (char) scalars
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H
#define INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_AVX
#include "immintrin.h"
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, const char* src0, unsigned int num_points) {
if(num_points > 0){
const unsigned int sse_iters = num_points / 32;
char* basePtr = (char*)src0;
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
__VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
__m256i ones, compareResults, currentValues;
__m128i compareResultslo, compareResultshi, maxValues, lo, hi;
ones = _mm256_set1_epi8(0xFF);
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
{
currentValues = _mm256_lddqu_si256((__m256i*)inputPtr);
lo = _mm256_castsi256_si128(currentValues);
hi = _mm256_extractf128_si256(currentValues,1);
compareResultslo = _mm_cmpgt_epi8(maxValues, lo);
compareResultshi = _mm_cmpgt_epi8(maxValues, hi);
//compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h
compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1);
if (!_mm256_testc_si256(compareResults, ones))
{
_mm256_storeu_si256((__m256i*)&currentValuesBuffer, currentValues);
for(int i = 0; i < 32; i++)
{
if(currentValuesBuffer[i] > max)
{
index = inputPtr - basePtr + i;
max = currentValuesBuffer[i];
}
}
maxValues = _mm_set1_epi8(max);
}
inputPtr += 32;
}
for(int i = 0; i<(num_points % 32); ++i)
{
if(src0[i] > max)
{
index = i;
max = src0[i];
}
}
target[0] = index;
}
}
#endif /*LV_HAVE_AVX*/
#ifdef LV_HAVE_SSE4_1
#include<smmintrin.h>
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, const char* src0, unsigned int num_points) {
if(num_points > 0){
const unsigned int sse_iters = num_points / 16;
char* basePtr = (char*)src0;
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
{
currentValues = _mm_lddqu_si128((__m128i*)inputPtr);
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
if (!_mm_test_all_ones(compareResults))
{
_mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues);
for(int i = 0; i < 16; i++)
{
if(currentValuesBuffer[i] > max)
{
index = inputPtr - basePtr + i;
max = currentValuesBuffer[i];
}
}
maxValues = _mm_set1_epi8(max);
}
inputPtr += 16;
}
for(int i = 0; i<(num_points % 16); ++i)
{
if(src0[i] > max)
{
index = i;
max = src0[i];
}
}
target[0] = index;
}
}
#endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_SSE2
#include<xmmintrin.h>
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, const char* src0, unsigned int num_points) {
if(num_points > 0){
const unsigned int sse_iters = num_points / 16;
char* basePtr = (char*)src0;
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
unsigned short mask;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
{
currentValues = _mm_loadu_si128((__m128i*)inputPtr);
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
mask = _mm_movemask_epi8(compareResults);
if (mask != 0xFFFF)
{
_mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues);
mask = ~mask;
int i = 0;
while (mask > 0)
{
if ((mask & 1) == 1)
{
if(currentValuesBuffer[i] > max)
{
index = inputPtr - basePtr + i;
max = currentValuesBuffer[i];
}
}
i++;
mask >>= 1;
}
maxValues = _mm_set1_epi8(max);
}
inputPtr += 16;
}
for(int i = 0; i<(num_points % 16); ++i)
{
if(src0[i] > max)
{
index = i;
max = src0[i];
}
}
target[0] = index;
}
}
#endif /*LV_HAVE_SSE2*/
#ifdef LV_HAVE_GENERIC
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, const char* src0, unsigned int num_points) {
if(num_points > 0)
{
char max = src0[0];
unsigned int index = 0;
for(unsigned int i = 1; i < num_points; ++i)
{
if(src0[i] > max)
{
index = i;
max = src0[i];
}
}
target[0] = index;
}
}
#endif /*LV_HAVE_GENERIC*/
#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H*/
#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H
#define INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_AVX
#include "immintrin.h"
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, const char* src0, unsigned int num_points) {
if(num_points > 0){
const unsigned int sse_iters = num_points / 32;
char* basePtr = (char*)src0;
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
__VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
__m256i ones, compareResults, currentValues;
__m128i compareResultslo, compareResultshi, maxValues, lo, hi;
ones = _mm256_set1_epi8(0xFF);
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
{
currentValues = _mm256_load_si256((__m256i*)inputPtr);
lo = _mm256_castsi256_si128(currentValues);
hi = _mm256_extractf128_si256(currentValues,1);
compareResultslo = _mm_cmpgt_epi8(maxValues, lo);
compareResultshi = _mm_cmpgt_epi8(maxValues, hi);
//compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h
compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1);
if (!_mm256_testc_si256(compareResults, ones))
{
_mm256_store_si256((__m256i*)&currentValuesBuffer, currentValues);
for(int i = 0; i < 32; i++)
{
if(currentValuesBuffer[i] > max)
{
index = inputPtr - basePtr + i;
max = currentValuesBuffer[i];
}
}
maxValues = _mm_set1_epi8(max);
}
inputPtr += 32;
}
for(int i = 0; i<(num_points % 32); ++i)
{
if(src0[i] > max)
{
index = i;
max = src0[i];
}
}
target[0] = index;
}
}
#endif /*LV_HAVE_AVX*/
#ifdef LV_HAVE_SSE4_1
#include "smmintrin.h"
#include "emmintrin.h"
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, const char* src0, unsigned int num_points) {
if(num_points > 0){
const unsigned int sse_iters = num_points / 16;
char* basePtr = (char*)src0;
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
{
currentValues = _mm_load_si128((__m128i*)inputPtr);
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
if (!_mm_test_all_ones(compareResults))
{
_mm_store_si128((__m128i*)&currentValuesBuffer, currentValues);
for(int i = 0; i < 16; i++)
{
if(currentValuesBuffer[i] > max)
{
index = inputPtr - basePtr + i;
max = currentValuesBuffer[i];
}
}
maxValues = _mm_set1_epi8(max);
}
inputPtr += 16;
}
for(int i = 0; i<(num_points % 16); ++i)
{
if(src0[i] > max)
{
index = i;
max = src0[i];
}
}
target[0] = index;
}
}
#endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_SSE2
#include "emmintrin.h"
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, const char* src0, unsigned int num_points) {
if(num_points > 0){
const unsigned int sse_iters = num_points / 16;
char* basePtr = (char*)src0;
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
unsigned short mask;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
{
currentValues = _mm_load_si128((__m128i*)inputPtr);
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
mask = _mm_movemask_epi8(compareResults);
if (mask != 0xFFFF)
{
_mm_store_si128((__m128i*)&currentValuesBuffer, currentValues);
mask = ~mask;
int i = 0;
while (mask > 0)
{
if ((mask & 1) == 1)
{
if(currentValuesBuffer[i] > max)
{
index = inputPtr - basePtr + i;
max = currentValuesBuffer[i];
}
}
i++;
mask >>= 1;
}
maxValues = _mm_set1_epi8(max);
}
inputPtr += 16;
}
for(int i = 0; i<(num_points % 16); ++i)
{
if(src0[i] > max)
{
index = i;
max = src0[i];
}
}
target[0] = index;
}
}
#endif /*LV_HAVE_SSE2*/
#ifdef LV_HAVE_GENERIC
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_a_generic(unsigned int* target, const char* src0, unsigned int num_points) {
if(num_points > 0)
{
char max = src0[0];
unsigned int index = 0;
for(unsigned int i = 1; i < num_points; ++i)
{
if(src0[i] > max)
{
index = i;
max = src0[i];
}
}
target[0] = index;
}
}
#endif /*LV_HAVE_GENERIC*/
#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H*/

View File

@ -0,0 +1,327 @@
/*!
* \file volk_gnsssdr_8i_max_s8i.h
* \brief Volk protokernel: calculates the maximum value in a group of 8 bits (char) scalars
* \authors <ul>
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that returns the maximum value of a group of 8 bits (char) scalars
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_u_H
#define INCLUDED_volk_gnsssdr_8i_max_s8i_u_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE4_1
#include<smmintrin.h>
/*!
\brief Returns the max value in src0
\param target The max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_max_s8i_u_sse4_1(char target, const char* src0, unsigned int num_points) {
if(num_points > 0){
const unsigned int sse_iters = num_points / 16;
char* inputPtr = (char*)src0;
char max = src0[0];
__VOLK_ATTR_ALIGNED(16) char maxValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
{
currentValues = _mm_loadu_si128((__m128i*)inputPtr);
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
maxValues = _mm_blendv_epi8(currentValues, maxValues, compareResults);
inputPtr += 16;
}
_mm_storeu_si128((__m128i*)maxValuesBuffer, maxValues);
for(int i = 0; i<16; ++i)
{
if(maxValuesBuffer[i] > max)
{
max = maxValuesBuffer[i];
}
}
for(int i = 0; i<(num_points % 16); ++i)
{
if(src0[i] > max)
{
max = src0[i];
}
}
target = max;
}
}
#endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_SSE2
#include<xmmintrin.h>
/*!
\brief Returns the max value in src0
\param target The max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_max_s8i_u_sse2(char target, const char* src0, unsigned int num_points) {
if(num_points > 0){
const unsigned int sse_iters = num_points / 16;
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned short mask;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
{
currentValues = _mm_loadu_si128((__m128i*)inputPtr);
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
mask = _mm_movemask_epi8(compareResults);
if (mask != 0xFFFF)
{
_mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues);
mask = ~mask;
int i = 0;
while (mask > 0)
{
if ((mask & 1) == 1)
{
if(currentValuesBuffer[i] > max)
{
max = currentValuesBuffer[i];
}
}
i++;
mask >>= 1;
}
maxValues = _mm_set1_epi8(max);
}
inputPtr += 16;
}
for(int i = 0; i<(num_points % 16); ++i)
{
if(src0[i] > max)
{
max = src0[i];
}
}
target = max;
}
}
#endif /*LV_HAVE_SSE2*/
#ifdef LV_HAVE_GENERIC
/*!
\brief Returns the max value in src0
\param target The max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_max_s8i_generic(char target, const char* src0, unsigned int num_points) {
if(num_points > 0)
{
char max = src0[0];
for(unsigned int i = 1; i < num_points; ++i)
{
if(src0[i] > max)
{
max = src0[i];
}
}
target = max;
}
}
#endif /*LV_HAVE_GENERIC*/
#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_u_H*/
#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_a_H
#define INCLUDED_volk_gnsssdr_8i_max_s8i_a_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE4_1
#include "smmintrin.h"
/*!
\brief Returns the max value in src0
\param target The max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_max_s8i_a_sse4_1(char target, const char* src0, unsigned int num_points) {
if(num_points > 0){
const unsigned int sse_iters = num_points / 16;
char* inputPtr = (char*)src0;
char max = src0[0];
__VOLK_ATTR_ALIGNED(16) char maxValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
{
currentValues = _mm_load_si128((__m128i*)inputPtr);
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
maxValues = _mm_blendv_epi8(currentValues, maxValues, compareResults);
inputPtr += 16;
}
_mm_store_si128((__m128i*)maxValuesBuffer, maxValues);
for(int i = 0; i<16; ++i)
{
if(maxValuesBuffer[i] > max)
{
max = maxValuesBuffer[i];
}
}
for(int i = 0; i<(num_points % 16); ++i)
{
if(src0[i] > max)
{
max = src0[i];
}
}
target = max;
}
}
#endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_SSE2
#include "emmintrin.h"
/*!
\brief Returns the max value in src0
\param target The max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_max_s8i_a_sse2(char target, const char* src0, unsigned int num_points) {
if(num_points > 0){
const unsigned int sse_iters = num_points / 16;
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned short mask;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
{
currentValues = _mm_load_si128((__m128i*)inputPtr);
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
mask = _mm_movemask_epi8(compareResults);
if (mask != 0xFFFF)
{
_mm_store_si128((__m128i*)&currentValuesBuffer, currentValues);
mask = ~mask;
int i = 0;
while (mask > 0)
{
if ((mask & 1) == 1)
{
if(currentValuesBuffer[i] > max)
{
max = currentValuesBuffer[i];
}
}
i++;
mask >>= 1;
}
maxValues = _mm_set1_epi8(max);
}
inputPtr += 16;
}
for(int i = 0; i<(num_points % 16); ++i)
{
if(src0[i] > max)
{
max = src0[i];
}
}
target = max;
}
}
#endif /*LV_HAVE_SSE2*/
#ifdef LV_HAVE_GENERIC
/*!
\brief Returns the max value in src0
\param target The max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_max_s8i_a_generic(char target, const char* src0, unsigned int num_points) {
if(num_points > 0)
{
if(num_points > 0)
{
char max = src0[0];
for(unsigned int i = 1; i < num_points; ++i)
{
if(src0[i] > max)
{
max = src0[i];
}
}
target = max;
}
}
}
#endif /*LV_HAVE_GENERIC*/
#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_a_H*/

View File

@ -0,0 +1,184 @@
/*!
* \file volk_gnsssdr_8i_x2_add_8i.h
* \brief Volk protokernel: adds pairs of 8 bits (char) scalars
* \authors <ul>
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that adds pairs of 8 bits (char) scalars
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H
#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE2
#include "pmmintrin.h"
/*!
\brief Adds the two input vectors and store their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be added
\param bVector One of the vectors to be added
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* aVector, const char* bVector, unsigned int num_points){
const unsigned int sse_iters = num_points / 16;
char* cPtr = cVector;
const char* aPtr = aVector;
const char* bPtr= bVector;
__m128i aVal, bVal, cVal;
for(int number = 0; number < sse_iters; number++){
aVal = _mm_lddqu_si128((__m128i*)aPtr);
bVal = _mm_lddqu_si128((__m128i*)bPtr);
cVal = _mm_add_epi8(aVal, bVal);
_mm_storeu_si128((__m128i*)cPtr,cVal); // Store the results back into the C container
aPtr += 16;
bPtr += 16;
cPtr += 16;
}
for(int i = 0; i<(num_points % 16); ++i)
{
*cPtr++ = (*aPtr++) + (*bPtr++);
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Adds the two input vectors and store their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be added
\param bVector One of the vectors to be added
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_8i_x2_add_8i_generic(char* cVector, const char* aVector, const char* bVector, unsigned int num_points){
char* cPtr = cVector;
const char* aPtr = aVector;
const char* bPtr= bVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++){
*cPtr++ = (*aPtr++) + (*bPtr++);
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H */
#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H
#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE2
#include "pmmintrin.h"
/*!
\brief Adds the two input vectors and store their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be added
\param bVector One of the vectors to be added
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_8i_x2_add_8i_a_sse2(char* cVector, const char* aVector, const char* bVector, unsigned int num_points){
const unsigned int sse_iters = num_points / 16;
char* cPtr = cVector;
const char* aPtr = aVector;
const char* bPtr= bVector;
__m128i aVal, bVal, cVal;
for(int number = 0; number < sse_iters; number++){
aVal = _mm_load_si128((__m128i*)aPtr);
bVal = _mm_load_si128((__m128i*)bPtr);
cVal = _mm_add_epi8(aVal, bVal);
_mm_store_si128((__m128i*)cPtr,cVal); // Store the results back into the C container
aPtr += 16;
bPtr += 16;
cPtr += 16;
}
for(int i = 0; i<(num_points % 16); ++i)
{
*cPtr++ = (*aPtr++) + (*bPtr++);
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Adds the two input vectors and store their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be added
\param bVector One of the vectors to be added
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_8i_x2_add_8i_a_generic(char* cVector, const char* aVector, const char* bVector, unsigned int num_points){
char* cPtr = cVector;
const char* aPtr = aVector;
const char* bPtr= bVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++){
*cPtr++ = (*aPtr++) + (*bPtr++);
}
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
\brief Adds the two input vectors and store their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be added
\param bVector One of the vectors to be added
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
extern void volk_gnsssdr_8i_x2_add_8i_a_orc_impl(char* cVector, const char* aVector, const char* bVector, unsigned int num_points);
static inline void volk_gnsssdr_8i_x2_add_8i_u_orc(char* cVector, const char* aVector, const char* bVector, unsigned int num_points){
volk_gnsssdr_8i_x2_add_8i_a_orc_impl(cVector, aVector, bVector, num_points);
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H */

View File

@ -0,0 +1,326 @@
/*!
* \file volk_gnsssdr_8ic_conjugate_8ic.h
* \brief Volk protokernel: calculates the conjugate of a 16 bits vector
* \authors <ul>
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that calculates the conjugate of a
* 16 bits vector (8 bits the real part and 8 bits the imaginary part)
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H
#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_AVX
#include "immintrin.h"
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_avx(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
const unsigned int sse_iters = num_points / 16;
lv_8sc_t* c = cVector;
const lv_8sc_t* a = aVector;
__m256 tmp;
__m128i tmp128lo, tmp128hi;
__m256 conjugator1 = _mm256_castsi256_ps(_mm256_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255));
__m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1);
for (int i = 0; i < sse_iters; ++i)
{
tmp = _mm256_loadu_ps((float*)a);
tmp = _mm256_xor_ps(tmp, conjugator1);
tmp128lo = _mm256_castsi256_si128(_mm256_castps_si256(tmp));
tmp128lo = _mm_add_epi8(tmp128lo, conjugator2);
tmp128hi = _mm256_extractf128_si256(_mm256_castps_si256(tmp),1);
tmp128hi = _mm_add_epi8(tmp128hi, conjugator2);
//tmp = _mm256_set_m128i(tmp128hi , tmp128lo); //not defined in some versions of immintrin.h
tmp = _mm256_insertf128_si256(_mm256_castsi128_si256(tmp128lo),(tmp128hi),1);
_mm256_storeu_ps((float*)c, tmp);
a += 16;
c += 16;
}
for (int i = 0; i<(num_points % 16); ++i)
{
*c++ = lv_conj(*a++);
}
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSSE3
#include "tmmintrin.h"
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_ssse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
const unsigned int sse_iters = num_points / 8;
lv_8sc_t* c = cVector;
const lv_8sc_t* a = aVector;
__m128i tmp;
__m128i conjugator = _mm_setr_epi8(1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1);
for (int i = 0; i < sse_iters; ++i)
{
tmp = _mm_lddqu_si128((__m128i*)a);
tmp = _mm_sign_epi8(tmp, conjugator);
_mm_storeu_si128((__m128i*)c, tmp);
a += 8;
c += 8;
}
for (int i = 0; i<(num_points % 8); ++i)
{
*c++ = lv_conj(*a++);
}
}
#endif /* LV_HAVE_SSSE3 */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
const unsigned int sse_iters = num_points / 8;
lv_8sc_t* c = cVector;
const lv_8sc_t* a = aVector;
__m128i tmp;
__m128i conjugator1 = _mm_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
__m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1);
for (int i = 0; i < sse_iters; ++i)
{
tmp = _mm_lddqu_si128((__m128i*)a);
tmp = _mm_xor_si128(tmp, conjugator1);
tmp = _mm_add_epi8(tmp, conjugator2);
_mm_storeu_si128((__m128i*)c, tmp);
a += 8;
c += 8;
}
for (int i = 0; i<(num_points % 8); ++i)
{
*c++ = lv_conj(*a++);
}
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
lv_8sc_t* cPtr = cVector;
const lv_8sc_t* aPtr = aVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++){
*cPtr++ = lv_conj(*aPtr++);
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H */
#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H
#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_AVX
#include "immintrin.h"
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_avx(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
const unsigned int sse_iters = num_points / 16;
lv_8sc_t* c = cVector;
const lv_8sc_t* a = aVector;
__m256 tmp;
__m128i tmp128lo, tmp128hi;
__m256 conjugator1 = _mm256_castsi256_ps(_mm256_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255));
__m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1);
for (int i = 0; i < sse_iters; ++i)
{
tmp = _mm256_load_ps((float*)a);
tmp = _mm256_xor_ps(tmp, conjugator1);
tmp128lo = _mm256_castsi256_si128(_mm256_castps_si256(tmp));
tmp128lo = _mm_add_epi8(tmp128lo, conjugator2);
tmp128hi = _mm256_extractf128_si256(_mm256_castps_si256(tmp),1);
tmp128hi = _mm_add_epi8(tmp128hi, conjugator2);
//tmp = _mm256_set_m128i(tmp128hi , tmp128lo); //not defined in some versions of immintrin.h
tmp = _mm256_insertf128_si256(_mm256_castsi128_si256(tmp128lo),(tmp128hi),1);
_mm256_store_ps((float*)c, tmp);
a += 16;
c += 16;
}
for (int i = 0; i<(num_points % 16); ++i)
{
*c++ = lv_conj(*a++);
}
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSSE3
#include "tmmintrin.h"
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_ssse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
const unsigned int sse_iters = num_points / 8;
lv_8sc_t* c = cVector;
const lv_8sc_t* a = aVector;
__m128i tmp;
__m128i conjugator = _mm_setr_epi8(1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1);
for (int i = 0; i < sse_iters; ++i)
{
tmp = _mm_load_si128((__m128i*)a);
tmp = _mm_sign_epi8(tmp, conjugator);
_mm_store_si128((__m128i*)c, tmp);
a += 8;
c += 8;
}
for (int i = 0; i<(num_points % 8); ++i)
{
*c++ = lv_conj(*a++);
}
}
#endif /* LV_HAVE_SSSE3 */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
const unsigned int sse_iters = num_points / 8;
lv_8sc_t* c = cVector;
const lv_8sc_t* a = aVector;
__m128i tmp;
__m128i conjugator1 = _mm_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
__m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1);
for (int i = 0; i < sse_iters; ++i)
{
tmp = _mm_load_si128((__m128i*)a);
tmp = _mm_xor_si128(tmp, conjugator1);
tmp = _mm_add_epi8(tmp, conjugator2);
_mm_store_si128((__m128i*)c, tmp);
a += 8;
c += 8;
}
for (int i = 0; i<(num_points % 8); ++i)
{
*c++ = lv_conj(*a++);
}
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
lv_8sc_t* cPtr = cVector;
const lv_8sc_t* aPtr = aVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++){
*cPtr++ = lv_conj(*aPtr++);
}
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
extern void volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points);
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl(cVector, aVector, num_points);
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H */

View File

@ -0,0 +1,320 @@
/*!
* \file volk_gnsssdr_8ic_magnitude_squared_8i.h
* \brief Volk protokernel: calculates the magnitude squared of a 16 bits vector
* \authors <ul>
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that calculates the magnitude squared of a
* 16 bits vector (8 bits the real part and 8 bits the imaginary part)
* result = (real*real) + (imag*imag)
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_u_H
#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_u_H
#include <inttypes.h>
#include <stdio.h>
#include <math.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
#include "tmmintrin.h"
/*!
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse3(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
const unsigned int sse_iters = num_points / 16;
const char* complexVectorPtr = (char*)complexVector;
char* magnitudeVectorPtr = magnitudeVector;
__m128i zero, result8;
__m128i avector, avectorhi, avectorlo, avectorlomult, avectorhimult, aadded, maska;
__m128i bvector, bvectorhi, bvectorlo, bvectorlomult, bvectorhimult, badded, maskb;
zero = _mm_setzero_si128();
maska = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
maskb = _mm_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
for(int number = 0;number < sse_iters; number++)
{
avector = _mm_lddqu_si128((__m128i*)complexVectorPtr);
avectorlo = _mm_unpacklo_epi8 (avector, zero);
avectorhi = _mm_unpackhi_epi8 (avector, zero);
avectorlomult = _mm_mullo_epi16 (avectorlo, avectorlo);
avectorhimult = _mm_mullo_epi16 (avectorhi, avectorhi);
aadded = _mm_hadd_epi16 (avectorlomult, avectorhimult);
complexVectorPtr += 16;
bvector = _mm_lddqu_si128((__m128i*)complexVectorPtr);
bvectorlo = _mm_unpacklo_epi8 (bvector, zero);
bvectorhi = _mm_unpackhi_epi8 (bvector, zero);
bvectorlomult = _mm_mullo_epi16 (bvectorlo, bvectorlo);
bvectorhimult = _mm_mullo_epi16 (bvectorhi, bvectorhi);
badded = _mm_hadd_epi16 (bvectorlomult, bvectorhimult);
complexVectorPtr += 16;
result8 = _mm_or_si128(_mm_shuffle_epi8(aadded, maska), _mm_shuffle_epi8(badded, maskb));
_mm_storeu_si128((__m128i*)magnitudeVectorPtr, result8);
magnitudeVectorPtr += 16;
}
for (int i = 0; i<(num_points % 16); ++i)
{
const char valReal = *complexVectorPtr++;
const char valImag = *complexVectorPtr++;
*magnitudeVectorPtr++ = (valReal * valReal) + (valImag * valImag);
}
}
#endif /* LV_HAVE_SSE3 */
//#ifdef LV_HAVE_SSE
//#include <xmmintrin.h>
///*!
// \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
// \param complexVector The vector containing the complex input values
// \param magnitudeVector The vector containing the real output values
// \param num_points The number of complex values in complexVector to be calculated and stored into cVector
// */
//static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
// unsigned int number = 0;
// const unsigned int quarterPoints = num_points / 4;
//
// const float* complexVectorPtr = (float*)complexVector;
// float* magnitudeVectorPtr = magnitudeVector;
//
// __m128 cplxValue1, cplxValue2, iValue, qValue, result;
// for(;number < quarterPoints; number++){
// cplxValue1 = _mm_loadu_ps(complexVectorPtr);
// complexVectorPtr += 4;
//
// cplxValue2 = _mm_loadu_ps(complexVectorPtr);
// complexVectorPtr += 4;
//
// // Arrange in i1i2i3i4 format
// iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
// // Arrange in q1q2q3q4 format
// qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
//
// iValue = _mm_mul_ps(iValue, iValue); // Square the I values
// qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
//
// result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
//
// _mm_storeu_ps(magnitudeVectorPtr, result);
// magnitudeVectorPtr += 4;
// }
//
// number = quarterPoints * 4;
// for(; number < num_points; number++){
// float val1Real = *complexVectorPtr++;
// float val1Imag = *complexVectorPtr++;
// *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
// }
//}
//#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_generic(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
const char* complexVectorPtr = (char*)complexVector;
char* magnitudeVectorPtr = magnitudeVector;
for(int number = 0; number < num_points; number++){
const char real = *complexVectorPtr++;
const char imag = *complexVectorPtr++;
*magnitudeVectorPtr++ = (real*real) + (imag*imag);
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_32f_u_H */
#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_a_H
#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_a_H
#include <inttypes.h>
#include <stdio.h>
#include <math.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
const unsigned int sse_iters = num_points / 16;
const char* complexVectorPtr = (char*)complexVector;
char* magnitudeVectorPtr = magnitudeVector;
__m128i zero, result8;
__m128i avector, avectorhi, avectorlo, avectorlomult, avectorhimult, aadded, maska;
__m128i bvector, bvectorhi, bvectorlo, bvectorlomult, bvectorhimult, badded, maskb;
zero = _mm_setzero_si128();
maska = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
maskb = _mm_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
for(int number = 0;number < sse_iters; number++)
{
avector = _mm_load_si128((__m128i*)complexVectorPtr);
avectorlo = _mm_unpacklo_epi8 (avector, zero);
avectorhi = _mm_unpackhi_epi8 (avector, zero);
avectorlomult = _mm_mullo_epi16 (avectorlo, avectorlo);
avectorhimult = _mm_mullo_epi16 (avectorhi, avectorhi);
aadded = _mm_hadd_epi16 (avectorlomult, avectorhimult);
complexVectorPtr += 16;
bvector = _mm_load_si128((__m128i*)complexVectorPtr);
bvectorlo = _mm_unpacklo_epi8 (bvector, zero);
bvectorhi = _mm_unpackhi_epi8 (bvector, zero);
bvectorlomult = _mm_mullo_epi16 (bvectorlo, bvectorlo);
bvectorhimult = _mm_mullo_epi16 (bvectorhi, bvectorhi);
badded = _mm_hadd_epi16 (bvectorlomult, bvectorhimult);
complexVectorPtr += 16;
result8 = _mm_or_si128(_mm_shuffle_epi8(aadded, maska), _mm_shuffle_epi8(badded, maskb));
_mm_store_si128((__m128i*)magnitudeVectorPtr, result8);
magnitudeVectorPtr += 16;
}
for (int i = 0; i<(num_points % 16); ++i)
{
const char valReal = *complexVectorPtr++;
const char valImag = *complexVectorPtr++;
*magnitudeVectorPtr++ = (valReal * valReal) + (valImag * valImag);
}
}
#endif /* LV_HAVE_SSE3 */
//#ifdef LV_HAVE_SSE
//#include <xmmintrin.h>
///*!
// \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
// \param complexVector The vector containing the complex input values
// \param magnitudeVector The vector containing the real output values
// \param num_points The number of complex values in complexVector to be calculated and stored into cVector
// */
//static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
// unsigned int number = 0;
// const unsigned int quarterPoints = num_points / 4;
//
// const float* complexVectorPtr = (float*)complexVector;
// float* magnitudeVectorPtr = magnitudeVector;
//
// __m128 cplxValue1, cplxValue2, iValue, qValue, result;
// for(;number < quarterPoints; number++){
// cplxValue1 = _mm_load_ps(complexVectorPtr);
// complexVectorPtr += 4;
//
// cplxValue2 = _mm_load_ps(complexVectorPtr);
// complexVectorPtr += 4;
//
// // Arrange in i1i2i3i4 format
// iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
// // Arrange in q1q2q3q4 format
// qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
//
// iValue = _mm_mul_ps(iValue, iValue); // Square the I values
// qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
//
// result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
//
// _mm_store_ps(magnitudeVectorPtr, result);
// magnitudeVectorPtr += 4;
// }
//
// number = quarterPoints * 4;
// for(; number < num_points; number++){
// float val1Real = *complexVectorPtr++;
// float val1Imag = *complexVectorPtr++;
// *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
// }
//}
//#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_generic(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
const char* complexVectorPtr = (char*)complexVector;
char* magnitudeVectorPtr = magnitudeVector;
for(int number = 0; number < num_points; number++){
const char real = *complexVectorPtr++;
const char imag = *complexVectorPtr++;
*magnitudeVectorPtr++ = (real*real) + (imag*imag);
}
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
extern void volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points);
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_orc(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl(magnitudeVector, complexVector, num_points);
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_32f_a_H */

View File

@ -0,0 +1,271 @@
/*!
* \file volk_gnsssdr_8ic_s8ic_multiply_8ic.h
* \brief Volk protokernel: multiplies a group of 16 bits vectors by one constant vector
* \authors <ul>
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that multiplies a group of 16 bits vectors
* (8 bits the real part and 8 bits the imaginary part) by one constant vector
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_u_H
#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Multiplies the input vector by a scalar and stores the results in the third vector
\param cVector The vector where the results will be stored
\param aVector The vector to be multiplied
\param scalar The complex scalar to multiply aVector
\param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector
*/
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
const unsigned int sse_iters = num_points / 8;
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
lv_8sc_t* c = cVector;
const lv_8sc_t* a = aVector;
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
y = _mm_set1_epi16 (*(short*)&scalar);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
for(int number = 0;number < sse_iters; number++){
x = _mm_lddqu_si128((__m128i*)a);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
realc = _mm_and_si128 (realc, mult1);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
imagc = _mm_and_si128 (imagc, mult1);
imagc = _mm_slli_si128 (imagc, 1);
totalc = _mm_or_si128 (realc, imagc);
_mm_storeu_si128((__m128i*)c, totalc);
a += 8;
c += 8;
}
for (int i = 0; i<(num_points % 8); ++i)
{
*c++ = (*a++) * scalar;
}
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the input vector by a scalar and stores the results in the third vector
\param cVector The vector where the results will be stored
\param aVector The vector to be multiplied
\param scalar The complex scalar to multiply aVector
\param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector
*/
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
/*lv_8sc_t* cPtr = cVector;
const lv_8sc_t* aPtr = aVector;
for (int i = 0; i<num_points; ++i)
{
*cPtr++ = (*aPtr++) * scalar;
}*/
lv_8sc_t* cPtr = cVector;
const lv_8sc_t* aPtr = aVector;
unsigned int number = num_points;
// unwrap loop
while (number >= 8){
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
number -= 8;
}
// clean up any remaining
while (number-- > 0)
*cPtr++ = *aPtr++ * scalar;
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H */
#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_a_H
#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Multiplies the input vector by a scalar and stores the results in the third vector
\param cVector The vector where the results will be stored
\param aVector The vector to be multiplied
\param scalar The complex scalar to multiply aVector
\param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector
*/
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
const unsigned int sse_iters = num_points / 8;
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
lv_8sc_t* c = cVector;
const lv_8sc_t* a = aVector;
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
y = _mm_set1_epi16 (*(short*)&scalar);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
for(int number = 0;number < sse_iters; number++){
x = _mm_load_si128((__m128i*)a);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
realc = _mm_and_si128 (realc, mult1);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
imagc = _mm_and_si128 (imagc, mult1);
imagc = _mm_slli_si128 (imagc, 1);
totalc = _mm_or_si128 (realc, imagc);
_mm_store_si128((__m128i*)c, totalc);
a += 8;
c += 8;
}
for (int i = 0; i<(num_points % 8); ++i)
{
*c++ = (*a++) * scalar;
}
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the input vector by a scalar and stores the results in the third vector
\param cVector The vector where the results will be stored
\param aVector The vector to be multiplied
\param scalar The complex scalar to multiply aVector
\param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector
*/
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
/*lv_8sc_t* cPtr = cVector;
const lv_8sc_t* aPtr = aVector;
for (int i = 0; i<num_points; ++i)
{
*cPtr++ = (*aPtr++) * scalar;
}*/
lv_8sc_t* cPtr = cVector;
const lv_8sc_t* aPtr = aVector;
unsigned int number = num_points;
// unwrap loop
while (number >= 8){
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
*cPtr++ = (*aPtr++) * scalar;
number -= 8;
}
// clean up any remaining
while (number-- > 0)
*cPtr++ = *aPtr++ * scalar;
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
\brief Multiplies the input vector by a scalar and stores the results in the third vector
\param cVector The vector where the results will be stored
\param aVector The vector to be multiplied
\param scalar The complex scalar to multiply aVector
\param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector
*/
extern void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const char scalarreal, const char scalarimag, unsigned int num_points);
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
volk_gnsssdr_8ic_s8ic_multiply_8ic_a_orc_impl(cVector, aVector, lv_creal(scalar), lv_cimag(scalar), num_points);
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H */

View File

@ -0,0 +1,499 @@
/*!
* \file volk_gnsssdr_8ic_x2_dot_prod_8ic.h
* \brief Volk protokernel: multiplies two 16 bits vectors and accumulates them
* \authors <ul>
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
* and 8 bits the imaginary part) and accumulates them
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H
#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <stdio.h>
#include <string.h>
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
/*lv_8sc_t* cPtr = result;
const lv_8sc_t* aPtr = input;
const lv_8sc_t* bPtr = taps;
for(int number = 0; number < num_points; number++){
*cPtr += (*aPtr++) * (*bPtr++);
}*/
char * res = (char*) result;
char * in = (char*) input;
char * tp = (char*) taps;
unsigned int n_2_ccomplex_blocks = num_points/2;
unsigned int isodd = num_points & 1;
char sum0[2] = {0,0};
char sum1[2] = {0,0};
unsigned int i = 0;
for(i = 0; i < n_2_ccomplex_blocks; ++i) {
sum0[0] += in[0] * tp[0] - in[1] * tp[1];
sum0[1] += in[0] * tp[1] + in[1] * tp[0];
sum1[0] += in[2] * tp[2] - in[3] * tp[3];
sum1[1] += in[2] * tp[3] + in[3] * tp[2];
in += 4;
tp += 4;
}
res[0] = sum0[0] + sum1[0];
res[1] = sum0[1] + sum1[1];
// Cleanup if we had an odd number of points
for(i = 0; i < isodd; ++i) {
*result += input[num_points - 1] * taps[num_points - 1];
}
}
#endif /*LV_HAVE_GENERIC*/
#ifdef LV_HAVE_SSE2
#include "emmintrin.h"
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
lv_8sc_t dotProduct;
memset(&dotProduct, 0x0, 2*sizeof(char));
const lv_8sc_t* a = input;
const lv_8sc_t* b = taps;
const unsigned int sse_iters = num_points/8;
if (sse_iters>0)
{
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc;
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
realcacc = _mm_setzero_si128();
imagcacc = _mm_setzero_si128();
for(int number = 0; number < sse_iters; number++){
x = _mm_lddqu_si128((__m128i*)a);
y = _mm_lddqu_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
realcacc = _mm_add_epi16 (realcacc, realc);
imagcacc = _mm_add_epi16 (imagcacc, imagc);
a += 8;
b += 8;
}
realcacc = _mm_and_si128 (realcacc, mult1);
imagcacc = _mm_and_si128 (imagcacc, mult1);
imagcacc = _mm_slli_si128 (imagcacc, 1);
totalc = _mm_or_si128 (realcacc, imagcacc);
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
_mm_storeu_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
for (int i = 0; i<8; ++i)
{
dotProduct += dotProductVector[i];
}
}
for (int i = 0; i<(num_points % 8); ++i)
{
dotProduct += (*a++) * (*b++);
}
*result = dotProduct;
}
#endif /*LV_HAVE_SSE2*/
#ifdef LV_HAVE_SSE4_1
#include "smmintrin.h"
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
lv_8sc_t dotProduct;
memset(&dotProduct, 0x0, 2*sizeof(char));
const lv_8sc_t* a = input;
const lv_8sc_t* b = taps;
const unsigned int sse_iters = num_points/8;
if (sse_iters>0)
{
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc;
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
realcacc = _mm_setzero_si128();
imagcacc = _mm_setzero_si128();
for(int number = 0; number < sse_iters; number++){
x = _mm_lddqu_si128((__m128i*)a);
y = _mm_lddqu_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
realcacc = _mm_add_epi16 (realcacc, realc);
imagcacc = _mm_add_epi16 (imagcacc, imagc);
a += 8;
b += 8;
}
imagcacc = _mm_slli_si128 (imagcacc, 1);
totalc = _mm_blendv_epi8 (imagcacc, realcacc, mult1);
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
_mm_storeu_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
for (int i = 0; i<8; ++i)
{
dotProduct += dotProductVector[i];
}
}
for (int i = 0; i<(num_points % 8); ++i)
{
dotProduct += (*a++) * (*b++);
}
*result = dotProduct;
}
#endif /*LV_HAVE_SSE4_1*/
#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H*/
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H
#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <stdio.h>
#include <string.h>
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
/*lv_8sc_t* cPtr = result;
const lv_8sc_t* aPtr = input;
const lv_8sc_t* bPtr = taps;
for(int number = 0; number < num_points; number++){
*cPtr += (*aPtr++) * (*bPtr++);
}*/
char * res = (char*) result;
char * in = (char*) input;
char * tp = (char*) taps;
unsigned int n_2_ccomplex_blocks = num_points/2;
unsigned int isodd = num_points & 1;
char sum0[2] = {0,0};
char sum1[2] = {0,0};
unsigned int i = 0;
for(i = 0; i < n_2_ccomplex_blocks; ++i) {
sum0[0] += in[0] * tp[0] - in[1] * tp[1];
sum0[1] += in[0] * tp[1] + in[1] * tp[0];
sum1[0] += in[2] * tp[2] - in[3] * tp[3];
sum1[1] += in[2] * tp[3] + in[3] * tp[2];
in += 4;
tp += 4;
}
res[0] = sum0[0] + sum1[0];
res[1] = sum0[1] + sum1[1];
// Cleanup if we had an odd number of points
for(i = 0; i < isodd; ++i) {
*result += input[num_points - 1] * taps[num_points - 1];
}
}
#endif /*LV_HAVE_GENERIC*/
#ifdef LV_HAVE_SSE2
#include "emmintrin.h"
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
lv_8sc_t dotProduct;
memset(&dotProduct, 0x0, 2*sizeof(char));
const lv_8sc_t* a = input;
const lv_8sc_t* b = taps;
const unsigned int sse_iters = num_points/8;
if (sse_iters>0)
{
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc;
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
realcacc = _mm_setzero_si128();
imagcacc = _mm_setzero_si128();
for(int number = 0; number < sse_iters; number++){
x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
realcacc = _mm_add_epi16 (realcacc, realc);
imagcacc = _mm_add_epi16 (imagcacc, imagc);
a += 8;
b += 8;
}
realcacc = _mm_and_si128 (realcacc, mult1);
imagcacc = _mm_and_si128 (imagcacc, mult1);
imagcacc = _mm_slli_si128 (imagcacc, 1);
totalc = _mm_or_si128 (realcacc, imagcacc);
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
_mm_store_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
for (int i = 0; i<8; ++i)
{
dotProduct += dotProductVector[i];
}
}
for (int i = 0; i<(num_points % 8); ++i)
{
dotProduct += (*a++) * (*b++);
}
*result = dotProduct;
}
#endif /*LV_HAVE_SSE2*/
#ifdef LV_HAVE_SSE4_1
#include "smmintrin.h"
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
lv_8sc_t dotProduct;
memset(&dotProduct, 0x0, 2*sizeof(char));
const lv_8sc_t* a = input;
const lv_8sc_t* b = taps;
const unsigned int sse_iters = num_points/8;
if (sse_iters>0)
{
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc;
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
realcacc = _mm_setzero_si128();
imagcacc = _mm_setzero_si128();
for(int number = 0; number < sse_iters; number++){
x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
realcacc = _mm_add_epi16 (realcacc, realc);
imagcacc = _mm_add_epi16 (imagcacc, imagc);
a += 8;
b += 8;
}
imagcacc = _mm_slli_si128 (imagcacc, 1);
totalc = _mm_blendv_epi8 (imagcacc, realcacc, mult1);
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
_mm_store_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
for (int i = 0; i<8; ++i)
{
dotProduct += dotProductVector[i];
}
}
for (int i = 0; i<(num_points % 8); ++i)
{
dotProduct += (*a++) * (*b++);
}
*result = dotProduct;
}
#endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_ORC
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
*/
extern void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(short* resRealShort, short* resImagShort, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points);
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points){
short resReal = 0;
char* resRealChar = (char*)&resReal;
resRealChar++;
short resImag = 0;
char* resImagChar = (char*)&resImag;
resImagChar++;
volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(&resReal, &resImag, input, taps, num_points);
*result = lv_cmake(*resRealChar, *resImagChar);
}
#endif /* LV_HAVE_ORC */
#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H*/

View File

@ -0,0 +1,346 @@
/*!
* \file volk_gnsssdr_8ic_x2_multiply_8ic.h
* \brief Volk protokernel: multiplies two 16 bits vectors
* \authors <ul>
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
* and 8 bits the imaginary part)
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H
#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_SSE2
#include "emmintrin.h"
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
const unsigned int sse_iters = num_points / 8;
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
lv_8sc_t* c = cVector;
const lv_8sc_t* a = aVector;
const lv_8sc_t* b = bVector;
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
for(int number = 0;number < sse_iters; number++){
x = _mm_lddqu_si128((__m128i*)a);
y = _mm_lddqu_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
realc = _mm_and_si128 (realc, mult1);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
imagc = _mm_and_si128 (imagc, mult1);
imagc = _mm_slli_si128 (imagc, 1);
totalc = _mm_or_si128 (realc, imagc);
_mm_storeu_si128((__m128i*)c, totalc);
a += 8;
b += 8;
c += 8;
}
for (int i = 0; i<(num_points % 8); ++i)
{
*c++ = (*a++) * (*b++);
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_SSE4_1
#include "smmintrin.h"
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
const unsigned int sse_iters = num_points / 8;
__m128i x, y, zero;
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
lv_8sc_t* c = cVector;
const lv_8sc_t* a = aVector;
const lv_8sc_t* b = bVector;
zero = _mm_setzero_si128();
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
for(int number = 0;number < sse_iters; number++){
x = _mm_lddqu_si128((__m128i*)a);
y = _mm_lddqu_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
imagc = _mm_slli_si128 (imagc, 1);
totalc = _mm_blendv_epi8 (imagc, realc, mult1);
_mm_storeu_si128((__m128i*)c, totalc);
a += 8;
b += 8;
c += 8;
}
for (int i = 0; i<(num_points % 8); ++i)
{
*c++ = (*a++) * (*b++);
}
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
lv_8sc_t* cPtr = cVector;
const lv_8sc_t* aPtr = aVector;
const lv_8sc_t* bPtr = bVector;
for(int number = 0; number < num_points; number++){
*cPtr++ = (*aPtr++) * (*bPtr++);
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H */
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H
#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_SSE2
#include "emmintrin.h"
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
const unsigned int sse_iters = num_points / 8;
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
lv_8sc_t* c = cVector;
const lv_8sc_t* a = aVector;
const lv_8sc_t* b = bVector;
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
for(int number = 0;number < sse_iters; number++){
x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
realc = _mm_and_si128 (realc, mult1);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
imagc = _mm_and_si128 (imagc, mult1);
imagc = _mm_slli_si128 (imagc, 1);
totalc = _mm_or_si128 (realc, imagc);
_mm_store_si128((__m128i*)c, totalc);
a += 8;
b += 8;
c += 8;
}
for (int i = 0; i<(num_points % 8); ++i)
{
*c++ = (*a++) * (*b++);
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_SSE4_1
#include "smmintrin.h"
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
const unsigned int sse_iters = num_points / 8;
__m128i x, y, zero;
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
lv_8sc_t* c = cVector;
const lv_8sc_t* a = aVector;
const lv_8sc_t* b = bVector;
zero = _mm_setzero_si128();
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
for(int number = 0;number < sse_iters; number++){
x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
imagc = _mm_slli_si128 (imagc, 1);
totalc = _mm_blendv_epi8 (imagc, realc, mult1);
_mm_store_si128((__m128i*)c, totalc);
a += 8;
b += 8;
c += 8;
}
for (int i = 0; i<(num_points % 8); ++i)
{
*c++ = (*a++) * (*b++);
}
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
lv_8sc_t* cPtr = cVector;
const lv_8sc_t* aPtr = aVector;
const lv_8sc_t* bPtr = bVector;
for(int number = 0; number < num_points; number++){
*cPtr++ = (*aPtr++) * (*bPtr++);
}
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
extern void volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points);
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(cVector, aVector, bVector, num_points);
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H */

View File

@ -0,0 +1,882 @@
/*!
* \file volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3.h
* \brief Volk protokernel: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors
* \authors <ul>
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that performs the carrier wipe-off mixing and the
* Early, Prompt, and Late correlation with 16 bits vectors (8 bits the
* real part and 8 bits the imaginary part):
* - The carrier wipe-off is done by multiplying the input signal by the
* carrier (multiplication of 16 bits vectors) It returns the input
* signal in base band (BB)
* - Early values are calculated by multiplying the input signal in BB by the
* early code (multiplication of 16 bits vectors), accumulating the results
* - Prompt values are calculated by multiplying the input signal in BB by the
* prompt code (multiplication of 16 bits vectors), accumulating the results
* - Late values are calculated by multiplying the input signal in BB by the
* late code (multiplication of 16 bits vectors), accumulating the results
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_H
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include "smmintrin.h"
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse4_1(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
__m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
const lv_8sc_t* input_ptr = input;
const lv_8sc_t* carrier_ptr = carrier;
const lv_8sc_t* E_code_ptr = E_code;
lv_8sc_t* E_out_ptr = E_out;
const lv_8sc_t* L_code_ptr = L_code;
lv_8sc_t* L_out_ptr = L_out;
const lv_8sc_t* P_code_ptr = P_code;
lv_8sc_t* P_out_ptr = P_out;
*E_out_ptr = 0;
*P_out_ptr = 0;
*L_out_ptr = 0;
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
real_E_code_acc = _mm_setzero_si128();
imag_E_code_acc = _mm_setzero_si128();
real_L_code_acc = _mm_setzero_si128();
imag_L_code_acc = _mm_setzero_si128();
real_P_code_acc = _mm_setzero_si128();
imag_P_code_acc = _mm_setzero_si128();
if (sse_iters>0)
{
for(int number = 0;number < sse_iters; number++){
//Perform the carrier wipe-off
x = _mm_lddqu_si128((__m128i*)input_ptr);
y = _mm_lddqu_si128((__m128i*)carrier_ptr);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//Get early values
y = _mm_lddqu_si128((__m128i*)E_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
//Get late values
y = _mm_lddqu_si128((__m128i*)L_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
//Get prompt values
y = _mm_lddqu_si128((__m128i*)P_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
input_ptr += 8;
carrier_ptr += 8;
E_code_ptr += 8;
L_code_ptr += 8;
P_code_ptr += 8;
}
__VOLK_ATTR_ALIGNED(16) lv_16sc_t E_dotProductVector[8];
__VOLK_ATTR_ALIGNED(16) lv_16sc_t L_dotProductVector[8];
__VOLK_ATTR_ALIGNED(16) lv_16sc_t P_dotProductVector[8];
imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
output = _mm_blendv_epi8 (imag_E_code_acc, real_E_code_acc, mult1);
_mm_storeu_si128((__m128i*)E_dotProductVector, output);
imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
output = _mm_blendv_epi8 (imag_L_code_acc, real_L_code_acc, mult1);
_mm_storeu_si128((__m128i*)L_dotProductVector, output);
imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
output = _mm_blendv_epi8 (imag_P_code_acc, real_P_code_acc, mult1);
_mm_storeu_si128((__m128i*)P_dotProductVector, output);
for (int i = 0; i<8; ++i)
{
*E_out_ptr += E_dotProductVector[i];
*L_out_ptr += L_dotProductVector[i];
*P_out_ptr += P_dotProductVector[i];
}
}
lv_8sc_t bb_signal_sample;
for(int i=0; i < num_points%8; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
// Now get early, late, and prompt values for each
*E_out_ptr += bb_signal_sample * (*E_code_ptr++);
*P_out_ptr += bb_signal_sample * (*P_code_ptr++);
*L_out_ptr += bb_signal_sample * (*L_code_ptr++);
}
}
#endif /* LV_HAVE_SSE4_1 */
//#ifdef LV_HAVE_SSE2
//#include "emmintrin.h"
///*!
// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
// \param input The input signal input
// \param carrier The carrier signal input
// \param E_code Early PRN code replica input
// \param P_code Early PRN code replica input
// \param L_code Early PRN code replica input
// \param E_out Early correlation output
// \param P_out Early correlation output
// \param L_out Early correlation output
// \param num_points The number of complex values in vectors
// */
//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
//{
// const unsigned int sse_iters = num_points / 8;
//
// __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
// __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
//
// const lv_8sc_t* input_ptr = input;
// const lv_8sc_t* carrier_ptr = carrier;
//
// const lv_8sc_t* E_code_ptr = E_code;
// lv_8sc_t* E_out_ptr = E_out;
// const lv_8sc_t* L_code_ptr = L_code;
// lv_8sc_t* L_out_ptr = L_out;
// const lv_8sc_t* P_code_ptr = P_code;
// lv_8sc_t* P_out_ptr = P_out;
//
// *E_out_ptr = 0;
// *P_out_ptr = 0;
// *L_out_ptr = 0;
//
// mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
//
// real_E_code_acc = _mm_setzero_si128();
// imag_E_code_acc = _mm_setzero_si128();
// real_L_code_acc = _mm_setzero_si128();
// imag_L_code_acc = _mm_setzero_si128();
// real_P_code_acc = _mm_setzero_si128();
// imag_P_code_acc = _mm_setzero_si128();
//
// if (sse_iters>0)
// {
// for(int number = 0;number < sse_iters; number++){
//
// //Perform the carrier wipe-off
// x = _mm_lddqu_si128((__m128i*)input_ptr);
// y = _mm_lddqu_si128((__m128i*)carrier_ptr);
//
// imagx = _mm_srli_si128 (x, 1);
// imagx = _mm_and_si128 (imagx, mult1);
// realx = _mm_and_si128 (x, mult1);
//
// imagy = _mm_srli_si128 (y, 1);
// imagy = _mm_and_si128 (imagy, mult1);
// realy = _mm_and_si128 (y, mult1);
//
// realx_mult_realy = _mm_mullo_epi16 (realx, realy);
// imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
// realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
// imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
//
// real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
// imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//
// //Get early values
// y = _mm_lddqu_si128((__m128i*)E_code_ptr);
//
// imagy = _mm_srli_si128 (y, 1);
// imagy = _mm_and_si128 (imagy, mult1);
// realy = _mm_and_si128 (y, mult1);
//
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
//
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//
// real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
// imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
//
// //Get late values
// y = _mm_lddqu_si128((__m128i*)L_code_ptr);
//
// imagy = _mm_srli_si128 (y, 1);
// imagy = _mm_and_si128 (imagy, mult1);
// realy = _mm_and_si128 (y, mult1);
//
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
//
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//
// real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
// imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
//
// //Get prompt values
// y = _mm_lddqu_si128((__m128i*)P_code_ptr);
//
// imagy = _mm_srli_si128 (y, 1);
// imagy = _mm_and_si128 (imagy, mult1);
// realy = _mm_and_si128 (y, mult1);
//
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
//
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//
// real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
// imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
//
// input_ptr += 8;
// carrier_ptr += 8;
// E_code_ptr += 8;
// L_code_ptr += 8;
// P_code_ptr += 8;
// }
//
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
//
// real_E_code_acc = _mm_and_si128 (real_E_code_acc, mult1);
// imag_E_code_acc = _mm_and_si128 (imag_E_code_acc, mult1);
// imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
// output = _mm_or_si128 (real_E_code_acc, imag_E_code_acc);
// _mm_storeu_si128((__m128i*)E_dotProductVector, output);
//
// real_L_code_acc = _mm_and_si128 (real_L_code_acc, mult1);
// imag_L_code_acc = _mm_and_si128 (imag_L_code_acc, mult1);
// imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
// output = _mm_or_si128 (real_L_code_acc, imag_L_code_acc);
// _mm_storeu_si128((__m128i*)L_dotProductVector, output);
//
// real_P_code_acc = _mm_and_si128 (real_P_code_acc, mult1);
// imag_P_code_acc = _mm_and_si128 (imag_P_code_acc, mult1);
// imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
// output = _mm_or_si128 (real_P_code_acc, imag_P_code_acc);
// _mm_storeu_si128((__m128i*)P_dotProductVector, output);
//
// for (int i = 0; i<8; ++i)
// {
// *E_out_ptr += E_dotProductVector[i];
// *L_out_ptr += L_dotProductVector[i];
// *P_out_ptr += P_dotProductVector[i];
// }
// }
//
// lv_8sc_t bb_signal_sample;
// for(int i=0; i < num_points%8; ++i)
// {
// //Perform the carrier wipe-off
// bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
// // Now get early, late, and prompt values for each
// *E_out_ptr += bb_signal_sample * (*E_code_ptr++);
// *P_out_ptr += bb_signal_sample * (*P_code_ptr++);
// *L_out_ptr += bb_signal_sample * (*L_code_ptr++);
// }
//}
//
//#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
lv_8sc_t bb_signal_sample;
lv_16sc_t tmp1;
lv_16sc_t tmp2;
lv_16sc_t tmp3;
bb_signal_sample = lv_cmake(0, 0);
*E_out = 0;
*P_out = 0;
*L_out = 0;
// perform Early, Prompt and Late correlation
for(int i=0; i < num_points; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = input[i] * carrier[i];
tmp1 = bb_signal_sample * E_code[i];
tmp2 = bb_signal_sample * P_code[i];
tmp3 = bb_signal_sample * L_code[i];
// Now get early, late, and prompt values for each
*E_out += tmp1;
*P_out += tmp2;
*L_out += tmp3;
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_H */
//#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_H
//#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_H
//
//#include <inttypes.h>
//#include <stdio.h>
//#include <volk_gnsssdr/volk_gnsssdr_complex.h>
//#include <float.h>
//#include <string.h>
//
//#ifdef LV_HAVE_SSE4_1
//#include "smmintrin.h"
///*!
// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
// \param input The input signal input
// \param carrier The carrier signal input
// \param E_code Early PRN code replica input
// \param P_code Early PRN code replica input
// \param L_code Early PRN code replica input
// \param E_out Early correlation output
// \param P_out Early correlation output
// \param L_out Early correlation output
// \param num_points The number of complex values in vectors
// */
//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_sse4_1(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
//{
// const unsigned int sse_iters = num_points / 8;
//
// __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
// __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
//
// const lv_8sc_t* input_ptr = input;
// const lv_8sc_t* carrier_ptr = carrier;
//
// const lv_8sc_t* E_code_ptr = E_code;
// lv_8sc_t* E_out_ptr = E_out;
// const lv_8sc_t* L_code_ptr = L_code;
// lv_8sc_t* L_out_ptr = L_out;
// const lv_8sc_t* P_code_ptr = P_code;
// lv_8sc_t* P_out_ptr = P_out;
//
// *E_out_ptr = 0;
// *P_out_ptr = 0;
// *L_out_ptr = 0;
//
// mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
//
// real_E_code_acc = _mm_setzero_si128();
// imag_E_code_acc = _mm_setzero_si128();
// real_L_code_acc = _mm_setzero_si128();
// imag_L_code_acc = _mm_setzero_si128();
// real_P_code_acc = _mm_setzero_si128();
// imag_P_code_acc = _mm_setzero_si128();
//
// if (sse_iters>0)
// {
// for(int number = 0;number < sse_iters; number++){
//
// //Perform the carrier wipe-off
// x = _mm_load_si128((__m128i*)input_ptr);
// y = _mm_load_si128((__m128i*)carrier_ptr);
//
// imagx = _mm_srli_si128 (x, 1);
// imagx = _mm_and_si128 (imagx, mult1);
// realx = _mm_and_si128 (x, mult1);
//
// imagy = _mm_srli_si128 (y, 1);
// imagy = _mm_and_si128 (imagy, mult1);
// realy = _mm_and_si128 (y, mult1);
//
// realx_mult_realy = _mm_mullo_epi16 (realx, realy);
// imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
// realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
// imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
//
// real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
// imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//
// //Get early values
// y = _mm_load_si128((__m128i*)E_code_ptr);
//
// imagy = _mm_srli_si128 (y, 1);
// imagy = _mm_and_si128 (imagy, mult1);
// realy = _mm_and_si128 (y, mult1);
//
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
//
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//
// real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
// imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
//
// //Get late values
// y = _mm_load_si128((__m128i*)L_code_ptr);
//
// imagy = _mm_srli_si128 (y, 1);
// imagy = _mm_and_si128 (imagy, mult1);
// realy = _mm_and_si128 (y, mult1);
//
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
//
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//
// real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
// imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
//
// //Get prompt values
// y = _mm_load_si128((__m128i*)P_code_ptr);
//
// imagy = _mm_srli_si128 (y, 1);
// imagy = _mm_and_si128 (imagy, mult1);
// realy = _mm_and_si128 (y, mult1);
//
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
//
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//
// real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
// imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
//
// input_ptr += 8;
// carrier_ptr += 8;
// E_code_ptr += 8;
// L_code_ptr += 8;
// P_code_ptr += 8;
// }
//
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
//
// imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
// output = _mm_blendv_epi8 (imag_E_code_acc, real_E_code_acc, mult1);
// _mm_store_si128((__m128i*)E_dotProductVector, output);
//
// imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
// output = _mm_blendv_epi8 (imag_L_code_acc, real_L_code_acc, mult1);
// _mm_store_si128((__m128i*)L_dotProductVector, output);
//
// imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
// output = _mm_blendv_epi8 (imag_P_code_acc, real_P_code_acc, mult1);
// _mm_store_si128((__m128i*)P_dotProductVector, output);
//
// for (int i = 0; i<8; ++i)
// {
// *E_out_ptr += E_dotProductVector[i];
// *L_out_ptr += L_dotProductVector[i];
// *P_out_ptr += P_dotProductVector[i];
// }
// }
//
// lv_8sc_t bb_signal_sample;
// for(int i=0; i < num_points%8; ++i)
// {
// //Perform the carrier wipe-off
// bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
// // Now get early, late, and prompt values for each
// *E_out_ptr += bb_signal_sample * (*E_code_ptr++);
// *P_out_ptr += bb_signal_sample * (*P_code_ptr++);
// *L_out_ptr += bb_signal_sample * (*L_code_ptr++);
// }
//}
//
//#endif /* LV_HAVE_SSE4_1 */
//
//#ifdef LV_HAVE_SSE2
//#include "emmintrin.h"
///*!
// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
// \param input The input signal input
// \param carrier The carrier signal input
// \param E_code Early PRN code replica input
// \param P_code Early PRN code replica input
// \param L_code Early PRN code replica input
// \param E_out Early correlation output
// \param P_out Early correlation output
// \param L_out Early correlation output
// \param num_points The number of complex values in vectors
// */
//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_sse2(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
//{
// const unsigned int sse_iters = num_points / 8;
//
// __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
// __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
//
// const lv_8sc_t* input_ptr = input;
// const lv_8sc_t* carrier_ptr = carrier;
//
// const lv_8sc_t* E_code_ptr = E_code;
// lv_8sc_t* E_out_ptr = E_out;
// const lv_8sc_t* L_code_ptr = L_code;
// lv_8sc_t* L_out_ptr = L_out;
// const lv_8sc_t* P_code_ptr = P_code;
// lv_8sc_t* P_out_ptr = P_out;
//
// *E_out_ptr = 0;
// *P_out_ptr = 0;
// *L_out_ptr = 0;
//
// mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
//
// real_E_code_acc = _mm_setzero_si128();
// imag_E_code_acc = _mm_setzero_si128();
// real_L_code_acc = _mm_setzero_si128();
// imag_L_code_acc = _mm_setzero_si128();
// real_P_code_acc = _mm_setzero_si128();
// imag_P_code_acc = _mm_setzero_si128();
//
// if (sse_iters>0)
// {
// for(int number = 0;number < sse_iters; number++){
//
// //Perform the carrier wipe-off
// x = _mm_load_si128((__m128i*)input_ptr);
// y = _mm_load_si128((__m128i*)carrier_ptr);
//
// imagx = _mm_srli_si128 (x, 1);
// imagx = _mm_and_si128 (imagx, mult1);
// realx = _mm_and_si128 (x, mult1);
//
// imagy = _mm_srli_si128 (y, 1);
// imagy = _mm_and_si128 (imagy, mult1);
// realy = _mm_and_si128 (y, mult1);
//
// realx_mult_realy = _mm_mullo_epi16 (realx, realy);
// imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
// realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
// imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
//
// real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
// imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//
// //Get early values
// y = _mm_load_si128((__m128i*)E_code_ptr);
//
// imagy = _mm_srli_si128 (y, 1);
// imagy = _mm_and_si128 (imagy, mult1);
// realy = _mm_and_si128 (y, mult1);
//
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
//
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//
// real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
// imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
//
// //Get late values
// y = _mm_load_si128((__m128i*)L_code_ptr);
//
// imagy = _mm_srli_si128 (y, 1);
// imagy = _mm_and_si128 (imagy, mult1);
// realy = _mm_and_si128 (y, mult1);
//
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
//
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//
// real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
// imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
//
// //Get prompt values
// y = _mm_load_si128((__m128i*)P_code_ptr);
//
// imagy = _mm_srli_si128 (y, 1);
// imagy = _mm_and_si128 (imagy, mult1);
// realy = _mm_and_si128 (y, mult1);
//
// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
//
// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//
// real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
// imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
//
// input_ptr += 8;
// carrier_ptr += 8;
// E_code_ptr += 8;
// L_code_ptr += 8;
// P_code_ptr += 8;
// }
//
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
// __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
//
// real_E_code_acc = _mm_and_si128 (real_E_code_acc, mult1);
// imag_E_code_acc = _mm_and_si128 (imag_E_code_acc, mult1);
// imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
// output = _mm_or_si128 (real_E_code_acc, imag_E_code_acc);
// _mm_store_si128((__m128i*)E_dotProductVector, output);
//
// real_L_code_acc = _mm_and_si128 (real_L_code_acc, mult1);
// imag_L_code_acc = _mm_and_si128 (imag_L_code_acc, mult1);
// imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
// output = _mm_or_si128 (real_L_code_acc, imag_L_code_acc);
// _mm_store_si128((__m128i*)L_dotProductVector, output);
//
// real_P_code_acc = _mm_and_si128 (real_P_code_acc, mult1);
// imag_P_code_acc = _mm_and_si128 (imag_P_code_acc, mult1);
// imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
// output = _mm_or_si128 (real_P_code_acc, imag_P_code_acc);
// _mm_store_si128((__m128i*)P_dotProductVector, output);
//
// for (int i = 0; i<8; ++i)
// {
// *E_out_ptr += E_dotProductVector[i];
// *L_out_ptr += L_dotProductVector[i];
// *P_out_ptr += P_dotProductVector[i];
// }
// }
//
// lv_8sc_t bb_signal_sample;
// for(int i=0; i < num_points%8; ++i)
// {
// //Perform the carrier wipe-off
// bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
// // Now get early, late, and prompt values for each
// *E_out_ptr += bb_signal_sample * (*E_code_ptr++);
// *P_out_ptr += bb_signal_sample * (*P_code_ptr++);
// *L_out_ptr += bb_signal_sample * (*L_code_ptr++);
// }
//}
//
//#endif /* LV_HAVE_SSE2 */
//
//#ifdef LV_HAVE_GENERIC
///*!
// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
// \param input The input signal input
// \param carrier The carrier signal input
// \param E_code Early PRN code replica input
// \param P_code Early PRN code replica input
// \param L_code Early PRN code replica input
// \param E_out Early correlation output
// \param P_out Early correlation output
// \param L_out Early correlation output
// \param num_points The number of complex values in vectors
// */
//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_generic(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
//{
// lv_8sc_t bb_signal_sample;
//
// bb_signal_sample = lv_cmake(0, 0);
//
// *E_out = 0;
// *P_out = 0;
// *L_out = 0;
// // perform Early, Prompt and Late correlation
// for(int i=0; i < num_points; ++i)
// {
// //Perform the carrier wipe-off
// bb_signal_sample = input[i] * carrier[i];
// // Now get early, late, and prompt values for each
// *E_out += bb_signal_sample * E_code[i];
// *P_out += bb_signal_sample * P_code[i];
// *L_out += bb_signal_sample * L_code[i];
// }
//}
//
//#endif /* LV_HAVE_GENERIC */
//
//#ifdef LV_HAVE_ORC
///*!
// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
// \param input The input signal input
// \param carrier The carrier signal input
// \param E_code Early PRN code replica input
// \param P_code Early PRN code replica input
// \param L_code Early PRN code replica input
// \param E_out Early correlation output
// \param P_out Early correlation output
// \param L_out Early correlation output
// \param num_points The number of complex values in vectors
// */
//
//extern void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_first_a_orc_impl(short* E_out_real, short* E_out_imag, short* P_out_real, short* P_out_imag, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, unsigned int num_points);
//extern void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_second_a_orc_impl(short* L_out_real, short* L_out_imag, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* L_code, unsigned int num_points);
//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_orc(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points){
//
// short E_out_real = 0;
// short E_out_imag = 0;
// char* E_out_real_c = (char*)&E_out_real;
// E_out_real_c++;
// char* E_out_imag_c = (char*)&E_out_imag;
// E_out_imag_c++;
//
// short P_out_real = 0;
// short P_out_imag = 0;
// char* P_out_real_c = (char*)&P_out_real;
// P_out_real_c++;
// char* P_out_imag_c = (char*)&P_out_imag;
// P_out_imag_c++;
//
// short L_out_real = 0;
// short L_out_imag = 0;
// char* L_out_real_c = (char*)&L_out_real;
// L_out_real_c++;
// char* L_out_imag_c = (char*)&L_out_imag;
// L_out_imag_c++;
//
// volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_first_a_orc_impl( &E_out_real, &E_out_imag, &P_out_real, &P_out_imag, input, carrier, E_code, P_code, num_points);
// volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_second_a_orc_impl( &L_out_real, &L_out_imag, input, carrier, L_code, num_points);
//
// //ORC implementation of 8ic_x5_cw_epl_corr_32fc_x3 is done in two different functions because it seems that
// //in one function the length of the code gives memory problems (bad access, segmentation fault).
// //Also, the maximum number of accumulators that can be used is 4 (and we need 6).
// //The "carrier wipe-off" step is done two times: one in the first function and another one in the second.
// //Joining all the ORC code in one function would be quicker because the "carrier wipe-off" step would be done just
// //one time.
//
// *E_out = lv_cmake(*E_out_real_c, *E_out_imag_c);
// *P_out = lv_cmake(*P_out_real_c, *P_out_imag_c);
// *L_out = lv_cmake(*L_out_real_c, *L_out_imag_c);
//}
//#endif /* LV_HAVE_ORC */
//
//#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_H */

View File

@ -0,0 +1,874 @@
/*!
* \file volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.h
* \brief Volk protokernel: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors
* \authors <ul>
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that performs the carrier wipe-off mixing and the
* Early, Prompt, and Late correlation with 16 bits vectors (8 bits the
* real part and 8 bits the imaginary part):
* - The carrier wipe-off is done by multiplying the input signal by the
* carrier (multiplication of 16 bits vectors) It returns the input
* signal in base band (BB)
* - Early values are calculated by multiplying the input signal in BB by the
* early code (multiplication of 16 bits vectors), accumulating the results
* - Prompt values are calculated by multiplying the input signal in BB by the
* prompt code (multiplication of 16 bits vectors), accumulating the results
* - Late values are calculated by multiplying the input signal in BB by the
* late code (multiplication of 16 bits vectors), accumulating the results
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_H
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include "smmintrin.h"
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse4_1(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
__m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
const lv_8sc_t* input_ptr = input;
const lv_8sc_t* carrier_ptr = carrier;
const lv_8sc_t* E_code_ptr = E_code;
lv_8sc_t* E_out_ptr = E_out;
const lv_8sc_t* L_code_ptr = L_code;
lv_8sc_t* L_out_ptr = L_out;
const lv_8sc_t* P_code_ptr = P_code;
lv_8sc_t* P_out_ptr = P_out;
*E_out_ptr = 0;
*P_out_ptr = 0;
*L_out_ptr = 0;
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
real_E_code_acc = _mm_setzero_si128();
imag_E_code_acc = _mm_setzero_si128();
real_L_code_acc = _mm_setzero_si128();
imag_L_code_acc = _mm_setzero_si128();
real_P_code_acc = _mm_setzero_si128();
imag_P_code_acc = _mm_setzero_si128();
if (sse_iters>0)
{
for(int number = 0;number < sse_iters; number++){
//Perform the carrier wipe-off
x = _mm_lddqu_si128((__m128i*)input_ptr);
y = _mm_lddqu_si128((__m128i*)carrier_ptr);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//Get early values
y = _mm_lddqu_si128((__m128i*)E_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
//Get late values
y = _mm_lddqu_si128((__m128i*)L_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
//Get prompt values
y = _mm_lddqu_si128((__m128i*)P_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
input_ptr += 8;
carrier_ptr += 8;
E_code_ptr += 8;
L_code_ptr += 8;
P_code_ptr += 8;
}
__VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
__VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
__VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
output = _mm_blendv_epi8 (imag_E_code_acc, real_E_code_acc, mult1);
_mm_storeu_si128((__m128i*)E_dotProductVector, output);
imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
output = _mm_blendv_epi8 (imag_L_code_acc, real_L_code_acc, mult1);
_mm_storeu_si128((__m128i*)L_dotProductVector, output);
imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
output = _mm_blendv_epi8 (imag_P_code_acc, real_P_code_acc, mult1);
_mm_storeu_si128((__m128i*)P_dotProductVector, output);
for (int i = 0; i<8; ++i)
{
*E_out_ptr += E_dotProductVector[i];
*L_out_ptr += L_dotProductVector[i];
*P_out_ptr += P_dotProductVector[i];
}
}
lv_8sc_t bb_signal_sample;
for(int i=0; i < num_points%8; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
// Now get early, late, and prompt values for each
*E_out_ptr += bb_signal_sample * (*E_code_ptr++);
*P_out_ptr += bb_signal_sample * (*P_code_ptr++);
*L_out_ptr += bb_signal_sample * (*L_code_ptr++);
}
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE2
#include "emmintrin.h"
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
__m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
const lv_8sc_t* input_ptr = input;
const lv_8sc_t* carrier_ptr = carrier;
const lv_8sc_t* E_code_ptr = E_code;
lv_8sc_t* E_out_ptr = E_out;
const lv_8sc_t* L_code_ptr = L_code;
lv_8sc_t* L_out_ptr = L_out;
const lv_8sc_t* P_code_ptr = P_code;
lv_8sc_t* P_out_ptr = P_out;
*E_out_ptr = 0;
*P_out_ptr = 0;
*L_out_ptr = 0;
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
real_E_code_acc = _mm_setzero_si128();
imag_E_code_acc = _mm_setzero_si128();
real_L_code_acc = _mm_setzero_si128();
imag_L_code_acc = _mm_setzero_si128();
real_P_code_acc = _mm_setzero_si128();
imag_P_code_acc = _mm_setzero_si128();
if (sse_iters>0)
{
for(int number = 0;number < sse_iters; number++){
//Perform the carrier wipe-off
x = _mm_lddqu_si128((__m128i*)input_ptr);
y = _mm_lddqu_si128((__m128i*)carrier_ptr);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//Get early values
y = _mm_lddqu_si128((__m128i*)E_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
//Get late values
y = _mm_lddqu_si128((__m128i*)L_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
//Get prompt values
y = _mm_lddqu_si128((__m128i*)P_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
input_ptr += 8;
carrier_ptr += 8;
E_code_ptr += 8;
L_code_ptr += 8;
P_code_ptr += 8;
}
__VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
__VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
__VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
real_E_code_acc = _mm_and_si128 (real_E_code_acc, mult1);
imag_E_code_acc = _mm_and_si128 (imag_E_code_acc, mult1);
imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
output = _mm_or_si128 (real_E_code_acc, imag_E_code_acc);
_mm_storeu_si128((__m128i*)E_dotProductVector, output);
real_L_code_acc = _mm_and_si128 (real_L_code_acc, mult1);
imag_L_code_acc = _mm_and_si128 (imag_L_code_acc, mult1);
imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
output = _mm_or_si128 (real_L_code_acc, imag_L_code_acc);
_mm_storeu_si128((__m128i*)L_dotProductVector, output);
real_P_code_acc = _mm_and_si128 (real_P_code_acc, mult1);
imag_P_code_acc = _mm_and_si128 (imag_P_code_acc, mult1);
imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
output = _mm_or_si128 (real_P_code_acc, imag_P_code_acc);
_mm_storeu_si128((__m128i*)P_dotProductVector, output);
for (int i = 0; i<8; ++i)
{
*E_out_ptr += E_dotProductVector[i];
*L_out_ptr += L_dotProductVector[i];
*P_out_ptr += P_dotProductVector[i];
}
}
lv_8sc_t bb_signal_sample;
for(int i=0; i < num_points%8; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
// Now get early, late, and prompt values for each
*E_out_ptr += bb_signal_sample * (*E_code_ptr++);
*P_out_ptr += bb_signal_sample * (*P_code_ptr++);
*L_out_ptr += bb_signal_sample * (*L_code_ptr++);
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_generic(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
lv_8sc_t bb_signal_sample;
bb_signal_sample = lv_cmake(0, 0);
*E_out = 0;
*P_out = 0;
*L_out = 0;
// perform Early, Prompt and Late correlation
for(int i=0; i < num_points; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = input[i] * carrier[i];
// Now get early, late, and prompt values for each
*E_out += bb_signal_sample * E_code[i];
*P_out += bb_signal_sample * P_code[i];
*L_out += bb_signal_sample * L_code[i];
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_H */
#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_H
#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#include <string.h>
#ifdef LV_HAVE_SSE4_1
#include "smmintrin.h"
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_sse4_1(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
__m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
const lv_8sc_t* input_ptr = input;
const lv_8sc_t* carrier_ptr = carrier;
const lv_8sc_t* E_code_ptr = E_code;
lv_8sc_t* E_out_ptr = E_out;
const lv_8sc_t* L_code_ptr = L_code;
lv_8sc_t* L_out_ptr = L_out;
const lv_8sc_t* P_code_ptr = P_code;
lv_8sc_t* P_out_ptr = P_out;
*E_out_ptr = 0;
*P_out_ptr = 0;
*L_out_ptr = 0;
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
real_E_code_acc = _mm_setzero_si128();
imag_E_code_acc = _mm_setzero_si128();
real_L_code_acc = _mm_setzero_si128();
imag_L_code_acc = _mm_setzero_si128();
real_P_code_acc = _mm_setzero_si128();
imag_P_code_acc = _mm_setzero_si128();
if (sse_iters>0)
{
for(int number = 0;number < sse_iters; number++){
//Perform the carrier wipe-off
x = _mm_load_si128((__m128i*)input_ptr);
y = _mm_load_si128((__m128i*)carrier_ptr);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//Get early values
y = _mm_load_si128((__m128i*)E_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
//Get late values
y = _mm_load_si128((__m128i*)L_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
//Get prompt values
y = _mm_load_si128((__m128i*)P_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
input_ptr += 8;
carrier_ptr += 8;
E_code_ptr += 8;
L_code_ptr += 8;
P_code_ptr += 8;
}
__VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
__VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
__VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
output = _mm_blendv_epi8 (imag_E_code_acc, real_E_code_acc, mult1);
_mm_store_si128((__m128i*)E_dotProductVector, output);
imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
output = _mm_blendv_epi8 (imag_L_code_acc, real_L_code_acc, mult1);
_mm_store_si128((__m128i*)L_dotProductVector, output);
imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
output = _mm_blendv_epi8 (imag_P_code_acc, real_P_code_acc, mult1);
_mm_store_si128((__m128i*)P_dotProductVector, output);
for (int i = 0; i<8; ++i)
{
*E_out_ptr += E_dotProductVector[i];
*L_out_ptr += L_dotProductVector[i];
*P_out_ptr += P_dotProductVector[i];
}
}
lv_8sc_t bb_signal_sample;
for(int i=0; i < num_points%8; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
// Now get early, late, and prompt values for each
*E_out_ptr += bb_signal_sample * (*E_code_ptr++);
*P_out_ptr += bb_signal_sample * (*P_code_ptr++);
*L_out_ptr += bb_signal_sample * (*L_code_ptr++);
}
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE2
#include "emmintrin.h"
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_sse2(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
__m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc;
__m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output;
const lv_8sc_t* input_ptr = input;
const lv_8sc_t* carrier_ptr = carrier;
const lv_8sc_t* E_code_ptr = E_code;
lv_8sc_t* E_out_ptr = E_out;
const lv_8sc_t* L_code_ptr = L_code;
lv_8sc_t* L_out_ptr = L_out;
const lv_8sc_t* P_code_ptr = P_code;
lv_8sc_t* P_out_ptr = P_out;
*E_out_ptr = 0;
*P_out_ptr = 0;
*L_out_ptr = 0;
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
real_E_code_acc = _mm_setzero_si128();
imag_E_code_acc = _mm_setzero_si128();
real_L_code_acc = _mm_setzero_si128();
imag_L_code_acc = _mm_setzero_si128();
real_P_code_acc = _mm_setzero_si128();
imag_P_code_acc = _mm_setzero_si128();
if (sse_iters>0)
{
for(int number = 0;number < sse_iters; number++){
//Perform the carrier wipe-off
x = _mm_load_si128((__m128i*)input_ptr);
y = _mm_load_si128((__m128i*)carrier_ptr);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
//Get early values
y = _mm_load_si128((__m128i*)E_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output);
imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output);
//Get late values
y = _mm_load_si128((__m128i*)L_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output);
imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output);
//Get prompt values
y = _mm_load_si128((__m128i*)P_code_ptr);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy);
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy);
real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output);
imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output);
input_ptr += 8;
carrier_ptr += 8;
E_code_ptr += 8;
L_code_ptr += 8;
P_code_ptr += 8;
}
__VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8];
__VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8];
__VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8];
real_E_code_acc = _mm_and_si128 (real_E_code_acc, mult1);
imag_E_code_acc = _mm_and_si128 (imag_E_code_acc, mult1);
imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1);
output = _mm_or_si128 (real_E_code_acc, imag_E_code_acc);
_mm_store_si128((__m128i*)E_dotProductVector, output);
real_L_code_acc = _mm_and_si128 (real_L_code_acc, mult1);
imag_L_code_acc = _mm_and_si128 (imag_L_code_acc, mult1);
imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1);
output = _mm_or_si128 (real_L_code_acc, imag_L_code_acc);
_mm_store_si128((__m128i*)L_dotProductVector, output);
real_P_code_acc = _mm_and_si128 (real_P_code_acc, mult1);
imag_P_code_acc = _mm_and_si128 (imag_P_code_acc, mult1);
imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1);
output = _mm_or_si128 (real_P_code_acc, imag_P_code_acc);
_mm_store_si128((__m128i*)P_dotProductVector, output);
for (int i = 0; i<8; ++i)
{
*E_out_ptr += E_dotProductVector[i];
*L_out_ptr += L_dotProductVector[i];
*P_out_ptr += P_dotProductVector[i];
}
}
lv_8sc_t bb_signal_sample;
for(int i=0; i < num_points%8; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = (*input_ptr++) * (*carrier_ptr++);
// Now get early, late, and prompt values for each
*E_out_ptr += bb_signal_sample * (*E_code_ptr++);
*P_out_ptr += bb_signal_sample * (*P_code_ptr++);
*L_out_ptr += bb_signal_sample * (*L_code_ptr++);
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param num_points The number of complex values in vectors
*/
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_generic(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points)
{
lv_8sc_t bb_signal_sample;
bb_signal_sample = lv_cmake(0, 0);
*E_out = 0;
*P_out = 0;
*L_out = 0;
// perform Early, Prompt and Late correlation
for(int i=0; i < num_points; ++i)
{
//Perform the carrier wipe-off
bb_signal_sample = input[i] * carrier[i];
// Now get early, late, and prompt values for each
*E_out += bb_signal_sample * E_code[i];
*P_out += bb_signal_sample * P_code[i];
*L_out += bb_signal_sample * L_code[i];
}
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
\brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
\param input The input signal input
\param carrier The carrier signal input
\param E_code Early PRN code replica input
\param P_code Early PRN code replica input
\param L_code Early PRN code replica input
\param E_out Early correlation output
\param P_out Early correlation output
\param L_out Early correlation output
\param num_points The number of complex values in vectors
*/
extern void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_first_a_orc_impl(short* E_out_real, short* E_out_imag, short* P_out_real, short* P_out_imag, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, unsigned int num_points);
extern void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_second_a_orc_impl(short* L_out_real, short* L_out_imag, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* L_code, unsigned int num_points);
static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_orc(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points){
short E_out_real = 0;
short E_out_imag = 0;
char* E_out_real_c = (char*)&E_out_real;
E_out_real_c++;
char* E_out_imag_c = (char*)&E_out_imag;
E_out_imag_c++;
short P_out_real = 0;
short P_out_imag = 0;
char* P_out_real_c = (char*)&P_out_real;
P_out_real_c++;
char* P_out_imag_c = (char*)&P_out_imag;
P_out_imag_c++;
short L_out_real = 0;
short L_out_imag = 0;
char* L_out_real_c = (char*)&L_out_real;
L_out_real_c++;
char* L_out_imag_c = (char*)&L_out_imag;
L_out_imag_c++;
volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_first_a_orc_impl( &E_out_real, &E_out_imag, &P_out_real, &P_out_imag, input, carrier, E_code, P_code, num_points);
volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_second_a_orc_impl( &L_out_real, &L_out_imag, input, carrier, L_code, num_points);
//ORC implementation of 8ic_x5_cw_epl_corr_8ic_x3 is done in two different functions because it seems that
//in one function the length of the code gives memory problems (bad access, segmentation fault).
//Also, the maximum number of accumulators that can be used is 4 (and we need 6).
//The "carrier wipe-off" step is done two times: one in the first function and another one in the second.
//Joining all the ORC code in one function would be quicker because the "carrier wipe-off" step would be done just
//one time.
*E_out = lv_cmake(*E_out_real_c, *E_out_imag_c);
*P_out = lv_cmake(*P_out_real_c, *P_out_imag_c);
*L_out = lv_cmake(*L_out_real_c, *L_out_imag_c);
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_H */

View File

@ -0,0 +1,210 @@
/*!
* \file volk_gnsssdr_8u_x2_multiply_8u.h
* \brief Volk protokernel: multiplies unsigned char values
* \authors <ul>
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that multiplies unsigned char values (8 bits data)
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H
#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
#include <emmintrin.h>
/*!
\brief Multiplies the two input unsigned char values and stores their results in the third unisgned char
\param cChar The unsigned char where the results will be stored
\param aChar One of the unsigned char to be multiplied
\param bChar One of the unsigned char to be multiplied
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
*/
static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points){
const unsigned int sse_iters = num_points / 16;
__m128i x, y, x1, x2, y1, y2, mult1, x1_mult_y1, x2_mult_y2, tmp, tmp1, tmp2, totalc;
unsigned char* c = cChar;
const unsigned char* a = aChar;
const unsigned char* b = bChar;
for(int number = 0;number < sse_iters; number++){
x = _mm_lddqu_si128((__m128i*)a);
y = _mm_lddqu_si128((__m128i*)b);
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
x1 = _mm_srli_si128 (x, 1);
x1 = _mm_and_si128 (x1, mult1);
x2 = _mm_and_si128 (x, mult1);
y1 = _mm_srli_si128 (y, 1);
y1 = _mm_and_si128 (y1, mult1);
y2 = _mm_and_si128 (y, mult1);
x1_mult_y1 = _mm_mullo_epi16 (x1, y1);
x2_mult_y2 = _mm_mullo_epi16 (x2, y2);
tmp = _mm_and_si128 (x1_mult_y1, mult1);
tmp1 = _mm_slli_si128 (tmp, 1);
tmp2 = _mm_and_si128 (x2_mult_y2, mult1);
totalc = _mm_or_si128 (tmp1, tmp2);
_mm_storeu_si128((__m128i*)c, totalc);
a += 16;
b += 16;
c += 16;
}
for (int i = 0; i<(num_points % 16); ++i)
{
*c++ = (*a++) * (*b++);
}
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input unsigned char values and stores their results in the third unisgned char
\param cChar The unsigned char where the results will be stored
\param aChar One of the unsigned char to be multiplied
\param bChar One of the unsigned char to be multiplied
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
*/
static inline void volk_gnsssdr_8u_x2_multiply_8u_generic(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points){
unsigned char* cPtr = cChar;
const unsigned char* aPtr = aChar;
const unsigned char* bPtr = bChar;
for(int number = 0; number < num_points; number++){
*cPtr++ = (*aPtr++) * (*bPtr++);
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H */
#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H
#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
#include <emmintrin.h>
/*!
\brief Multiplies the two input unsigned char values and stores their results in the third unisgned char
\param cChar The unsigned char where the results will be stored
\param aChar One of the unsigned char to be multiplied
\param bChar One of the unsigned char to be multiplied
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
*/
static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points){
const unsigned int sse_iters = num_points / 16;
__m128i x, y, x1, x2, y1, y2, mult1, x1_mult_y1, x2_mult_y2, tmp, tmp1, tmp2, totalc;
unsigned char* c = cChar;
const unsigned char* a = aChar;
const unsigned char* b = bChar;
for(int number = 0;number < sse_iters; number++){
x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b);
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
x1 = _mm_srli_si128 (x, 1);
x1 = _mm_and_si128 (x1, mult1);
x2 = _mm_and_si128 (x, mult1);
y1 = _mm_srli_si128 (y, 1);
y1 = _mm_and_si128 (y1, mult1);
y2 = _mm_and_si128 (y, mult1);
x1_mult_y1 = _mm_mullo_epi16 (x1, y1);
x2_mult_y2 = _mm_mullo_epi16 (x2, y2);
tmp = _mm_and_si128 (x1_mult_y1, mult1);
tmp1 = _mm_slli_si128 (tmp, 1);
tmp2 = _mm_and_si128 (x2_mult_y2, mult1);
totalc = _mm_or_si128 (tmp1, tmp2);
_mm_store_si128((__m128i*)c, totalc);
a += 16;
b += 16;
c += 16;
}
for (int i = 0; i<(num_points % 16); ++i)
{
*c++ = (*a++) * (*b++);
}
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input unsigned char values and stores their results in the third unisgned char
\param cChar The unsigned char where the results will be stored
\param aChar One of the unsigned char to be multiplied
\param bChar One of the unsigned char to be multiplied
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
*/
static inline void volk_gnsssdr_8u_x2_multiply_8u_a_generic(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points){
unsigned char* cPtr = cChar;
const unsigned char* aPtr = aChar;
const unsigned char* bPtr = bChar;
for(int number = 0; number < num_points; number++){
*cPtr++ = (*aPtr++) * (*bPtr++);
}
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
\brief Multiplies the two input unsigned char values and stores their results in the third unisgned char
\param cChar The unsigned char where the results will be stored
\param aChar One of the unsigned char to be multiplied
\param bChar One of the unsigned char to be multiplied
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
*/
extern void volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl(unsigned char* cVector, const unsigned char* aVector, const unsigned char* bVector, unsigned int num_points);
static inline void volk_gnsssdr_8u_x2_multiply_8u_u_orc(unsigned char* cVector, const unsigned char* aVector, const unsigned char* bVector, unsigned int num_points){
volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl(cVector, aVector, bVector, num_points);
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H */

View File

@ -0,0 +1,572 @@
#
# Copyright 2011-2012,2014 Free Software Foundation, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
########################################################################
# header file detection
########################################################################
include(CheckIncludeFile)
CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H)
if(HAVE_CPUID_H)
add_definitions(-DHAVE_CPUID_H)
endif()
CHECK_INCLUDE_FILE(intrin.h HAVE_INTRIN_H)
if(HAVE_INTRIN_H)
add_definitions(-DHAVE_INTRIN_H)
endif()
CHECK_INCLUDE_FILE(fenv.h HAVE_FENV_H)
if(HAVE_FENV_H)
add_definitions(-DHAVE_FENV_H)
endif()
CHECK_INCLUDE_FILE(dlfcn.h HAVE_DLFCN_H)
if(HAVE_DLFCN_H)
add_definitions(-DHAVE_DLFCN_H)
list(APPEND volk_gnsssdr_libraries ${CMAKE_DL_LIBS})
endif()
########################################################################
# Setup the compiler name
########################################################################
set(COMPILER_NAME ${CMAKE_C_COMPILER_ID})
if(MSVC) #its not set otherwise
set(COMPILER_NAME MSVC)
endif()
message(STATUS "Compiler name: ${COMPILER_NAME}")
if(NOT DEFINED COMPILER_NAME)
message(FATAL_ERROR "COMPILER_NAME undefined. Volk build may not support this compiler.")
endif()
########################################################################
# Special clang flag so flag checks can fail
########################################################################
if(COMPILER_NAME MATCHES "GNU")
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("-Werror=unused-command-line-argument" HAVE_WERROR_UNUSED_CMD_LINE_ARG)
if(HAVE_WERROR_UNUSED_CMD_LINE_ARG)
set(VOLK_FLAG_CHECK_FLAGS "-Werror=unused-command-line-argument")
endif()
endif()
########################################################################
# check for posix_memalign, since some OSs do not internally define
# _XOPEN_SOURCE or _POSIX_C_SOURCE; they leave this to the user.
########################################################################
include(CheckFunctionExists)
CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN)
if(HAVE_POSIX_MEMALIGN)
add_definitions(-DHAVE_POSIX_MEMALIGN)
endif(HAVE_POSIX_MEMALIGN)
########################################################################
# detect x86 flavor of CPU
########################################################################
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(i.86|x86|x86_64|amd64)$")
message(STATUS "x86* CPU detected")
set(CPU_IS_x86 TRUE)
endif()
########################################################################
# determine passing architectures based on compile flag tests
########################################################################
execute_process(
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
${CMAKE_SOURCE_DIR}/gen/volk_gnsssdr_compile_utils.py
--mode "arch_flags" --compiler "${COMPILER_NAME}"
OUTPUT_VARIABLE arch_flag_lines OUTPUT_STRIP_TRAILING_WHITESPACE
)
macro(check_arch arch_name)
set(flags ${ARGN})
set(have_${arch_name} TRUE)
foreach(flag ${flags})
include(CheckCXXCompilerFlag)
set(have_flag have${flag})
execute_process( #make the have_flag have nice alphanum chars (just for looks/not necessary)
COMMAND ${PYTHON_EXECUTABLE} -c "import re; print(re.sub('\\W', '_', '${have_flag}'))"
OUTPUT_VARIABLE have_flag OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(VOLK_FLAG_CHECK_FLAGS)
set(CMAKE_REQUIRED_FLAGS ${VOLK_FLAG_CHECK_FLAGS})
endif()
CHECK_CXX_COMPILER_FLAG(${flag} ${have_flag})
unset(CMAKE_REQUIRED_FLAGS)
if (NOT ${have_flag})
set(have_${arch_name} FALSE)
endif()
endforeach()
if (have_${arch_name})
list(APPEND available_archs ${arch_name})
endif()
endmacro(check_arch)
foreach(line ${arch_flag_lines})
string(REGEX REPLACE "," ";" arch_flags ${line})
check_arch(${arch_flags})
endforeach(line)
macro(OVERRULE_ARCH arch reason)
message(STATUS "${reason}, Overruled arch ${arch}")
list(REMOVE_ITEM available_archs ${arch})
endmacro(OVERRULE_ARCH)
########################################################################
# eliminate AVX on if not on x86, or if the compiler does not accept
# the xgetbv instruction, or {if not cross-compiling and the xgetbv
# executable does not function correctly}.
########################################################################
set(HAVE_XGETBV 0)
set(HAVE_AVX_CVTPI32_PS 0)
if(CPU_IS_x86)
# check to see if the compiler/linker works with xgetb instruction
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv.c "unsigned long long _xgetbv(unsigned int index) { unsigned int eax, edx; __asm__ __volatile__(\"xgetbv\" : \"=a\"(eax), \"=d\"(edx) : \"c\"(index)); return ((unsigned long long)edx << 32) | eax; } int main (void) { (void) _xgetbv(0); return (0); }")
execute_process(COMMAND ${CMAKE_C_COMPILER} -o
${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv
${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv.c
OUTPUT_QUIET ERROR_QUIET
RESULT_VARIABLE avx_compile_result)
if(NOT ${avx_compile_result} EQUAL 0)
OVERRULE_ARCH(avx "Compiler or linker missing xgetbv instruction")
elseif(NOT CROSSCOMPILE_MULTILIB)
execute_process(COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv
OUTPUT_QUIET ERROR_QUIET
RESULT_VARIABLE avx_exe_result)
if(NOT ${avx_exe_result} EQUAL 0)
OVERRULE_ARCH(avx "CPU missing xgetbv")
else()
set(HAVE_XGETBV 1)
endif()
else()
# cross compiling and compiler/linker seems to work; assume working
set(HAVE_XGETBV 1)
endif()
file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv
${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv.c)
#########################################################################
# eliminate AVX if cvtpi32_ps intrinsic fails like some versions of clang
#########################################################################
# check to see if the compiler/linker works with cvtpi32_ps instrinsic when using AVX
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps.c "#include <immintrin.h>\nint main (void) {__m128 __a; __m64 __b; __m128 foo = _mm_cvtpi32_ps(__a, __b); return (0); }")
execute_process(COMMAND ${CMAKE_C_COMPILER} -mavx -o
${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps
${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps.c
OUTPUT_QUIET ERROR_QUIET
RESULT_VARIABLE avx_compile_result)
if(NOT ${avx_compile_result} EQUAL 0)
OVERRULE_ARCH(avx "Compiler missing cvtpi32_ps instrinsic")
elseif(NOT CROSSCOMPILE_MULTILIB)
execute_process(COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps
OUTPUT_QUIET ERROR_QUIET
RESULT_VARIABLE avx_exe_result)
if(NOT ${avx_exe_result} EQUAL 0)
OVERRULE_ARCH(avx "CPU missing cvtpi32_ps")
else()
set(HAVE_AVX_CVTPI32_PS 1)
endif()
else()
set(HAVE_AVX_CVTPI32_PS 1)
endif()
file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps
${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps.c)
# Disable SSE4a if Clang is less than version 3.2
if("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
# Figure out the version of Clang
if(CMAKE_VERSION VERSION_LESS "2.8.10")
# Exctract the Clang version from the --version string.
# In cmake 2.8.10, we can just use CMAKE_C_COMPILER_VERSION
# without having to go through these string manipulations
execute_process(COMMAND ${CMAKE_C_COMPILER} --version
OUTPUT_VARIABLE clang_version)
string(REGEX MATCH "[0-9].[0-9]" CMAKE_C_COMPILER_VERSION ${clang_version})
endif(CMAKE_VERSION VERSION_LESS "2.8.10")
if(CMAKE_C_COMPILER_VERSION VERSION_LESS "3.2")
OVERRULE_ARCH(sse4_a "Clang >= 3.2 required for SSE4a")
endif(CMAKE_C_COMPILER_VERSION VERSION_LESS "3.2")
endif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
endif(CPU_IS_x86)
if(${HAVE_XGETBV})
add_definitions(-DHAVE_XGETBV)
endif()
if(${HAVE_AVX_CVTPI32_PS})
add_definitions(-DHAVE_AVX_CVTPI32_PS)
endif()
########################################################################
# if the CPU is not x86, eliminate all Intel SIMD
########################################################################
if(NOT CPU_IS_x86)
OVERRULE_ARCH(3dnow "Architecture is not x86 or x86_64")
OVERRULE_ARCH(mmx "Architecture is not x86 or x86_64")
OVERRULE_ARCH(sse "Architecture is not x86 or x86_64")
OVERRULE_ARCH(sse2 "Architecture is not x86 or x86_64")
OVERRULE_ARCH(sse3 "Architecture is not x86 or x86_64")
OVERRULE_ARCH(ssse3 "Architecture is not x86 or x86_64")
OVERRULE_ARCH(sse4_a "Architecture is not x86 or x86_64")
OVERRULE_ARCH(sse4_1 "Architecture is not x86 or x86_64")
OVERRULE_ARCH(sse4_2 "Architecture is not x86 or x86_64")
OVERRULE_ARCH(avx "Architecture is not x86 or x86_64")
endif(NOT CPU_IS_x86)
########################################################################
# implement overruling in the ORC case,
# since ORC always passes flag detection
########################################################################
if(NOT ORC_FOUND)
OVERRULE_ARCH(orc "ORC support not found")
endif()
########################################################################
# implement overruling in the non-multilib case
# this makes things work when both -m32 and -m64 pass
########################################################################
if(NOT CROSSCOMPILE_MULTILIB AND CPU_IS_x86)
include(CheckTypeSize)
check_type_size("void*[8]" SIZEOF_CPU BUILTIN_TYPES_ONLY)
if (${SIZEOF_CPU} EQUAL 64)
OVERRULE_ARCH(32 "CPU width is 64 bits")
endif()
if (${SIZEOF_CPU} EQUAL 32)
OVERRULE_ARCH(64 "CPU width is 32 bits")
endif()
#MSVC 64 bit does not have MMX, overrule it
if (${SIZEOF_CPU} EQUAL 64 AND MSVC)
OVERRULE_ARCH(mmx "No MMX for Win64")
endif()
endif()
########################################################################
# done overrules! print the result
########################################################################
message(STATUS "Available architectures: ${available_archs}")
########################################################################
# determine available machines given the available architectures
########################################################################
execute_process(
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
${CMAKE_SOURCE_DIR}/gen/volk_gnsssdr_compile_utils.py
--mode "machines" --archs "${available_archs}"
OUTPUT_VARIABLE available_machines OUTPUT_STRIP_TRAILING_WHITESPACE
)
########################################################################
# Implement machine overruling for redundant machines:
# A machine is redundant when expansion rules occur,
# and the arch superset passes configuration checks.
# When this occurs, eliminate the redundant machines
# to avoid unnecessary compilation of subset machines.
########################################################################
foreach(arch mmx orc 64 32)
foreach(machine_name ${available_machines})
string(REPLACE "_${arch}" "" machine_name_no_arch ${machine_name})
if (${machine_name} STREQUAL ${machine_name_no_arch})
else()
list(REMOVE_ITEM available_machines ${machine_name_no_arch})
endif()
endforeach(machine_name)
endforeach(arch)
########################################################################
# done overrules! print the result
########################################################################
message(STATUS "Available machines: ${available_machines}")
########################################################################
# Create rules to run the volk_gnsssdr generator
########################################################################
#dependencies are all python, xml, and header implementation files
file(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml)
file(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py)
file(GLOB h_files ${CMAKE_SOURCE_DIR}/kernels/volk_gnsssdr/*.h)
macro(gen_template tmpl output)
list(APPEND volk_gnsssdr_gen_sources ${output})
add_custom_command(
OUTPUT ${output}
DEPENDS ${xml_files} ${py_files} ${h_files} ${tmpl}
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
${CMAKE_SOURCE_DIR}/gen/volk_gnsssdr_tmpl_utils.py
--input ${tmpl} --output ${output} ${ARGN}
)
endmacro(gen_template)
make_directory(${CMAKE_BINARY_DIR}/include/volk_gnsssdr)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr.tmpl.h ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr.h)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr.tmpl.c ${CMAKE_BINARY_DIR}/lib/volk_gnsssdr.c)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_typedefs.tmpl.h ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_typedefs.h)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_cpu.tmpl.h ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_cpu.h)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_cpu.tmpl.c ${CMAKE_BINARY_DIR}/lib/volk_gnsssdr_cpu.c)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_config_fixed.tmpl.h ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_config_fixed.h)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_machines.tmpl.h ${CMAKE_BINARY_DIR}/lib/volk_gnsssdr_machines.h)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_machines.tmpl.c ${CMAKE_BINARY_DIR}/lib/volk_gnsssdr_machines.c)
set(BASE_CFLAGS NONE)
STRING(TOUPPER ${CMAKE_BUILD_TYPE} CBTU)
MESSAGE(STATUS BUILT TYPE ${CBTU})
MESSAGE(STATUS "Base cflags = ${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS}")
set(COMPILER_INFO "")
IF(MSVC)
IF(MSVC90) #Visual Studio 9
SET(cmake_c_compiler_version "Microsoft Visual Studio 9.0")
ELSE(MSVC10) #Visual Studio 10
SET(cmake_c_compiler_version "Microsoft Visual Studio 10.0")
ELSE(MSVC11) #Visual Studio 11
SET(cmake_c_compiler_version "Microsoft Visual Studio 11.0")
ELSE(MSVC12) #Visual Studio 12
SET(cmake_c_compiler_version "Microsoft Visual Studio 12.0")
ENDIF()
ELSE()
execute_process(COMMAND ${CMAKE_C_COMPILER} --version
OUTPUT_VARIABLE cmake_c_compiler_version)
ENDIF(MSVC)
set(COMPILER_INFO "${CMAKE_C_COMPILER}:::${CMAKE_C_FLAGS_${GRCBTU}} ${CMAKE_C_FLAGS}\n${CMAKE_CXX_COMPILER}:::${CMAKE_CXX_FLAGS_${GRCBTU}} ${CMAKE_CXX_FLAGS}\n" )
foreach(machine_name ${available_machines})
#generate machine source
set(machine_source ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr_machine_${machine_name}.c)
gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_machine_xxx.tmpl.c ${machine_source} ${machine_name})
#determine machine flags
execute_process(
COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
${CMAKE_SOURCE_DIR}/gen/volk_gnsssdr_compile_utils.py
--mode "machine_flags" --machine "${machine_name}" --compiler "${COMPILER_NAME}"
OUTPUT_VARIABLE ${machine_name}_flags OUTPUT_STRIP_TRAILING_WHITESPACE
)
MESSAGE(STATUS "BUILD INFO ::: ${machine_name} ::: ${COMPILER_NAME} ::: ${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS} ${${machine_name}_flags}")
set(COMPILER_INFO "${COMPILER_INFO}${machine_name}:::${COMPILER_NAME}:::${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS} ${${machine_name}_flags}\n" )
if(${machine_name}_flags)
set_source_files_properties(${machine_source} PROPERTIES COMPILE_FLAGS "${${machine_name}_flags}")
endif()
#add to available machine defs
string(TOUPPER LV_MACHINE_${machine_name} machine_def)
list(APPEND machine_defs ${machine_def})
endforeach(machine_name)
# Convert to a C string to compile and display properly
string(STRIP "${cmake_c_compiler_version}" cmake_c_compiler_version)
string(STRIP ${COMPILER_INFO} COMPILER_INFO)
MESSAGE(STATUS "Compiler Version: ${cmake_c_compiler_version}")
string(REPLACE "\n" " \\n" cmake_c_compiler_version ${cmake_c_compiler_version})
string(REPLACE "\n" " \\n" COMPILER_INFO ${COMPILER_INFO})
########################################################################
# Set local include directories first
########################################################################
include_directories(
${CMAKE_BINARY_DIR}/include
${CMAKE_SOURCE_DIR}/include
${CMAKE_SOURCE_DIR}/kernels
${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_CURRENT_SOURCE_DIR}
)
########################################################################
# Handle ASM support
# on by default, but let users turn it off
########################################################################
if(${CMAKE_VERSION} VERSION_GREATER "2.8.9")
set(ASM_ARCHS_AVAILABLE "armv7")
set(FULL_C_FLAGS "${CMAKE_C_FLAGS}" "${CMAKE_CXX_COMPILER_ARG1}")
# sort through a list of all architectures we have ASM for
# if we find one that matches our current system architecture
# set up the assembler flags and include the source files
foreach(ARCH ${ASM_ARCHS_AVAILABLE})
message(STATUS "--==>> -CFLAGS1: ${FULL_C_FLAGS}")
string(REGEX MATCH "${ARCH}" ASM_ARCH "${FULL_C_FLAGS}")
if( ASM_ARCH STREQUAL "armv7" )
set(ASM-ATT $ENV{ASM})
message(STATUS "---- Adding ASM files") # we always use ATT syntax
message(STATUS "-- Detected armv7 architecture; enabling ASM")
# setup architecture specific assembler flags
set(ARCH_ASM_FLAGS "-mfpu=neon -g")
# then add the files
include_directories(${CMAKE_SOURCE_DIR}/kernels/volk_gnsssdr/asm/neon)
file(GLOB asm_files ${CMAKE_SOURCE_DIR}/kernels/volk_gnsssdr/asm/neon/*.s)
foreach(asm_file ${asm_files})
list(APPEND volk_gnsssdr_sources ${asm_file})
message(STATUS "Adding source file: ${asm_file}")
endforeach(asm_file)
endif()
set(CMAKE_ASM-ATT_FLAGS_INIT ${ARCH_ASM_FLAGS})
enable_language(ASM-ATT) # this must be after flags_init
message(STATUS "asm flags: ${CMAKE_ASM-ATT_FLAGS}")
endforeach(ARCH)
else(${CMAKE_VERSION} VERSION_GREATER "2.8.9")
message(STATUS "Not enabling ASM support. CMake >= 2.8.10 required.")
endif(${CMAKE_VERSION} VERSION_GREATER "2.8.9")
########################################################################
# Handle orc support
########################################################################
if(ORC_FOUND)
#setup orc library usage
include_directories(${ORC_INCLUDE_DIRS})
link_directories(${ORC_LIBRARY_DIRS})
list(APPEND volk_gnsssdr_libraries ${ORC_LIBRARIES})
#setup orc functions
file(GLOB orc_files ${CMAKE_SOURCE_DIR}/orc/*.orc)
foreach(orc_file ${orc_files})
#extract the name for the generated c source from the orc file
get_filename_component(orc_file_name_we ${orc_file} NAME_WE)
set(orcc_gen ${CMAKE_CURRENT_BINARY_DIR}/${orc_file_name_we}.c)
#create a rule to generate the source and add to the list of sources
add_custom_command(
COMMAND ${ORCC_EXECUTABLE} --include math.h --implementation -o ${orcc_gen} ${orc_file}
DEPENDS ${orc_file} OUTPUT ${orcc_gen}
)
list(APPEND volk_gnsssdr_sources ${orcc_gen})
endforeach(orc_file)
else()
message(STATUS "Did not find liborc and orcc, disabling orc support...")
endif()
########################################################################
# Handle the generated constants
########################################################################
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c
"import time;print time.strftime('%a, %d %b %Y %H:%M:%S', time.gmtime())"
OUTPUT_VARIABLE BUILD_DATE OUTPUT_STRIP_TRAILING_WHITESPACE
)
message(STATUS "Loading build date ${BUILD_DATE} into constants...")
message(STATUS "Loading version ${VERSION} into constants...")
#double escape for windows backslash path separators
string(REPLACE "\\" "\\\\" prefix ${prefix})
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/constants.c.in
${CMAKE_CURRENT_BINARY_DIR}/constants.c
@ONLY)
list(APPEND volk_gnsssdr_sources ${CMAKE_CURRENT_BINARY_DIR}/constants.c)
########################################################################
# Setup the volk_gnsssdr sources list and library
########################################################################
if(NOT WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
endif()
list(APPEND volk_gnsssdr_sources
${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr_prefs.c
${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr_rank_archs.c
${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr_malloc.c
${volk_gnsssdr_gen_sources}
)
#set the machine definitions where applicable
set_source_files_properties(
${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr.c
${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr_machines.c
PROPERTIES COMPILE_DEFINITIONS "${machine_defs}")
if(MSVC)
#add compatibility includes for stdint types
include_directories(${CMAKE_SOURCE_DIR}/cmake/msvc)
add_definitions(-DHAVE_CONFIG_H)
#compile the sources as C++ due to the lack of complex.h under MSVC
set_source_files_properties(${volk_gnsssdr_sources} PROPERTIES LANGUAGE CXX)
endif()
#create the volk_gnsssdr runtime library
#MODIFICATIONS BY GNSS-SDR
file(GLOB orc ${CMAKE_SOURCE_DIR}/orc/*.orc)
#add_library(volk_gnsssdr SHARED ${volk_gnsssdr_sources})
add_library(volk_gnsssdr SHARED ${volk_gnsssdr_sources} ${h_files} ${orc})
source_group("Kernels" FILES ${h_files})
source_group("ORC Files" FILES ${orc})
#END OF MODIFICATIONS
target_link_libraries(volk_gnsssdr ${volk_gnsssdr_libraries})
set_target_properties(volk_gnsssdr PROPERTIES SOVERSION ${LIBVER})
set_target_properties(volk_gnsssdr PROPERTIES DEFINE_SYMBOL "volk_gnsssdr_EXPORTS")
install(TARGETS volk_gnsssdr
LIBRARY DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_runtime" # .so file
ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_devel" # .lib file
RUNTIME DESTINATION bin COMPONENT "volk_gnsssdr_runtime" # .dll file
)
if(ENABLE_STATIC_LIBS)
add_library(volk_gnsssdr_static STATIC ${volk_gnsssdr_sources})
if(NOT WIN32)
set_target_properties(volk_gnsssdr_static
PROPERTIES OUTPUT_NAME volk_gnsssdr)
endif(NOT WIN32)
install(TARGETS volk_gnsssdr_static
ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_devel" # .lib file
)
endif(ENABLE_STATIC_LIBS)
########################################################################
# Build the QA test application
########################################################################
if(Boost_FOUND)
set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc PROPERTIES
COMPILE_DEFINITIONS "BOOST_TEST_DYN_LINK;BOOST_TEST_MAIN"
)
include_directories(${Boost_INCLUDE_DIRS})
link_directories(${Boost_LIBRARY_DIRS})
add_executable(test_all
${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc
${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc
)
target_link_libraries(test_all volk_gnsssdr ${Boost_LIBRARIES})
add_test(qa_volk_gnsssdr_test_all test_all)
endif(Boost_FOUND)

View File

@ -0,0 +1,63 @@
/* -*- c++ -*- */
/*
* Copyright 2013 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/
#if HAVE_CONFIG_H
#include <config.h>
#endif
#include <volk_gnsssdr/constants.h>
char*
volk_gnsssdr_prefix()
{
return "@prefix@";
}
char*
volk_gnsssdr_build_date()
{
return "@BUILD_DATE@";
}
char*
volk_gnsssdr_version()
{
return "@VERSION@";
}
char*
volk_gnsssdr_c_compiler()
{
return "@cmake_c_compiler_version@";
}
char*
volk_gnsssdr_compiler_flags()
{
return "@COMPILER_INFO@";
}
char*
volk_gnsssdr_available_machines()
{
return "@available_machines@";
}

View File

@ -0,0 +1,188 @@
/*
* Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
*
* This file is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 3, or (at your option) any
* later version.
*
* This file is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* Under Section 7 of GPL version 3, you are granted additional
* permissions described in the GCC Runtime Library Exception, version
* 3.1, as published by the Free Software Foundation.
*
* You should have received a copy of the GNU General Public License and
* a copy of the GCC Runtime Library Exception along with this program;
* see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
* <http://www.gnu.org/licenses/>.
*/
/* %ecx */
#define bit_SSE3 (1 << 0)
#define bit_PCLMUL (1 << 1)
#define bit_SSSE3 (1 << 9)
#define bit_FMA (1 << 12)
#define bit_CMPXCHG16B (1 << 13)
#define bit_SSE4_1 (1 << 19)
#define bit_SSE4_2 (1 << 20)
#define bit_MOVBE (1 << 22)
#define bit_POPCNT (1 << 23)
#define bit_AES (1 << 25)
#define bit_XSAVE (1 << 26)
#define bit_OSXSAVE (1 << 27)
#define bit_AVX (1 << 28)
#define bit_F16C (1 << 29)
#define bit_RDRND (1 << 30)
/* %edx */
#define bit_CMPXCHG8B (1 << 8)
#define bit_CMOV (1 << 15)
#define bit_MMX (1 << 23)
#define bit_FXSAVE (1 << 24)
#define bit_SSE (1 << 25)
#define bit_SSE2 (1 << 26)
/* Extended Features */
/* %ecx */
#define bit_LAHF_LM (1 << 0)
#define bit_ABM (1 << 5)
#define bit_SSE4a (1 << 6)
#define bit_XOP (1 << 11)
#define bit_LWP (1 << 15)
#define bit_FMA4 (1 << 16)
#define bit_TBM (1 << 21)
/* %edx */
#define bit_MMXEXT (1 << 22)
#define bit_LM (1 << 29)
#define bit_3DNOWP (1 << 30)
#define bit_3DNOW (1 << 31)
/* Extended Features (%eax == 7) */
#define bit_FSGSBASE (1 << 0)
#define bit_BMI (1 << 3)
#if defined(__i386__) && defined(__PIC__)
/* %ebx may be the PIC register. */
#if __GNUC__ >= 3
#define __cpuid(level, a, b, c, d) \
__asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
"cpuid\n\t" \
"xchg{l}\t{%%}ebx, %1\n\t" \
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
: "0" (level))
#define __cpuid_count(level, count, a, b, c, d) \
__asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
"cpuid\n\t" \
"xchg{l}\t{%%}ebx, %1\n\t" \
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
: "0" (level), "2" (count))
#else
/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
nor alternatives in i386 code. */
#define __cpuid(level, a, b, c, d) \
__asm__ ("xchgl\t%%ebx, %1\n\t" \
"cpuid\n\t" \
"xchgl\t%%ebx, %1\n\t" \
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
: "0" (level))
#define __cpuid_count(level, count, a, b, c, d) \
__asm__ ("xchgl\t%%ebx, %1\n\t" \
"cpuid\n\t" \
"xchgl\t%%ebx, %1\n\t" \
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
: "0" (level), "2" (count))
#endif
#else
#define __cpuid(level, a, b, c, d) \
__asm__ ("cpuid\n\t" \
: "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
: "0" (level))
#define __cpuid_count(level, count, a, b, c, d) \
__asm__ ("cpuid\n\t" \
: "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
: "0" (level), "2" (count))
#endif
/* Return highest supported input value for cpuid instruction. ext can
be either 0x0 or 0x8000000 to return highest supported value for
basic or extended cpuid information. Function returns 0 if cpuid
is not supported or whatever cpuid returns in eax register. If sig
pointer is non-null, then first four bytes of the signature
(as found in ebx register) are returned in location pointed by sig. */
static __inline unsigned int
__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
{
unsigned int __eax, __ebx, __ecx, __edx;
#ifndef __x86_64__
/* See if we can use cpuid. On AMD64 we always can. */
#if __GNUC__ >= 3
__asm__ ("pushf{l|d}\n\t"
"pushf{l|d}\n\t"
"pop{l}\t%0\n\t"
"mov{l}\t{%0, %1|%1, %0}\n\t"
"xor{l}\t{%2, %0|%0, %2}\n\t"
"push{l}\t%0\n\t"
"popf{l|d}\n\t"
"pushf{l|d}\n\t"
"pop{l}\t%0\n\t"
"popf{l|d}\n\t"
: "=&r" (__eax), "=&r" (__ebx)
: "i" (0x00200000));
#else
/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
nor alternatives in i386 code. */
__asm__ ("pushfl\n\t"
"pushfl\n\t"
"popl\t%0\n\t"
"movl\t%0, %1\n\t"
"xorl\t%2, %0\n\t"
"pushl\t%0\n\t"
"popfl\n\t"
"pushfl\n\t"
"popl\t%0\n\t"
"popfl\n\t"
: "=&r" (__eax), "=&r" (__ebx)
: "i" (0x00200000));
#endif
if (!((__eax ^ __ebx) & 0x00200000))
return 0;
#endif
/* Host supports cpuid. Return highest supported cpuid input value. */
__cpuid (__ext, __eax, __ebx, __ecx, __edx);
if (__sig)
*__sig = __ebx;
return __eax;
}
/* Return cpuid data for requested cpuid level, as found in returned
eax, ebx, ecx and edx registers. The function checks if cpuid is
supported and returns 1 for valid cpuid information or 0 for
unsupported cpuid level. All pointers are required to be non-null. */
static __inline int
__get_cpuid (unsigned int __level,
unsigned int *__eax, unsigned int *__ebx,
unsigned int *__ecx, unsigned int *__edx)
{
unsigned int __ext = __level & 0x80000000;
if (__get_cpuid_max (__ext, 0) < __level)
return 0;
__cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
return 1;
}

View File

@ -0,0 +1,89 @@
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_16s_add_quad_aligned16.h>
#include <volk_gnsssdr/volk_gnsssdr_16s_add_quad_aligned16.h>
#include <cstdlib>
#include <ctime>
//test for sse2
#ifndef LV_HAVE_SSE2
void qa_16s_add_quad_aligned16::t1() {
printf("sse2 not available... no test performed\n");
}
#else
void qa_16s_add_quad_aligned16::t1() {
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
const int vlen = 3200;
const int ITERS = 100000;
__VOLK_ATTR_ALIGNED(16) short input0[vlen];
__VOLK_ATTR_ALIGNED(16) short input1[vlen];
__VOLK_ATTR_ALIGNED(16) short input2[vlen];
__VOLK_ATTR_ALIGNED(16) short input3[vlen];
__VOLK_ATTR_ALIGNED(16) short input4[vlen];
__VOLK_ATTR_ALIGNED(16) short output0[vlen];
__VOLK_ATTR_ALIGNED(16) short output1[vlen];
__VOLK_ATTR_ALIGNED(16) short output2[vlen];
__VOLK_ATTR_ALIGNED(16) short output3[vlen];
__VOLK_ATTR_ALIGNED(16) short output01[vlen];
__VOLK_ATTR_ALIGNED(16) short output11[vlen];
__VOLK_ATTR_ALIGNED(16) short output21[vlen];
__VOLK_ATTR_ALIGNED(16) short output31[vlen];
for(int i = 0; i < vlen; ++i) {
short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus1 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus1 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus2 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus2 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
input0[i] = plus0 - minus0;
input1[i] = plus1 - minus1;
input2[i] = plus2 - minus2;
input3[i] = plus3 - minus3;
input4[i] = plus4 - minus4;
}
printf("16s_add_quad_aligned\n");
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_gnsssdr_16s_add_quad_aligned16_manual(output0, output1, output2, output3, input0, input1, input2, input3, input4, vlen << 1 , "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_gnsssdr_16s_add_quad_aligned16_manual(output01, output11, output21, output31, input0, input1, input2, input3, input4, vlen << 1 , "sse2");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse2_time: %f\n", total);
for(int i = 0; i < 1; ++i) {
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
}
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output1[i], output11[i]);
CPPUNIT_ASSERT_EQUAL(output2[i], output21[i]);
CPPUNIT_ASSERT_EQUAL(output3[i], output31[i]);
}
}
#endif

View File

@ -0,0 +1,18 @@
#ifndef INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H
#define INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
class qa_16s_add_quad_aligned16 : public CppUnit::TestCase {
CPPUNIT_TEST_SUITE (qa_16s_add_quad_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
void t1 ();
};
#endif /* INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H */

View File

@ -0,0 +1,106 @@
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_16s_branch_4_state_8_aligned16.h>
#include <cstdlib>
#include <ctime>
//test for ssse3
#ifndef LV_HAVE_SSSE3
void qa_16s_branch_4_state_8_aligned16::t1() {
printf("ssse3 not available... no test performed\n");
}
#else
void qa_16s_branch_4_state_8_aligned16::t1() {
const int num_iters = 1000000;
const int vlen = 32;
static char permute0[16]__attribute__((aligned(16))) = {0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01, 0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03};
static char permute1[16]__attribute__((aligned(16))) = {0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03, 0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01};
static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f};
static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d};
static char* permuters[4] = {permute0, permute1, permute2, permute3};
unsigned int num_bytes = vlen << 1;
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
__VOLK_ATTR_ALIGNED(16) short target[vlen];
__VOLK_ATTR_ALIGNED(16) short target2[vlen];
__VOLK_ATTR_ALIGNED(16) short target3[vlen];
__VOLK_ATTR_ALIGNED(16) short src0[vlen];
__VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = {
7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 };
__VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = {
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
__VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = {
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
__VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = {
0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 };
__VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = {
0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff };
__VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
for(int i = 0; i < vlen; ++i) {
src0[i] = i;
}
printf("16s_branch_4_state_8_aligned\n");
start = clock();
for(int i = 0; i < num_iters; ++i) {
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("permute_and_scalar_add_time: %f\n", total);
start = clock();
for(int i = 0; i < num_iters; ++i) {
volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target2, src0, permuters, cntl2, cntl3, scalars, "ssse3");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("branch_4_state_8_time, ssse3: %f\n", total);
start = clock();
for(int i = 0; i < num_iters; ++i) {
volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("permute_and_scalar_add_time, generic: %f\n", total);
for(int i = 0; i < vlen; ++i) {
printf("psa... %d, b4s8... %d\n", target[i], target3[i]);
}
for(int i = 0; i < vlen; ++i) {
CPPUNIT_ASSERT(target[i] == target2[i]);
CPPUNIT_ASSERT(target[i] == target3[i]);
}
}
#endif

View File

@ -0,0 +1,18 @@
#ifndef INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H
#define INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
class qa_16s_branch_4_state_8_aligned16 : public CppUnit::TestCase {
CPPUNIT_TEST_SUITE (qa_16s_branch_4_state_8_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
void t1 ();
};
#endif /* INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H */

View File

@ -0,0 +1,78 @@
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_16s_permute_and_scalar_add_aligned16.h>
#include <volk_gnsssdr/volk_gnsssdr_16s_permute_and_scalar_add_aligned16.h>
#include <cstdlib>
#include <ctime>
//test for sse2
#ifndef LV_HAVE_SSE2
void qa_16s_permute_and_scalar_add_aligned16::t1() {
printf("sse2 not available... no test performed\n");
}
#else
void qa_16s_permute_and_scalar_add_aligned16::t1() {
const int vlen = 64;
unsigned int num_bytes = vlen << 1;
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
__VOLK_ATTR_ALIGNED(16) short target[vlen];
__VOLK_ATTR_ALIGNED(16) short target2[vlen];
__VOLK_ATTR_ALIGNED(16) short src0[vlen];
__VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl0[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl1[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl2[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl3[vlen];
__VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
for(int i = 0; i < vlen; ++i) {
src0[i] = i;
permute_indexes[i] = (3 * i)%vlen;
cntl0[i] = 0xff;
cntl1[i] = 0xff * (i%2);
cntl2[i] = 0xff * ((i>>1)%2);
cntl3[i] = 0xff * ((i%4) == 3);
}
printf("16s_permute_and_scalar_add_aligned\n");
start = clock();
for(int i = 0; i < 100000; ++i) {
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
for(int i = 0; i < 100000; ++i) {
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target2, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse2_time: %f\n", total);
for(int i = 0; i < vlen; ++i) {
//printf("generic... %d, sse2... %d\n", target[i], target2[i]);
}
for(int i = 0; i < vlen; ++i) {
CPPUNIT_ASSERT(target[i] == target2[i]);
}
}
#endif

View File

@ -0,0 +1,18 @@
#ifndef INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H
#define INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
class qa_16s_permute_and_scalar_add_aligned16 : public CppUnit::TestCase {
CPPUNIT_TEST_SUITE (qa_16s_permute_and_scalar_add_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
void t1 ();
};
#endif /* INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H */

View File

@ -0,0 +1,60 @@
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_16s_quad_max_star_aligned16.h>
#include <volk_gnsssdr/volk_gnsssdr_16s_quad_max_star_aligned16.h>
#include <cstdlib>
#include <ctime>
//test for sse2
#ifndef LV_HAVE_SSE2
void qa_16s_quad_max_star_aligned16::t1() {
printf("sse2 not available... no test performed\n");
}
#else
void qa_16s_quad_max_star_aligned16::t1() {
const int vlen = 34;
__VOLK_ATTR_ALIGNED(16) short input0[vlen];
__VOLK_ATTR_ALIGNED(16) short input1[vlen];
__VOLK_ATTR_ALIGNED(16) short input2[vlen];
__VOLK_ATTR_ALIGNED(16) short input3[vlen];
__VOLK_ATTR_ALIGNED(16) short output0[vlen];
__VOLK_ATTR_ALIGNED(16) short output1[vlen];
for(int i = 0; i < vlen; ++i) {
short plus0 = (short) (rand() - (RAND_MAX/2));
short plus1 = (short) (rand() - (RAND_MAX/2));
short plus2 = (short) (rand() - (RAND_MAX/2));
short plus3 = (short) (rand() - (RAND_MAX/2));
short minus0 = (short) (rand() - (RAND_MAX/2));
short minus1 = (short) (rand() - (RAND_MAX/2));
short minus2 = (short) (rand() - (RAND_MAX/2));
short minus3 = (short) (rand() - (RAND_MAX/2));
input0[i] = plus0 - minus0;
input1[i] = plus1 - minus1;
input2[i] = plus2 - minus2;
input3[i] = plus3 - minus3;
}
volk_gnsssdr_16s_quad_max_star_aligned16_manual(output0, input0, input1, input2, input3, 2*vlen, "generic");
volk_gnsssdr_16s_quad_max_star_aligned16_manual(output1, input0, input1, input2, input3, 2*vlen, "sse2");
printf("16s_quad_max_star_aligned\n");
for(int i = 0; i < vlen; ++i) {
printf("generic... %d, sse2... %d, inputs: %d, %d, %d, %d\n", output0[i], output1[i], input0[i], input1[i], input2[i], input3[i]);
}
for(int i = 0; i < vlen; ++i) {
CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
}
}
#endif

View File

@ -0,0 +1,18 @@
#ifndef INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H
#define INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
class qa_16s_quad_max_star_aligned16 : public CppUnit::TestCase {
CPPUNIT_TEST_SUITE (qa_16s_quad_max_star_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
void t1 ();
};
#endif /* INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H */

View File

@ -0,0 +1,61 @@
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_32f_fm_detect_aligned16.h>
#include <volk_gnsssdr/volk_gnsssdr_32f_fm_detect_aligned16.h>
#include <cstdlib>
#include <ctime>
//test for sse
#ifndef LV_HAVE_SSE
void qa_32f_fm_detect_aligned16::t1() {
printf("sse not available... no test performed\n");
}
#else
void qa_32f_fm_detect_aligned16::t1() {
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
const int vlen = 3201;
const int ITERS = 10000;
__VOLK_ATTR_ALIGNED(16) float input0[vlen];
__VOLK_ATTR_ALIGNED(16) float output0[vlen];
__VOLK_ATTR_ALIGNED(16) float output01[vlen];
for(int i = 0; i < vlen; ++i) {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
}
printf("32f_fm_detect_aligned\n");
start = clock();
float save = 0.1;
for(int count = 0; count < ITERS; ++count) {
volk_gnsssdr_32f_fm_detect_aligned16_manual(output0, input0, 1.0, &save, vlen, "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
save = 0.1;
for(int count = 0; count < ITERS; ++count) {
volk_gnsssdr_32f_fm_detect_aligned16_manual(output01, input0, 1.0, &save, vlen, "sse");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse_time: %f\n", total);
for(int i = 0; i < 1; ++i) {
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
}
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i]) * 1e-4);
}
}
#endif

View File

@ -0,0 +1,18 @@
#ifndef INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H
#define INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
class qa_32f_fm_detect_aligned16 : public CppUnit::TestCase {
CPPUNIT_TEST_SUITE (qa_32f_fm_detect_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
void t1 ();
};
#endif /* INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H */

View File

@ -0,0 +1,103 @@
#include <volk_gnsssdr/volk_gnsssdr_runtime.h>
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_32f_index_max_aligned16.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define ERR_DELTA (1e-4)
#define NUM_ITERS 1000000
#define VEC_LEN 3097
static float uniform() {
return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1)
}
static void
random_floats (float *buf, unsigned n)
{
unsigned int i = 0;
for (; i < n; i++) {
buf[i] = uniform () * 32767;
}
}
#ifndef LV_HAVE_SSE
void qa_32f_index_max_aligned16::t1(){
printf("sse not available... no test performed\n");
}
#else
void qa_32f_index_max_aligned16::t1(){
const int vlen = VEC_LEN;
volk_gnsssdr_runtime_init();
volk_gnsssdr_environment_init();
int ret;
unsigned int* target_sse4_1;
unsigned int* target_sse;
unsigned int* target_generic;
float* src0 ;
unsigned int i_target_sse4_1;
target_sse4_1 = &i_target_sse4_1;
unsigned int i_target_sse;
target_sse = &i_target_sse;
unsigned int i_target_generic;
target_generic = &i_target_generic;
ret = posix_memalign((void**)&src0, 16, vlen *sizeof(float));
random_floats((float*)src0, vlen);
printf("32f_index_max_aligned16\n");
clock_t start, end;
double total;
start = clock();
for(int k = 0; k < NUM_ITERS; ++k) {
volk_gnsssdr_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic time: %f\n", total);
start = clock();
for(int k = 0; k < NUM_ITERS; ++k) {
volk_gnsssdr_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse time: %f\n", total);
start = clock();
for(int k = 0; k < NUM_ITERS; ++k) {
get_volk_gnsssdr_runtime()->volk_gnsssdr_32f_index_max_aligned16(target_sse4_1, src0, vlen);
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse4.1 time: %f\n", total);
printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], target_sse[0], target_sse4_1[0]);
CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse[0]);
CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse4_1[0]);
free(src0);
}
#endif /*LV_HAVE_SSE3*/

View File

@ -0,0 +1,18 @@
#ifndef INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H
#define INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
class qa_32f_index_max_aligned16 : public CppUnit::TestCase {
CPPUNIT_TEST_SUITE (qa_32f_index_max_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
void t1 ();
};
#endif /* INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H */

View File

@ -0,0 +1,89 @@
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_32fc_index_max_aligned16.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define ERR_DELTA (1e-4)
#define NUM_ITERS 1000000
#define VEC_LEN 3096
static float uniform() {
return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1)
}
static void
random_floats (float *buf, unsigned n)
{
unsigned int i = 0;
for (; i < n; i++) {
buf[i] = uniform () * 32767;
}
}
#ifndef LV_HAVE_SSE3
void qa_32fc_index_max_aligned16::t1(){
printf("sse3 not available... no test performed\n");
}
#else
void qa_32fc_index_max_aligned16::t1(){
const int vlen = VEC_LEN;
volk_gnsssdr_environment_init();
int ret;
unsigned int* target;
unsigned int* target_generic;
std::complex<float>* src0 ;
unsigned int i_target;
target = &i_target;
unsigned int i_target_generic;
target_generic = &i_target_generic;
ret = posix_memalign((void**)&src0, 16, vlen << 3);
random_floats((float*)src0, vlen * 2);
printf("32fc_index_max_aligned16\n");
clock_t start, end;
double total;
start = clock();
for(int k = 0; k < NUM_ITERS; ++k) {
volk_gnsssdr_32fc_index_max_aligned16_manual(target_generic, src0, vlen << 3, "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic time: %f\n", total);
start = clock();
for(int k = 0; k < NUM_ITERS; ++k) {
volk_gnsssdr_32fc_index_max_aligned16_manual(target, src0, vlen << 3, "sse3");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse3 time: %f\n", total);
printf("generic: %u, sse3: %u\n", target_generic[0], target[0]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], 1.1);
free(src0);
}
#endif /*LV_HAVE_SSE3*/

View File

@ -0,0 +1,18 @@
#ifndef INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H
#define INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
class qa_32fc_index_max_aligned16 : public CppUnit::TestCase {
CPPUNIT_TEST_SUITE (qa_32fc_index_max_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
void t1 ();
};
#endif /* INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H */

View File

@ -0,0 +1,64 @@
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <qa_32fc_power_spectral_density_32f_aligned16.h>
#include <volk_gnsssdr/volk_gnsssdr_32fc_power_spectral_density_32f_aligned16.h>
#include <cstdlib>
#include <ctime>
//test for sse3
#ifndef LV_HAVE_SSE3
void qa_32fc_power_spectral_density_32f_aligned16::t1() {
printf("sse3 not available... no test performed\n");
}
#else
void qa_32fc_power_spectral_density_32f_aligned16::t1() {
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
const int vlen = 3201;
const int ITERS = 10000;
__VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen];
__VOLK_ATTR_ALIGNED(16) float output_generic[vlen];
__VOLK_ATTR_ALIGNED(16) float output_sse3[vlen];
const float scalar = vlen;
const float rbw = 1.7;
float* inputLoad = (float*)input0;
for(int i = 0; i < 2*vlen; ++i) {
inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
}
printf("32fc_power_spectral_density_32f_aligned\n");
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_gnsssdr_32fc_power_spectral_density_32f_aligned16_manual(output_generic, input0, scalar, rbw, vlen, "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_gnsssdr_32fc_power_spectral_density_32f_aligned16_manual(output_sse3, input0, scalar, rbw, vlen, "sse3");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse3_time: %f\n", total);
for(int i = 0; i < 1; ++i) {
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
}
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4));
}
}
#endif

View File

@ -0,0 +1,18 @@
#ifndef INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H
#define INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
class qa_32fc_power_spectral_density_32f_aligned16 : public CppUnit::TestCase {
CPPUNIT_TEST_SUITE (qa_32fc_power_spectral_density_32f_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
void t1 ();
};
#endif /* INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H */

View File

@ -0,0 +1,704 @@
#include "qa_utils.h"
#include <cstring>
#include <boost/foreach.hpp>
#include <boost/assign/list_of.hpp>
#include <boost/tokenizer.hpp>
#include <boost/xpressive/xpressive.hpp>
#include <iostream>
#include <vector>
#include <list>
#include <ctime>
#include <cmath>
#include <limits>
#include <boost/lexical_cast.hpp>
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <volk_gnsssdr/volk_gnsssdr_cpu.h>
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
#include <boost/typeof/typeof.hpp>
#include <boost/type_traits.hpp>
#include <stdio.h>
float uniform() {
return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1)
}
template <class t>
void random_floats (t *buf, unsigned n)
{
for (unsigned i = 0; i < n; i++)
buf[i] = uniform ();
}
void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n) {
if(type.is_complex) n *= 2;
if(type.is_float) {
if(type.size == 8) random_floats<double>((double *)data, n);
else random_floats<float>((float *)data, n);
} else {
float int_max = float(uint64_t(2) << (type.size*8));
if(type.is_signed) int_max /= 2.0;
for(unsigned int i=0; i<n; i++) {
float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))) * int_max;
//man i really don't know how to do this in a more clever way, you have to cast down at some point
switch(type.size) {
case 8:
if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand;
else ((uint64_t *)data)[i] = (uint64_t) scaled_rand;
break;
case 4:
if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand;
else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
break;
case 2:
if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand;
else ((uint16_t *)data)[i] = (uint16_t) scaled_rand;
break;
case 1:
if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand;
else ((uint8_t *)data)[i] = (uint8_t) scaled_rand;
break;
default:
throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here
}
}
}
}
static std::vector<std::string> get_arch_list(volk_gnsssdr_func_desc_t desc) {
std::vector<std::string> archlist;
for(size_t i = 0; i < desc.n_impls; i++) {
//if(!(archs[i+1] & volk_gnsssdr_get_lvarch())) continue; //this arch isn't available on this pc
archlist.push_back(std::string(desc.impl_names[i]));
}
return archlist;
}
volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string name) {
volk_gnsssdr_type_t type;
type.is_float = false;
type.is_scalar = false;
type.is_complex = false;
type.is_signed = false;
type.size = 0;
type.str = name;
if(name.size() < 2) throw std::string("name too short to be a datatype");
//is it a scalar?
if(name[0] == 's') {
type.is_scalar = true;
name = name.substr(1, name.size()-1);
}
//get the data size
size_t last_size_pos = name.find_last_of("0123456789");
if(last_size_pos == std::string::npos)
throw std::string("no size spec in type ").append(name);
//will throw if malformed
int size = boost::lexical_cast<int>(name.substr(0, last_size_pos+1));
assert(((size % 8) == 0) && (size <= 64) && (size != 0));
type.size = size/8; //in bytes
for(size_t i=last_size_pos+1; i < name.size(); i++) {
switch (name[i]) {
case 'f':
type.is_float = true;
break;
case 'i':
type.is_signed = true;
break;
case 'c':
type.is_complex = true;
break;
case 'u':
type.is_signed = false;
break;
default:
throw;
}
}
return type;
}
static void get_signatures_from_name(std::vector<volk_gnsssdr_type_t> &inputsig,
std::vector<volk_gnsssdr_type_t> &outputsig,
std::string name) {
boost::char_separator<char> sep("_");
boost::tokenizer<boost::char_separator<char> > tok(name, sep);
std::vector<std::string> toked;
tok.assign(name);
toked.assign(tok.begin(), tok.end());
assert(toked[0] == "volk");
toked.erase(toked.begin());
toked.erase(toked.begin());
//ok. we're assuming a string in the form
//(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment)
enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT;
std::string fn_name;
volk_gnsssdr_type_t type;
BOOST_FOREACH(std::string token, toked) {
try {
type = volk_gnsssdr_type_from_string(token);
if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name...
if(side == SIDE_INPUT) inputsig.push_back(type);
else outputsig.push_back(type);
} catch (...){
if(token[0] == 'x') { //it's a multiplier
if(side == SIDE_INPUT) assert(inputsig.size() > 0);
else assert(outputsig.size() > 0);
int multiplier = boost::lexical_cast<int>(token.substr(1, token.size()-1)); //will throw if invalid
for(int i=1; i<multiplier; i++) {
if(side == SIDE_INPUT) inputsig.push_back(inputsig.back());
else outputsig.push_back(outputsig.back());
}
}
else if(side == SIDE_INPUT) { //it's the function name, at least it better be
side = SIDE_NAME;
fn_name.append("_");
fn_name.append(token);
}
else if(side == SIDE_OUTPUT) {
if(token != toked.back()) throw; //the last token in the name is the alignment
}
}
}
//we don't need an output signature (some fn's operate on the input data, "in place"), but we do need at least one input!
assert(inputsig.size() != 0);
}
inline void run_cast_test1(volk_gnsssdr_fn_1arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], vlen, arch.c_str());
}
inline void run_cast_test2(volk_gnsssdr_fn_2arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], vlen, arch.c_str());
}
inline void run_cast_test3(volk_gnsssdr_fn_3arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str());
}
inline void run_cast_test4(volk_gnsssdr_fn_4arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str());
}
inline void run_cast_test1_s32f(volk_gnsssdr_fn_1arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
}
inline void run_cast_test2_s32f(volk_gnsssdr_fn_2arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
}
inline void run_cast_test3_s32f(volk_gnsssdr_fn_3arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
}
inline void run_cast_test1_s32fc(volk_gnsssdr_fn_1arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
}
inline void run_cast_test2_s32fc(volk_gnsssdr_fn_2arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
}
inline void run_cast_test3_s32fc(volk_gnsssdr_fn_3arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
}
//ADDED BY GNSS-SDR. START
inline void run_cast_test1_s8i(volk_gnsssdr_fn_1arg_s8i func, std::vector<void *> &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
}
inline void run_cast_test2_s8i(volk_gnsssdr_fn_2arg_s8i func, std::vector<void *> &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
}
inline void run_cast_test3_s8i(volk_gnsssdr_fn_3arg_s8i func, std::vector<void *> &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
}
inline void run_cast_test1_s8ic(volk_gnsssdr_fn_1arg_s8ic func, std::vector<void *> &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
}
inline void run_cast_test2_s8ic(volk_gnsssdr_fn_2arg_s8ic func, std::vector<void *> &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
}
inline void run_cast_test3_s8ic(volk_gnsssdr_fn_3arg_s8ic func, std::vector<void *> &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
}
inline void run_cast_test8(volk_gnsssdr_fn_8arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], vlen, arch.c_str());
}
inline void run_cast_test8_s8i(volk_gnsssdr_fn_8arg_s8i func, std::vector<void *> &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str());
}
inline void run_cast_test8_s8ic(volk_gnsssdr_fn_8arg_s8ic func, std::vector<void *> &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str());
}
inline void run_cast_test8_s32f(volk_gnsssdr_fn_8arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str());
}
inline void run_cast_test8_s32fc(volk_gnsssdr_fn_8arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str());
}
inline void run_cast_test12(volk_gnsssdr_fn_12arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], vlen, arch.c_str());
}
inline void run_cast_test12_s8i(volk_gnsssdr_fn_12arg_s8i func, std::vector<void *> &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str());
}
inline void run_cast_test12_s8ic(volk_gnsssdr_fn_12arg_s8ic func, std::vector<void *> &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str());
}
inline void run_cast_test12_s32f(volk_gnsssdr_fn_12arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str());
}
inline void run_cast_test12_s32fc(volk_gnsssdr_fn_12arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str());
}
//ADDED BY GNSS-SDR. END
// This function is a nop that helps resolve GNU Radio bugs 582 and 583.
// Without this the cast in run_volk_gnsssdr_tests for tol_i = static_cast<int>(float tol)
// won't happen on armhf (reported on cortex A9 and A15).
void lv_force_cast_hf( int tol_i, float tol_f)
{
int diff_i = 1;
float diff_f = 1;
if( diff_i > tol_i )
std::cout << "" ;
if( diff_f > tol_f )
std::cout << "" ;
}
template <class t>
bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
bool fail = false;
int print_max_errs = 10;
for(unsigned int i=0; i<vlen; i++) {
// for very small numbers we'll see round off errors due to limited
// precision. So a special test case...
if(fabs(((t *)(in1))[i]) < 1e-30) {
if( fabs( ((t *)(in2))[i] ) > tol )
{
fail=true;
if(print_max_errs-- > 0) {
std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]) << std::endl;
}
}
}
// the primary test is the percent different greater than given tol
else if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/(((t *)in1)[i]) > tol) {
fail=true;
if(print_max_errs-- > 0) {
std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]) << std::endl;
}
}
}
return fail;
}
template <class t>
bool ccompare(t *in1, t *in2, unsigned int vlen, float tol) {
bool fail = false;
int print_max_errs = 10;
for(unsigned int i=0; i<2*vlen; i+=2) {
t diff[2] = { in1[i] - in2[i], in1[i+1] - in2[i+1] };
t err = std::sqrt(diff[0] * diff[0] + diff[1] * diff[1]);
t norm = std::sqrt(in1[i] * in1[i] + in1[i+1] * in1[i+1]);
// for very small numbers we'll see round off errors due to limited
// precision. So a special test case...
if (norm < 1e-30) {
if (err > tol)
{
fail=true;
if(print_max_errs-- > 0) {
std::cout << "offset " << i/2 << " in1: " << in1[i] << " + " << in1[i+1] << "j in2: " << in2[i] << " + " << in2[i+1] << "j" << std::endl;
}
}
}
// the primary test is the percent different greater than given tol
else if((err / norm) > tol) {
fail=true;
if(print_max_errs-- > 0) {
std::cout << "offset " << i/2 << " in1: " << in1[i] << " + " << in1[i+1] << "j in2: " << in2[i] << " + " << in2[i+1] << "j" << std::endl;
}
}
}
return fail;
}
template <class t>
bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
bool fail = false;
int print_max_errs = 10;
for(unsigned int i=0; i<vlen; i++) {
if(abs(int(((t *)(in1))[i]) - int(((t *)(in2))[i])) > tol) {
fail=true;
if(print_max_errs-- > 0) {
std::cout << "offset " << i << " in1: " << static_cast<int>(t(((t *)(in1))[i])) << " in2: " << static_cast<int>(t(((t *)(in2))[i])) << std::endl;
}
}
}
return fail;
}
class volk_gnsssdr_qa_aligned_mem_pool{
public:
void *get_new(size_t size){
size_t alignment = volk_gnsssdr_get_alignment();
void* ptr = volk_gnsssdr_malloc(size, alignment);
memset(ptr, 0x00, size);
_mems.push_back(ptr);
return ptr;
}
~volk_gnsssdr_qa_aligned_mem_pool() {
for(unsigned int ii = 0; ii < _mems.size(); ++ii) {
volk_gnsssdr_free(_mems[ii]);
}
}
private: std::vector<void * > _mems;
};
bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
void (*manual_func)(),
std::string name,
float tol,
lv_32fc_t scalar,
int vlen,
int iter,
std::vector<std::string> *best_arch_vector = 0,
std::string puppet_master_name = "NULL",
bool benchmark_mode,
std::string kernel_regex
) {
boost::xpressive::sregex kernel_expression = boost::xpressive::sregex::compile(kernel_regex);
if( !boost::xpressive::regex_search(name, kernel_expression) ) {
// in this case we have a regex and are only looking to test one kernel
return false;
}
std::cout << "RUN_VOLK_TESTS: " << name << "(" << vlen << "," << iter << ")" << std::endl;
// The multiply and lv_force_cast_hf are work arounds for GNU Radio bugs 582 and 583
// The bug is the casting/assignment below do not happen, which results in false
// positives when testing for errors in fcompare and icompare.
// Since this only happens on armhf (reported for Cortex A9 and A15) combined with
// the following fixes it is suspected to be a compiler bug.
// Bug 1272024 on launchpad has been filed with Linaro GCC.
const float tol_f = tol*1.0000001;
const unsigned int tol_i = static_cast<const unsigned int>(tol);
lv_force_cast_hf( tol_i, tol_f );
//first let's get a list of available architectures for the test
std::vector<std::string> arch_list = get_arch_list(desc);
if((!benchmark_mode) && (arch_list.size() < 2)) {
std::cout << "no architectures to test" << std::endl;
return false;
}
//something that can hang onto memory and cleanup when this function exits
volk_gnsssdr_qa_aligned_mem_pool mem_pool;
//now we have to get a function signature by parsing the name
std::vector<volk_gnsssdr_type_t> inputsig, outputsig;
get_signatures_from_name(inputsig, outputsig, name);
//pull the input scalars into their own vector
std::vector<volk_gnsssdr_type_t> inputsc;
for(size_t i=0; i<inputsig.size(); i++) {
if(inputsig[i].is_scalar) {
inputsc.push_back(inputsig[i]);
inputsig.erase(inputsig.begin() + i);
i -= 1;
}
}
//for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << inputsig[i].str << std::endl;
//for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << outputsig[i].str << std::endl;
std::vector<void *> inbuffs;
BOOST_FOREACH(volk_gnsssdr_type_t sig, inputsig) {
if(!sig.is_scalar) //we don't make buffers for scalars
inbuffs.push_back(mem_pool.get_new(vlen*sig.size*(sig.is_complex ? 2 : 1)));
}
for(size_t i=0; i<inbuffs.size(); i++) {
load_random_data(inbuffs[i], inputsig[i], vlen);
}
//ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch
std::vector<std::vector<void *> > test_data;
for(size_t i=0; i<arch_list.size(); i++) {
std::vector<void *> arch_buffs;
for(size_t j=0; j<outputsig.size(); j++) {
arch_buffs.push_back(mem_pool.get_new(vlen*outputsig[j].size*(outputsig[j].is_complex ? 2 : 1)));
}
for(size_t j=0; j<inputsig.size(); j++) {
arch_buffs.push_back(inbuffs[j]);
}
test_data.push_back(arch_buffs);
}
std::vector<volk_gnsssdr_type_t> both_sigs;
both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end());
both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end());
//now run the test
clock_t start, end;
std::vector<double> profile_times;
for(size_t i = 0; i < arch_list.size(); i++) {
start = clock();
switch(both_sigs.size()) {
case 1:
if(inputsc.size() == 0) {
run_cast_test1((volk_gnsssdr_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
} else if(inputsc.size() == 1 && inputsc[0].is_float) {
if(inputsc[0].is_complex) {
run_cast_test1_s32fc((volk_gnsssdr_fn_1arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
} else {
run_cast_test1_s32f((volk_gnsssdr_fn_1arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
}
}
//ADDED BY GNSS-SDR. START
else if(inputsc.size() == 1 && !inputsc[0].is_float) {
if(inputsc[0].is_complex) {
run_cast_test1_s8ic((volk_gnsssdr_fn_1arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
} else {
run_cast_test1_s8i((volk_gnsssdr_fn_1arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
}
}
//ADDED BY GNSS-SDR. END
else throw "unsupported 1 arg function >1 scalars";
break;
case 2:
if(inputsc.size() == 0) {
run_cast_test2((volk_gnsssdr_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
} else if(inputsc.size() == 1 && inputsc[0].is_float) {
if(inputsc[0].is_complex) {
run_cast_test2_s32fc((volk_gnsssdr_fn_2arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
} else {
run_cast_test2_s32f((volk_gnsssdr_fn_2arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
}
}
//ADDED BY GNSS-SDR. START
else if(inputsc.size() == 1 && !inputsc[0].is_float) {
if(inputsc[0].is_complex) {
run_cast_test2_s8ic((volk_gnsssdr_fn_2arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
} else {
run_cast_test2_s8i((volk_gnsssdr_fn_2arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
}
}
//ADDED BY GNSS-SDR. END
else throw "unsupported 2 arg function >1 scalars";
break;
case 3:
if(inputsc.size() == 0) {
run_cast_test3((volk_gnsssdr_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
} else if(inputsc.size() == 1 && inputsc[0].is_float) {
if(inputsc[0].is_complex) {
run_cast_test3_s32fc((volk_gnsssdr_fn_3arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
} else {
run_cast_test3_s32f((volk_gnsssdr_fn_3arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
}
}
//ADDED BY GNSS-SDR. START
else if(inputsc.size() == 1 && !inputsc[0].is_float) {
if(inputsc[0].is_complex) {
run_cast_test3_s8ic((volk_gnsssdr_fn_3arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
} else {
run_cast_test3_s8i((volk_gnsssdr_fn_3arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
}
}
//ADDED BY GNSS-SDR. END
else throw "unsupported 3 arg function >1 scalars";
break;
case 4:
run_cast_test4((volk_gnsssdr_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
break;
//ADDED BY GNSS-SDR. START
case 8:
if(inputsc.size() == 0) {
run_cast_test8((volk_gnsssdr_fn_8arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
} else if(inputsc.size() == 1 && inputsc[0].is_float) {
if(inputsc[0].is_complex) {
run_cast_test8_s32fc((volk_gnsssdr_fn_8arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
} else {
run_cast_test8_s32f((volk_gnsssdr_fn_8arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
}
}
else if(inputsc.size() == 1 && !inputsc[0].is_float) {
if(inputsc[0].is_complex) {
run_cast_test8_s8ic((volk_gnsssdr_fn_8arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
} else {
run_cast_test8_s8i((volk_gnsssdr_fn_8arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
}
}
else throw "unsupported 8 arg function >1 scalars";
break;
case 12:
if(inputsc.size() == 0) {
run_cast_test12((volk_gnsssdr_fn_12arg)(manual_func), test_data[i], vlen, iter, arch_list[i]);
} else if(inputsc.size() == 1 && inputsc[0].is_float) {
if(inputsc[0].is_complex) {
run_cast_test12_s32fc((volk_gnsssdr_fn_12arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
} else {
run_cast_test12_s32f((volk_gnsssdr_fn_12arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
}
}
else if(inputsc.size() == 1 && !inputsc[0].is_float) {
if(inputsc[0].is_complex) {
run_cast_test12_s8ic((volk_gnsssdr_fn_12arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]);
} else {
run_cast_test12_s8i((volk_gnsssdr_fn_12arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]);
}
}
else throw "unsupported 12 arg function >1 scalars";
break;
//ADDED BY GNSS-SDR. END
default:
throw "no function handler for this signature";
break;
}
end = clock();
double arch_time = 1000.0 * (double)(end-start)/(double)CLOCKS_PER_SEC;
std::cout << arch_list[i] << " completed in " << arch_time << "ms" << std::endl;
profile_times.push_back(arch_time);
}
//and now compare each output to the generic output
//first we have to know which output is the generic one, they aren't in order...
size_t generic_offset=0;
for(size_t i=0; i<arch_list.size(); i++)
if(arch_list[i] == "generic") generic_offset=i;
//now compare
//if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
bool fail = false;
bool fail_global = false;
std::vector<bool> arch_results;
for(size_t i=0; i<arch_list.size(); i++) {
fail = false;
if(i != generic_offset) {
for(size_t j=0; j<both_sigs.size(); j++) {
if(both_sigs[j].is_float) {
if(both_sigs[j].size == 8) {
if (both_sigs[j].is_complex) {
fail = ccompare((double *) test_data[generic_offset][j], (double *) test_data[i][j], vlen, tol_f);
} else {
fail = fcompare((double *) test_data[generic_offset][j], (double *) test_data[i][j], vlen, tol_f);
}
} else {
if (both_sigs[j].is_complex) {
fail = ccompare((float *) test_data[generic_offset][j], (float *) test_data[i][j], vlen, tol_f);
} else {
fail = fcompare((float *) test_data[generic_offset][j], (float *) test_data[i][j], vlen, tol_f);
}
}
} else {
//i could replace this whole switch statement with a memcmp if i wasn't interested in printing the outputs where they differ
switch(both_sigs[j].size) {
case 8:
if(both_sigs[j].is_signed) {
fail = icompare((int64_t *) test_data[generic_offset][j], (int64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
} else {
fail = icompare((uint64_t *) test_data[generic_offset][j], (uint64_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
}
break;
case 4:
if(both_sigs[j].is_signed) {
fail = icompare((int32_t *) test_data[generic_offset][j], (int32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
} else {
fail = icompare((uint32_t *) test_data[generic_offset][j], (uint32_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
}
break;
case 2:
if(both_sigs[j].is_signed) {
fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
} else {
fail = icompare((uint16_t *) test_data[generic_offset][j], (uint16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
}
break;
case 1:
if(both_sigs[j].is_signed) {
fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
} else {
fail = icompare((uint8_t *) test_data[generic_offset][j], (uint8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
}
break;
default:
fail=1;
}
}
if(fail) {
fail_global = true;
std::cout << name << ": fail on arch " << arch_list[i] << std::endl;
}
//fail = memcmp(outbuffs[generic_offset], outbuffs[i], outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
}
}
arch_results.push_back(!fail);
}
double best_time_a = std::numeric_limits<double>::max();
double best_time_u = std::numeric_limits<double>::max();
std::string best_arch_a = "generic";
std::string best_arch_u = "generic";
for(size_t i=0; i < arch_list.size(); i++)
{
if((profile_times[i] < best_time_u) && arch_results[i] && desc.impl_alignment[i] == 0)
{
best_time_u = profile_times[i];
best_arch_u = arch_list[i];
}
if((profile_times[i] < best_time_a) && arch_results[i])
{
best_time_a = profile_times[i];
best_arch_a = arch_list[i];
}
}
std::cout << "Best aligned arch: " << best_arch_a << std::endl;
std::cout << "Best unaligned arch: " << best_arch_u << std::endl;
if(best_arch_vector) {
if(puppet_master_name == "NULL") {
best_arch_vector->push_back(name + " " + best_arch_a + " " + best_arch_u);
}
else {
best_arch_vector->push_back(puppet_master_name + " " + best_arch_a + " " + best_arch_u);
}
}
return fail_global;
}

View File

@ -0,0 +1,62 @@
#ifndef VOLK_QA_UTILS_H
#define VOLK_QA_UTILS_H
#include <cstdlib>
#include <string>
#include <vector>
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <volk_gnsssdr/volk_gnsssdr_common.h>
struct volk_gnsssdr_type_t {
bool is_float;
bool is_scalar;
bool is_signed;
bool is_complex;
int size;
std::string str;
};
volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string);
float uniform(void);
void random_floats(float *buf, unsigned n);
bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t, void(*)(), std::string, float, lv_32fc_t, int, int, std::vector<std::string> *, std::string, bool benchmark_mode=false, std::string kernel_regex="");
#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_gnsssdr_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0, "NULL"), 0); }
#define VOLK_PROFILE(func, tol, scalar, len, iter, results, bnmode, kernel_regex) run_volk_gnsssdr_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results, "NULL", bnmode, kernel_regex)
#define VOLK_PUPPET_PROFILE(func, puppet_master_func, tol, scalar, len, iter, results, bnmode, kernel_regex) run_volk_gnsssdr_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results, std::string(#puppet_master_func), bnmode, kernel_regex)
typedef void (*volk_gnsssdr_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place
typedef void (*volk_gnsssdr_fn_2arg)(void *, void *, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_3arg)(void *, void *, void *, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, one scalar float input
typedef void (*volk_gnsssdr_fn_2arg_s32f)(void *, void *, float, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_3arg_s32f)(void *, void *, void *, float, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_1arg_s32fc)(void *, lv_32fc_t, unsigned int, const char*); //one input vector, one scalar float input
typedef void (*volk_gnsssdr_fn_2arg_s32fc)(void *, void *, lv_32fc_t, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_3arg_s32fc)(void *, void *, void *, lv_32fc_t, unsigned int, const char*);
//ADDED BY GNSS-SDR. START
typedef void (*volk_gnsssdr_fn_1arg_s8i)(void *, char, unsigned int, const char*); //one input vector, one scalar char input
typedef void (*volk_gnsssdr_fn_2arg_s8i)(void *, void *, char, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_3arg_s8i)(void *, void *, void *, char, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_1arg_s8ic)(void *, lv_8sc_t, unsigned int, const char*); //one input vector, one scalar lv_8sc_t vector input
typedef void (*volk_gnsssdr_fn_2arg_s8ic)(void *, void *, lv_8sc_t, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_3arg_s8ic)(void *, void *, void *, lv_8sc_t, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_8arg)(void *, void *, void *, void *, void *, void *, void *, void *, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_8arg_s32f)(void *, void *, void *, void *, void *, void *, void *, void *, float, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_8arg_s32fc)(void *, void *, void *, void *, void *, void *, void *, void *, lv_32fc_t, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_8arg_s8i)(void *, void *, void *, void *, void *, void *, void *, void *, char, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_8arg_s8ic)(void *, void *, void *, void *, void *, void *, void *, void *, lv_8sc_t, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_12arg)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_12arg_s32f)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, float, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_12arg_s32fc)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, lv_32fc_t, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_12arg_s8i)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, char, unsigned int, const char*);
typedef void (*volk_gnsssdr_fn_12arg_s8ic)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, lv_8sc_t, unsigned int, const char*);
//ADDED BY GNSS-SDR. END
#endif //VOLK_QA_UTILS_H

View File

@ -0,0 +1,67 @@
/* -*- c++ -*- */
/*
* Copyright 2012-2014 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/
#include "qa_utils.h"
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <boost/test/unit_test.hpp>
//GNSS-SDR PROTO-KERNELS
VOLK_RUN_TESTS(volk_gnsssdr_32fc_x2_multiply_32fc, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x2_multiply_8ic, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8u_x2_multiply_8u, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_x2_dot_prod_32fc, 1e-4, 0, 204603, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x2_dot_prod_8ic, 1e-4, 0, 204603, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_conjugate_32fc, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_conjugate_8ic, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32f_x2_add_32f, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8i_x2_add_8i, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32f_index_max_16u, 3, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8i_index_max_16u, 3, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32f_accumulator_s32f, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8i_accumulator_s8i, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_magnitude_squared_32f, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_magnitude_squared_8i, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_16i_s32f_convert_32f, 1e-4, 32768.0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_8i_max_s8i, 3, 0, 20462, 1);
//VOLK_RUN_TESTS(volk_gnsssdr_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000);
//VOLK_RUN_TESTS(volk_gnsssdr_16i_branch_4_state_8, 1e-4, 2046, 10000);
//VOLK_RUN_TESTS(volk_gnsssdr_16i_max_star_16i, 0, 0, 20462, 10000);
//VOLK_RUN_TESTS(volk_gnsssdr_16i_max_star_horizontal_16i, 0, 0, 20462, 10000);
//VOLK_RUN_TESTS(volk_gnsssdr_16i_permute_and_scalar_add, 1e-4, 0, 2046, 1000);
//VOLK_RUN_TESTS(volk_gnsssdr_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 1000);
//VOLK_RUN_TESTS(volk_gnsssdr_16i_32fc_dot_prod_32fc, 1e-4, 0, 204602, 1);
//VOLK_RUN_TESTS(volk_gnsssdr_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 2046, 10000);
//VOLK_RUN_TESTS(volk_gnsssdr_32fc_s32f_x2_power_spectral_density_32f, 1e-4, 2046, 10000);
//VOLK_RUN_TESTS(volk_gnsssdr_32f_s32f_32f_fm_detect_32f, 1e-4, 2046, 10000);
//VOLK_RUN_TESTS(volk_gnsssdr_32u_popcnt, 0, 0, 2046, 10000);
//VOLK_RUN_TESTS(volk_gnsssdr_64u_popcnt, 0, 0, 2046, 10000);

View File

@ -0,0 +1,142 @@
/* -*- c -*- */
/*
* Copyright 2014 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/
#include <pthread.h>
#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
#include <stdio.h>
#include <stdlib.h>
/*
* For #defines used to determine support for allocation functions,
* see: http://linux.die.net/man/3/aligned_alloc
*/
// Disabling use of aligned_alloc. This function requires that size be
// a multiple of alignment, which is too restrictive for many uses of
// VOLK.
//// If we are using C11 standard, use the aligned_alloc
//#ifdef _ISOC11_SOURCE
//
//void *volk_gnsssdr_malloc(size_t size, size_t alignment)
//{
// void *ptr = aligned_alloc(alignment, size);
// if(ptr == NULL) {
// fprintf(stderr, "VOLK: Error allocating memory (aligned_alloc)\n");
// }
// return ptr;
//}
//
//void volk_gnsssdr_free(void *ptr)
//{
// free(ptr);
//}
//
//#else // _ISOC11_SOURCE
// Otherwise, test if we are a POSIX or X/Open system
// This only has a restriction that alignment be a power of 2.
#if _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || HAVE_POSIX_MEMALIGN
void *volk_gnsssdr_malloc(size_t size, size_t alignment)
{
void *ptr;
int err = posix_memalign(&ptr, alignment, size);
if(err == 0) {
return ptr;
}
else {
fprintf(stderr, "VOLK: Error allocating memory (posix_memalign: %d)\n", err);
return NULL;
}
}
void volk_gnsssdr_free(void *ptr)
{
free(ptr);
}
// _aligned_malloc has no restriction on size,
// available on Windows since Visual C++ 2005
#elif _MSC_VER >= 1400
void *volk_gnsssdr_malloc(size_t size, size_t alignment)
{
void *ptr = _aligned_malloc(size, alignment);
if(ptr == NULL) {
fprintf(stderr, "VOLK: Error allocating memory (_aligned_malloc)\n");
}
return ptr;
}
void volk_gnsssdr_free(void *ptr)
{
_aligned_free(ptr);
}
// No standard handlers; we'll do it ourselves.
#else // _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || HAVE_POSIX_MEMALIGN
struct block_info
{
void *real;
};
void *
volk_gnsssdr_malloc(size_t size, size_t alignment)
{
void *real, *user;
struct block_info *info;
/* At least align to sizeof our struct */
if (alignment < sizeof(struct block_info))
alignment = sizeof(struct block_info);
/* Alloc */
real = malloc(size + (2 * alignment - 1));
/* Get pointer to the various zones */
user = (void *)((((uintptr_t) real) + sizeof(struct block_info) + alignment - 1) & ~(alignment - 1));
info = (struct block_info *)(((uintptr_t)user) - sizeof(struct block_info));
/* Store the info for the free */
info->real = real;
/* Return pointer to user */
return user;
}
void
volk_gnsssdr_free(void *ptr)
{
struct block_info *info;
/* Get the real pointer */
info = (struct block_info *)(((uintptr_t)ptr) - sizeof(struct block_info));
/* Release real pointer */
free(info->real);
}
#endif // _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || HAVE_POSIX_MEMALIGN
//#endif // _ISOC11_SOURCE

View File

@ -0,0 +1,50 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <volk_gnsssdr/volk_gnsssdr_prefs.h>
//#if defined(_WIN32)
//#include <Windows.h>
//#endif
void volk_gnsssdr_get_config_path(char *path)
{
const char *suffix = "/.volk_gnsssdr/volk_gnsssdr_config";
char *home = NULL;
if (home == NULL) home = getenv("HOME");
if (home == NULL) home = getenv("APPDATA");
if (home == NULL){
path = NULL;
return;
}
strcpy(path, home);
strcat(path, suffix);
}
size_t volk_gnsssdr_load_preferences(volk_gnsssdr_arch_pref_t **prefs_res)
{
FILE *config_file;
char path[512], line[512];
size_t n_arch_prefs = 0;
volk_gnsssdr_arch_pref_t *prefs = NULL;
//get the config path
volk_gnsssdr_get_config_path(path);
if (path == NULL) return n_arch_prefs; //no prefs found
config_file = fopen(path, "r");
if(!config_file) return n_arch_prefs; //no prefs found
//reset the file pointer and write the prefs into volk_gnsssdr_arch_prefs
while(fgets(line, sizeof(line), config_file) != NULL)
{
prefs = (volk_gnsssdr_arch_pref_t *) realloc(prefs, (n_arch_prefs+1) * sizeof(*prefs));
volk_gnsssdr_arch_pref_t *p = prefs + n_arch_prefs;
if(sscanf(line, "%s %s %s", p->name, p->impl_a, p->impl_u) == 3 && !strncmp(p->name, "volk_gnsssdr_", 5))
{
n_arch_prefs++;
}
}
fclose(config_file);
*prefs_res = prefs;
return n_arch_prefs;
}

View File

@ -0,0 +1,119 @@
/*
* Copyright 2011-2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/
#include <volk_gnsssdr_rank_archs.h>
#include <volk_gnsssdr/volk_gnsssdr_prefs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4
#define __popcnt __builtin_popcount
#else
inline unsigned __popcnt(unsigned num)
{
unsigned pop = 0;
while(num)
{
if (num & 0x1) pop++;
num >>= 1;
}
return pop;
}
#endif
int volk_gnsssdr_get_index(
const char *impl_names[], //list of implementations by name
const size_t n_impls, //number of implementations available
const char *impl_name //the implementation name to find
){
unsigned int i;
for (i = 0; i < n_impls; i++) {
if(!strncmp(impl_names[i], impl_name, 20)) {
return i;
}
}
//TODO return -1;
//something terrible should happen here
printf("Volk warning: no arch found, returning generic impl\n");
return volk_gnsssdr_get_index(impl_names, n_impls, "generic"); //but we'll fake it for now
}
int volk_gnsssdr_rank_archs(
const char *kern_name, //name of the kernel to rank
const char *impl_names[], //list of implementations by name
const int* impl_deps, //requirement mask per implementation
const bool* alignment, //alignment status of each implementation
size_t n_impls, //number of implementations available
const bool align //if false, filter aligned implementations
){
size_t i;
static volk_gnsssdr_arch_pref_t *volk_gnsssdr_arch_prefs;
static size_t n_arch_prefs = 0;
static int prefs_loaded = 0;
if(!prefs_loaded) {
n_arch_prefs = volk_gnsssdr_load_preferences(&volk_gnsssdr_arch_prefs);
prefs_loaded = 1;
}
// If we've defined VOLK_GENERIC to be anything, always return the
// 'generic' kernel. Used in GR's QA code.
char *gen_env = getenv("VOLK_GENERIC");
if(gen_env) {
return volk_gnsssdr_get_index(impl_names, n_impls, "generic");
}
//now look for the function name in the prefs list
for(i = 0; i < n_arch_prefs; i++)
{
if(!strncmp(kern_name, volk_gnsssdr_arch_prefs[i].name, sizeof(volk_gnsssdr_arch_prefs[i].name))) //found it
{
const char *impl_name = align? volk_gnsssdr_arch_prefs[i].impl_a : volk_gnsssdr_arch_prefs[i].impl_u;
return volk_gnsssdr_get_index(impl_names, n_impls, impl_name);
}
}
//return the best index with the largest deps
size_t best_index_a = 0;
size_t best_index_u = 0;
int best_value_a = -1;
int best_value_u = -1;
for(i = 0; i < n_impls; i++)
{
const signed val = __popcnt(impl_deps[i]);
if (alignment[i] && val > best_value_a)
{
best_index_a = i;
best_value_a = val;
}
if (!alignment[i] && val > best_value_u)
{
best_index_u = i;
best_value_u = val;
}
}
//when align and we found a best aligned, use it
if (align && best_value_a != -1) return best_index_a;
//otherwise return the best unaligned
return best_index_u;
}

View File

@ -0,0 +1,50 @@
/*
* Copyright 2011-2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/
#ifndef INCLUDED_VOLK_RANK_ARCHS_H
#define INCLUDED_VOLK_RANK_ARCHS_H
#include <stdlib.h>
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
int volk_gnsssdr_get_index(
const char *impl_names[], //list of implementations by name
const size_t n_impls, //number of implementations available
const char *impl_name //the implementation name to find
);
int volk_gnsssdr_rank_archs(
const char *kern_name, //name of the kernel to rank
const char *impl_names[], //list of implementations by name
const int* impl_deps, //requirement mask per implementation
const bool* alignment, //alignment status of each implementation
size_t n_impls, //number of implementations available
const bool align //if false, filter aligned implementations
);
#ifdef __cplusplus
}
#endif
#endif /*INCLUDED_VOLK_RANK_ARCHS_H*/

View File

@ -0,0 +1,25 @@
.function volk_gnsssdr_16ic_magnitude_32f_a_orc_impl
.source 4 src
.dest 4 dst
.floatparam 4 scalar
.temp 4 reall
.temp 4 imagl
.temp 2 reals
.temp 2 imags
.temp 4 realf
.temp 4 imagf
.temp 4 sumf
splitlw reals, imags, src
convswl reall, reals
convswl imagl, imags
convlf realf, reall
convlf imagf, imagl
divf realf, realf, scalar
divf imagf, imagf, scalar
mulf realf, realf, realf
mulf imagf, imagf, imagf
addf sumf, realf, imagf
sqrtf dst, sumf

View File

@ -0,0 +1,5 @@
.function volk_gnsssdr_32f_x2_add_32f_a_orc_impl
.dest 4 dst
.source 4 src1
.source 4 src2
addf dst, src1, src2

View File

@ -0,0 +1,18 @@
.function volk_gnsssdr_32fc_s32fc_multiply_32fc_a_orc_impl
.source 8 src1
.floatparam 8 scalar
.dest 8 dst
.temp 8 iqprod
.temp 4 real
.temp 4 imag
.temp 4 ac
.temp 4 bd
.temp 8 swapped
x2 mulf iqprod, src1, scalar
splitql bd, ac, iqprod
subf real, ac, bd
swaplq swapped, src1
x2 mulf iqprod, swapped, scalar
splitql bd, ac, iqprod
addf imag, ac, bd
mergelq dst, real, imag

View File

@ -0,0 +1,18 @@
.function volk_gnsssdr_32fc_x2_multiply_32fc_a_orc_impl
.source 8 src1
.source 8 src2
.dest 8 dst
.temp 8 iqprod
.temp 4 real
.temp 4 imag
.temp 4 ac
.temp 4 bd
.temp 8 swapped
x2 mulf iqprod, src1, src2
splitql bd, ac, iqprod
subf real, ac, bd
swaplq swapped, src1
x2 mulf iqprod, swapped, src2
splitql bd, ac, iqprod
addf imag, ac, bd
mergelq dst, real, imag

View File

@ -0,0 +1,40 @@
#/*!
# * \file volk_gnsssdr_8i_accumulator_s8i.orc
# * \brief ORC implementation: 8 bits (char) scalar accumulator
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that implements an accumulator of char values
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
.function volk_gnsssdr_8i_accumulator_s8i_a_orc_impl
.source 1 src1
.accumulator 2 acc
.temp 2 sum
mergebw sum, 0, src1
accw acc, sum

View File

@ -0,0 +1,39 @@
#/*!
# * \file volk_gnsssdr_8i_x2_add_8i.orc
# * \brief ORC implementation: adds pairs of 8 bits (char) scalars
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that adds pairs of 8 bits (char) scalars
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
.function volk_gnsssdr_8i_x2_add_8i_a_orc_impl
.dest 1 dst
.source 1 src1
.source 1 src2
addb dst, src1, src2

View File

@ -0,0 +1,42 @@
#/*!
# * \file volk_gnsssdr_8ic_conjugate_8ic.orc
# * \brief ORC implementation: calculates the conjugate of a 16 bits vector
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that calculates the conjugate of a
# * 16 bits vector (8 bits the real part and 8 bits the imaginary part)
# * result = (real*real) + (imag*imag)
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
.function volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl
.source 2 src1
.dest 2 dst
.temp 2 merged
mergebw merged, 1, -1
x2 mullb dst, merged, src1

View File

@ -0,0 +1,45 @@
#/*!
# * \file volk_gnsssdr_8ic_magnitude_squared_8i.orc
# * \brief ORC implementation: calculates the magnitude squared of a 16 bits vector
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that calculates the magnitude squared of a
# * 16 bits vector (8 bits the real part and 8 bits the imaginary part)
# * result = (real*real) + (imag*imag)
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
.function volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl
.source 2 src1
.dest 1 dst
.temp 2 iqprod
.temp 1 ac
.temp 1 bd
x2 mullb iqprod, src1, src1
splitwb bd, ac, iqprod
addb dst, ac, bd

View File

@ -0,0 +1,58 @@
#/*!
# * \file volk_gnsssdr_8ic_s8ic_multiply_8ic.orc
# * \brief ORC implementation: multiplies a group of 16 bits vectors by one constant vector
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that multiplies a group of 16 bits vectors
# * (8 bits the real part and 8 bits the imaginary part) by one constant vector
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
.function volk_gnsssdr_8ic_s8ic_multiply_8ic_a_orc_impl
.source 2 src1
.param 2 src2real
.param 2 src2imag
.dest 2 dst
.temp 2 iqprod
.temp 1 real
.temp 1 imag
.temp 1 rr
.temp 1 ii
.temp 1 ri
.temp 1 ir
x2 mullb iqprod, src1, src2real
splitwb ir, rr, iqprod
x2 mullb iqprod, src1, src2imag
splitwb ii, ri, iqprod
subb real, rr, ii
addb imag, ri, ir
mergebw dst, real, imag

View File

@ -0,0 +1,59 @@
#/*!
# * \file volk_gnsssdr_8ic_x2_dot_prod_8ic.orc
# * \brief ORC implementation: multiplies two 16 bits vectors and accumulates them
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that multiplies two 16 bits vectors (8 bits the real part
# * and 8 bits the imaginary part) and accumulates them
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
.function volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl
.source 2 src1
.source 2 src2
.accumulator 2 accreal
.accumulator 2 accimag
.temp 2 iqprod
.temp 1 real
.temp 1 imag
.temp 2 real2
.temp 2 imag2
.temp 1 ac
.temp 1 bd
.temp 2 swapped
x2 mullb iqprod, src1, src2
splitwb bd, ac, iqprod
subb real, ac, bd
swapw swapped, src1
x2 mullb iqprod, swapped, src2
splitwb bd, ac, iqprod
addb imag, ac, bd
mergebw real2, 0, real
accw accreal, real2
mergebw imag2, 0, imag
accw accimag, imag2

View File

@ -0,0 +1,57 @@
#/*!
# * \file volk_gnsssdr_8ic_x2_multiply_8ic.orc
# * \brief ORC implementation: multiplies two 16 bits vectors
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that multiplies two 16 bits vectors (8 bits the real part
# * and 8 bits the imaginary part)
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
.function volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl
.source 2 src1
.source 2 src2
.dest 2 dst
.temp 2 iqprod
.temp 1 real
.temp 1 imag
.temp 1 ac
.temp 1 bd
.temp 2 swapped
x2 mullb iqprod, src1, src2
splitwb bd, ac, iqprod
subb real, ac, bd
swapw swapped, src1
x2 mullb iqprod, swapped, src2
splitwb bd, ac, iqprod
addb imag, ac, bd
mergebw dst, real, imag

View File

@ -0,0 +1,139 @@
#/*!
# * \file volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.orc
# * \brief ORC implementation: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that performs the carrier wipe-off mixing and the
# * Early, Prompt, and Late correlation with 16 bits vectors (8 bits the
# * real part and 8 bits the imaginary part):
# * - The carrier wipe-off is done by multiplying the input signal by the
# * carrier (multiplication of 16 bits vectors) It returns the input
# * signal in base band (BB)
# * - Early values are calculated by multiplying the input signal in BB by the
# * early code (multiplication of 16 bits vectors), accumulating the results
# * - Prompt values are calculated by multiplying the input signal in BB by the
# * prompt code (multiplication of 16 bits vectors), accumulating the results
# * - Late values are calculated by multiplying the input signal in BB by the
# * late code (multiplication of 16 bits vectors), accumulating the results
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
.function volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_first_a_orc_impl
.source 2 input
.source 2 carrier
.source 2 E_code
.source 2 P_code
.accumulator 2 E_out_real
.accumulator 2 E_out_imag
.accumulator 2 P_out_real
.accumulator 2 P_out_imag
.temp 2 bb_signal_sample
.temp 2 iqprod
.temp 1 real
.temp 1 imag
.temp 1 ac
.temp 1 bd
.temp 2 swapped
.temp 2 real2
.temp 2 imag2
x2 mullb iqprod, input, carrier
splitwb bd, ac, iqprod
subb real, ac, bd
swapw swapped, input
x2 mullb iqprod, swapped, carrier
splitwb bd, ac, iqprod
addb imag, ac, bd
mergebw bb_signal_sample, real, imag
swapw swapped, bb_signal_sample
x2 mullb iqprod, bb_signal_sample, E_code
splitwb bd, ac, iqprod
subb real, ac, bd
x2 mullb iqprod, swapped, E_code
splitwb bd, ac, iqprod
addb imag, ac, bd
mergebw real2, 0, real
mergebw imag2, 0, imag
accw E_out_real, real2
accw E_out_imag, imag2
x2 mullb iqprod, bb_signal_sample, P_code
splitwb bd, ac, iqprod
subb real, ac, bd
x2 mullb iqprod, swapped, P_code
splitwb bd, ac, iqprod
addb imag, ac, bd
mergebw real2, 0, real
mergebw imag2, 0, imag
accw P_out_real, real2
accw P_out_imag, imag2
.function volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_second_a_orc_impl
.source 2 input
.source 2 carrier
.source 2 L_code
.accumulator 2 L_out_real
.accumulator 2 L_out_imag
.temp 2 bb_signal_sample
.temp 2 iqprod
.temp 1 real
.temp 1 imag
.temp 1 ac
.temp 1 bd
.temp 2 swapped
.temp 2 real2
.temp 2 imag2
x2 mullb iqprod, input, carrier
splitwb bd, ac, iqprod
subb real, ac, bd
swapw swapped, input
x2 mullb iqprod, swapped, carrier
splitwb bd, ac, iqprod
addb imag, ac, bd
mergebw bb_signal_sample, real, imag
swapw swapped, bb_signal_sample
x2 mullb iqprod, bb_signal_sample, L_code
splitwb bd, ac, iqprod
subb real, ac, bd
x2 mullb iqprod, swapped, L_code
splitwb bd, ac, iqprod
addb imag, ac, bd
mergebw real2, 0, real
mergebw imag2, 0, imag
accw L_out_real, real2
accw L_out_imag, imag2

View File

@ -0,0 +1,39 @@
#/*!
# * \file volk_gnsssdr_8u_x2_multiply_8u.orc
# * \brief ORC implementation: multiplies unsigned char values
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that multiplies unsigned char values (8 bits data)
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
.function volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl
.source 1 src1
.source 1 src2
.dest 1 dst
mullb dst, src1, src2

View File

@ -0,0 +1,39 @@
# Copyright 2013 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
# GNU Radio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GNU Radio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Radio; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street,
# Boston, MA 02110-1301, USA.
########################################################################
# Install python files and apps
########################################################################
include(GrPython)
VOLK_PYTHON_INSTALL(
FILES
__init__.py
cfg.py
volk_gnsssdr_modtool_generate.py
DESTINATION ${VOLK_PYTHON_DIR}/volk_gnsssdr_modtool
COMPONENT "volk_gnsssdr"
)
VOLK_PYTHON_INSTALL(
PROGRAMS
volk_gnsssdr_modtool
DESTINATION ${VOLK_RUNTIME_DIR}
COMPONENT "volk_gnsssdr"
)

View File

@ -0,0 +1,114 @@
The volk_gnsssdr_modtool tool is installed along with VOLK as a way of helping
to construct, add to, and interogate the VOLK library or companion
libraries.
volk_gnsssdr_modtool is installed into $prefix/bin.
VOLK modtool enables creating standalone (out-of-tree) VOLK modules
and provides a few tools for sharing VOLK kernels between VOLK
modules. If you need to design or work with VOLK kernels away from
the canonical VOLK library, this is the tool. If you need to tailor
your own VOLK library for whatever reason, this is the tool.
The canonical VOLK library installs a volk_gnsssdr.h and a libvolk_gnsssdr.so. Your
own library will install volk_gnsssdr_$name.h and libvolk_gnsssdr_$name.so. Ya Gronk?
Good.
There isn't a substantial difference between the canonical VOLK
module and any other VOLK module. They're all peers. Any module
created via VOLK modtool will come complete with a default
volk_gnsssdr_modtool.cfg file associating the module with the base from which
it came, its distinctive $name and its destination (or path). These
values (created from user input if VOLK modtool runs without a
user-supplied config file or a default config file) serve as default
values for some VOLK modtool actions. It's more or less intended for
the user to change directories to the top level of a created VOLK
module and then run volk_gnsssdr_modtool to take advantage of the values
stored in the default volk_gnsssdr_modtool.cfg file.
Apart from creating new VOLK modules, VOLK modtool allows you to list
the names of kernels in other modules, list the names of kernels in
the current module, add kernels from another module into the current
module, and remove kernels from the current module. When moving
kernels between modules, VOLK modtool does its best to keep the qa
and profiling code for those kernels intact. If the base has a test
or a profiling call for some kernel, those calls will follow the
kernel when VOLK modtool adds that kernel. If QA or profiling
requires a puppet kernel, the puppet kernel will follow the original
kernel when VOLK modtool adds that original kernel. VOLK modtool
respects puppets.
======================================================================
Installing a new VOLK Library:
Run the command "volk_gnsssdr_modtool -i". This will ask you three questions:
name: // the name to give your VOLK library: volk_gnsssdr_<name>
destination: // directory new source tree is built under -- must exists.
// It will create <directory>/volk_gnsssdr_<name>
base: // the directory containing the original VOLK source code
The name provided must be alphanumeric (and cannot start with a
number). No special characters including dashes and underscores are
allowed.
This will build a new skeleton directory in the destination provided
with the name volk_gnsssdr_<name>. It will contain the necessary structure to
build:
mkdir build
cd build
cmake -DCMAKE_INSTALL_PREFIX=/opt/volk_gnsssdr ../
make
sudo make install
Right now, the library is empty and contains no kernels. Kernels can
be added from another VOLK library using the '-a' option. If not
specified, the kernel will be extracted from the base VOLK
directory. Using the '-b' allows us to specify another VOLK library to
use for this purpose.
volk_gnsssdr_modtool -a -n 32fc_x2_conjugate_dot_prod_32fc
This will put the code for the new kernel into
<destination>/volk_gnsssdr_<name>/kernels/volk_gnsssdr_<name>/
Other kernels must be added by hand. See the following webpages for
more information about creating VOLK kernels:
http://gnuradio.org/doc/doxygen/volk_gnsssdr_guide.html
http://gnuradio.org/redmine/projects/gnuradio/wiki/Volk
======================================================================
OPTIONS
Options for Adding and Removing Kernels:
-a, --add_kernel
Add kernel from existing VOLK module. Uses the base VOLK module
unless -b is used. Use -n to specify the kernel name.
Requires: -n.
Optional: -b
-A, --add_all_kernels
Add all kernels from existing VOLK module. Uses the base VOLK
module unless -b is used.
Optional: -b
-x, --remove_kernel
Remove kernel from module.
Required: -n.
Optional: -b
Options for Listing Kernels:
-l, --list
Lists all kernels available in the base VOLK module.
-k, --kernels
Lists all kernels in this VOLK module.
-r, --remote-list
Lists all kernels in another VOLK module that is specified
using the -b option.

View File

@ -0,0 +1,24 @@
#!/usr/bin/env python
#
# Copyright 2013 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
# GNU Radio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GNU Radio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Radio; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street,
# Boston, MA 02110-1301, USA.
#
from cfg import volk_gnsssdr_modtool_config
from volk_gnsssdr_modtool_generate import volk_gnsssdr_modtool

View File

@ -0,0 +1,104 @@
#!/usr/bin/env python
#
# Copyright 2013 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
# GNU Radio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GNU Radio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Radio; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street,
# Boston, MA 02110-1301, USA.
#
import ConfigParser
import sys
import os
import exceptions
import re
class volk_gnsssdr_modtool_config:
def key_val_sub(self, num, stuff, section):
return re.sub('\$' + 'k' + str(num), stuff[num][0], (re.sub('\$' + str(num), stuff[num][1], section[1][num])));
def verify(self):
for i in self.verification:
self.verify_section(i)
def remap(self):
for i in self.remapification:
self.verify_section(i)
def verify_section(self, section):
stuff = self.cfg.items(section[0])
for i in range(len(section[1])):
eval(self.key_val_sub(i, stuff, section))
try:
val = eval(self.key_val_sub(i, stuff, section))
if val == False:
raise exceptions.ValueError
except ValueError:
raise exceptions.ValueError('Verification function returns False... key:%s, val:%s'%(stuff[i][0], stuff[i][1]))
except:
raise exceptions.IOError('bad configuration... key:%s, val:%s'%(stuff[i][0], stuff[i][1]))
def __init__(self, cfg=None):
self.config_name = 'config'
self.config_defaults = ['name', 'destination', 'base']
self.config_defaults_remap = ['1',
'self.cfg.set(self.config_name, \'$k1\', os.path.realpath(os.path.expanduser(\'$1\')))',
'self.cfg.set(self.config_name, \'$k2\', os.path.realpath(os.path.expanduser(\'$2\')))']
self.config_defaults_verify = ['re.match(\'[a-zA-Z0-9]+$\', \'$0\')',
'os.path.exists(\'$1\')',
'os.path.exists(\'$2\')']
self.remapification = [(self.config_name, self.config_defaults_remap)]
self.verification = [(self.config_name, self.config_defaults_verify)]
default = os.path.join(os.getcwd(), 'volk_gnsssdr_modtool.cfg')
icfg = ConfigParser.RawConfigParser()
if cfg:
icfg.read(cfg)
elif os.path.exists(default):
icfg.read(default)
else:
print "Initializing config file..."
icfg.add_section(self.config_name)
for kn in self.config_defaults:
rv = raw_input("%s: "%(kn))
icfg.set(self.config_name, kn, rv)
self.cfg = icfg
self.remap()
self.verify()
def read_map(self, name, inp):
if self.cfg.has_section(name):
self.cfg.remove_section(name)
self.cfg.add_section(name)
for i in inp:
self.cfg.set(name, i, inp[i])
def get_map(self, name):
retval = {}
stuff = self.cfg.items(name)
for i in stuff:
retval[i[0]] = i[1]
return retval

View File

@ -0,0 +1,128 @@
#!/usr/bin/env python
#
# Copyright 2013 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
# GNU Radio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GNU Radio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Radio; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street,
# Boston, MA 02110-1301, USA.
#
from volk_gnsssdr_modtool import volk_gnsssdr_modtool, volk_gnsssdr_modtool_config
from optparse import OptionParser, OptionGroup
import exceptions
import os
import sys
if __name__ == '__main__':
parser = OptionParser();
actions = OptionGroup(parser, 'Actions');
actions.add_option('-i', '--install', action='store_true',
help='Create a new volk_gnsssdr module.')
parser.add_option('-b', '--base_path', action='store', default=None,
help='Base path for action. By default, volk_gnsssdr_modtool.cfg loads this value.')
parser.add_option('-n', '--kernel_name', action='store', default=None,
help='Kernel name for action. No default')
parser.add_option('-c', '--config', action='store', dest='config_file', default=None,
help='Config file for volk_gnsssdr_modtool. By default, volk_gnsssdr_modtool.cfg in the local directory will be used/created.')
actions.add_option('-a', '--add_kernel', action='store_true',
help='Add kernel from existing volk_gnsssdr module. Requires: -n. Optional: -b')
actions.add_option('-A', '--add_all_kernels', action='store_true',
help='Add all kernels from existing volk_gnsssdr module. Optional: -b')
actions.add_option('-x', '--remove_kernel', action='store_true',
help='Remove kernel from module. Required: -n. Optional: -b')
actions.add_option('-l', '--list', action='store_true',
help='List all kernels in the base.')
actions.add_option('-k', '--kernels', action='store_true',
help='List all kernels in the module.')
actions.add_option('-r', '--remote_list', action='store_true',
help='List all available kernels in remote volk_gnsssdr module. Requires: -b.')
actions.add_option('-m', '--moo', action='store_true',
help='Have you mooed today?')
parser.add_option_group(actions)
(options, args) = parser.parse_args();
if len(sys.argv) < 2:
parser.print_help()
elif options.moo:
print " (__) "
print " (oo) "
print " /------\/ "
print " / | || "
print " * /\---/\ "
print " ~~ ~~ "
else:
my_cfg = volk_gnsssdr_modtool_config(options.config_file);
my_modtool = volk_gnsssdr_modtool(my_cfg.get_map(my_cfg.config_name));
if options.install:
my_modtool.make_module_skeleton();
my_modtool.write_default_cfg(my_cfg.cfg);
if options.add_kernel:
if not options.kernel_name:
raise exceptions.IOError("This action requires the -n option.");
else:
name = options.kernel_name;
if options.base_path:
base = options.base_path;
else:
base = my_cfg.cfg.get(my_cfg.config_name, 'base');
my_modtool.import_kernel(name, base);
if options.remove_kernel:
if not options.kernel_name:
raise exceptions.IOError("This action requires the -n option.");
else:
name = options.kernel_name;
my_modtool.remove_kernel(name);
if options.add_all_kernels:
if options.base_path:
base = options.base_path;
else:
base = my_cfg.cfg.get(my_cfg.config_name, 'base');
kernelset = my_modtool.get_current_kernels(base);
for i in kernelset:
my_modtool.import_kernel(i, base);
if options.remote_list:
if not options.base_path:
raise exceptions.IOError("This action requires the -b option. Try -l or -k for listing kernels in the base or the module.")
else:
base = options.base_path;
kernelset = my_modtool.get_current_kernels(base);
for i in kernelset:
print i;
if options.list:
kernelset = my_modtool.get_current_kernels();
for i in kernelset:
print i;
if options.kernels:
dest = my_cfg.cfg.get(my_cfg.config_name, 'destination');
name = my_cfg.cfg.get(my_cfg.config_name, 'name');
base = os.path.join(dest, 'volk_gnsssdr_' + name);
kernelset = my_modtool.get_current_kernels(base);
for i in kernelset:
print i;

View File

@ -0,0 +1,330 @@
#
# Copyright 2013 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
# GNU Radio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GNU Radio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Radio; see the file COPYING. If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street,
# Boston, MA 02110-1301, USA.
#
import os
import glob
import sys
import re
import glob
import shutil
import exceptions
from sets import Set
class volk_gnsssdr_modtool:
def __init__(self, cfg):
self.volk_gnsssdr = re.compile('volk_gnsssdr');
self.remove_after_underscore = re.compile("_.*");
self.volk_gnsssdr_run_tests = re.compile('^\s*VOLK_RUN_TESTS.*\n', re.MULTILINE);
self.volk_gnsssdr_profile = re.compile('^\s*(VOLK_PROFILE|VOLK_PUPPET_PROFILE).*\n', re.MULTILINE);
self.my_dict = cfg;
self.lastline = re.compile('\s*char path\[1024\];.*');
self.badassert = re.compile('^\s*assert\(toked\[0\] == "volk_gnsssdr_.*\n', re.MULTILINE);
self.goodassert = ' assert(toked[0] == "volk_gnsssdr");\n'
self.baderase = re.compile('^\s*toked.erase\(toked.begin\(\)\);.*\n', re.MULTILINE);
self.gooderase = ' toked.erase(toked.begin());\n toked.erase(toked.begin());\n';
def get_basename(self, base=None):
if not base:
base = self.my_dict['base']
candidate = base.split('/')[-1];
if len(candidate.split('_')) == 1:
return '';
else:
return candidate.split('_')[-1];
def get_current_kernels(self, base=None):
if not base:
base = self.my_dict['base']
name = self.get_basename();
else:
name = self.get_basename(base);
if name == '':
hdr_files = glob.glob(os.path.join(base, "kernels/volk_gnsssdr/*.h"));
begins = re.compile("(?<=volk_gnsssdr_).*")
else:
hdr_files = glob.glob(os.path.join(base, "kernels/volk_gnsssdr_" + name + "/*.h"));
begins = re.compile("(?<=volk_gnsssdr_" + name + "_).*")
datatypes = [];
functions = [];
for line in hdr_files:
subline = re.search(".*\.h.*", os.path.basename(line))
if subline:
subsubline = begins.search(subline.group(0));
if subsubline:
dtype = self.remove_after_underscore.sub("", subsubline.group(0));
subdtype = re.search("[0-9]+[A-z]+", dtype);
if subdtype:
datatypes.append(subdtype.group(0));
datatypes = set(datatypes);
for line in hdr_files:
for dt in datatypes:
if dt in line:
#subline = re.search("(?<=volk_gnsssdr_)" + dt + ".*(?=\.h)", line);
subline = re.search(begins.pattern[:-2] + dt + ".*(?=\.h)", line);
if subline:
functions.append(subline.group(0));
return set(functions);
def make_module_skeleton(self):
dest = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'])
if os.path.exists(dest):
raise exceptions.IOError("Destination %s already exits!"%(dest));
if not os.path.exists(os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'kernels/volk_gnsssdr_' + self.my_dict['name'])):
os.makedirs(os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'kernels/volk_gnsssdr_' + self.my_dict['name']))
current_kernel_names = self.get_current_kernels();
for root, dirnames, filenames in os.walk(self.my_dict['base']):
for name in filenames:
t_table = map(lambda a: re.search(a, name), current_kernel_names);
t_table = set(t_table);
if t_table == set([None]):
infile = os.path.join(root, name);
instring = open(infile, 'r').read();
outstring = re.sub(self.volk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], instring);
newname = re.sub(self.volk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], name);
relpath = os.path.relpath(infile, self.my_dict['base']);
newrelpath = re.sub(self.volk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], relpath);
dest = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], os.path.dirname(newrelpath), newname);
if not os.path.exists(os.path.dirname(dest)):
os.makedirs(os.path.dirname(dest))
open(dest, 'w+').write(outstring);
infile = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'lib/testqa.cc');
instring = open(infile, 'r').read();
outstring = re.sub(self.volk_gnsssdr_run_tests, '', instring);
open(infile, 'w+').write(outstring);
infile = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'apps/volk_gnsssdr_' + self.my_dict['name'] + '_profile.cc');
instring = open(infile, 'r').read();
outstring = re.sub(self.volk_gnsssdr_profile, '', instring);
open(infile, 'w+').write(outstring);
infile = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'lib/qa_utils.cc');
instring = open(infile, 'r').read();
outstring = re.sub(self.badassert, self.goodassert, instring);
outstring = re.sub(self.baderase, self.gooderase, outstring);
open(infile, 'w+').write(outstring);
def write_default_cfg(self, cfg):
outfile = open(os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'volk_gnsssdr_modtool.cfg'), 'wb');
cfg.write(outfile);
outfile.close();
def convert_kernel(self, oldvolk_gnsssdr, name, base, inpath, top):
infile = os.path.join(inpath, 'kernels/' + top[:-1] + '/' + top + name + '.h');
instring = open(infile, 'r').read();
outstring = re.sub(oldvolk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], instring);
newname = 'volk_gnsssdr_' + self.my_dict['name'] + '_' + name + '.h';
relpath = os.path.relpath(infile, base);
newrelpath = re.sub(oldvolk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], relpath);
dest = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], os.path.dirname(newrelpath), newname);
if not os.path.exists(os.path.dirname(dest)):
os.makedirs(os.path.dirname(dest))
open(dest, 'w+').write(outstring);
# copy orc proto-kernels if they exist
for orcfile in glob.glob(inpath + '/orc/' + top + name + '*.orc'):
if os.path.isfile(orcfile):
instring = open(orcfile, 'r').read();
outstring = re.sub(oldvolk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], instring);
newname = 'volk_gnsssdr_' + self.my_dict['name'] + '_' + name + '.orc';
relpath = os.path.relpath(orcfile, base);
newrelpath = re.sub(oldvolk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], relpath);
dest = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], os.path.dirname(newrelpath), newname);
if not os.path.exists(os.path.dirname(dest)):
os.makedirs(os.path.dirname(dest));
open(dest, 'w+').write(outstring)
def remove_kernel(self, name):
basename = self.my_dict['name'];
if len(basename) > 0:
top = 'volk_gnsssdr_' + basename + '_';
else:
top = 'volk_gnsssdr_'
base = os.path.join(self.my_dict['destination'], top[:-1]) ;
if not name in self.get_current_kernels():
raise exceptions.IOError("Requested kernel %s is not in module %s"%(name,base));
inpath = os.path.abspath(base);
kernel = re.compile(name)
search_kernels = Set([kernel])
profile = re.compile('^\s*VOLK_PROFILE')
puppet = re.compile('^\s*VOLK_PUPPET')
src_dest = os.path.join(inpath, 'apps/', top[:-1] + '_profile.cc');
infile = open(src_dest);
otherlines = infile.readlines();
open(src_dest, 'w+').write('');
for otherline in otherlines:
write_okay = True;
if kernel.search(otherline):
write_okay = False;
if puppet.match(otherline):
args = re.search("(?<=VOLK_PUPPET_PROFILE).*", otherline)
m_func = args.group(0).split(',')[0];
func = re.search('(?<=' + top + ').*', m_func);
search_kernels.add(re.compile(func.group(0)));
if write_okay:
open(src_dest, 'a').write(otherline);
src_dest = os.path.join(inpath, 'lib/testqa.cc')
infile = open(src_dest);
otherlines = infile.readlines();
open(src_dest, 'w+').write('');
for otherline in otherlines:
write_okay = True;
for kernel in search_kernels:
if kernel.search(otherline):
write_okay = False;
if write_okay:
open(src_dest, 'a').write(otherline);
for kernel in search_kernels:
infile = os.path.join(inpath, 'kernels/' + top[:-1] + '/' + top + kernel.pattern + '.h');
print "Removing kernel %s"%(kernel.pattern)
if os.path.exists(infile):
os.remove(infile);
# remove the orc proto-kernels if they exist. There are no puppets here
# so just need to glob for files matching kernel name
print glob.glob(inpath + '/orc/' + top + name + '*.orc');
for orcfile in glob.glob(inpath + '/orc/' + top + name + '*.orc'):
print orcfile
if(os.path.exists(orcfile)):
os.remove(orcfile);
def import_kernel(self, name, base):
if not (base):
base = self.my_dict['base'];
basename = self.getbasename();
else:
basename = self.get_basename(base);
if not name in self.get_current_kernels(base):
raise exceptions.IOError("Requested kernel %s is not in module %s"%(name,base));
inpath = os.path.abspath(base);
if len(basename) > 0:
top = 'volk_gnsssdr_' + basename + '_';
else:
top = 'volk_gnsssdr_'
oldvolk_gnsssdr = re.compile(top[:-1]);
self.convert_kernel(oldvolk_gnsssdr, name, base, inpath, top);
kernel = re.compile(name)
search_kernels = Set([kernel])
profile = re.compile('^\s*VOLK_PROFILE')
puppet = re.compile('^\s*VOLK_PUPPET')
infile = open(os.path.join(inpath, 'apps/', oldvolk_gnsssdr.pattern + '_profile.cc'));
otherinfile = open(os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'apps/volk_gnsssdr_' + self.my_dict['name'] + '_profile.cc'));
dest = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'apps/volk_gnsssdr_' + self.my_dict['name'] + '_profile.cc');
lines = infile.readlines();
otherlines = otherinfile.readlines();
open(dest, 'w+').write('');
insert = False;
inserted = False
for otherline in otherlines:
if self.lastline.match(otherline):
insert = True;
if insert and not inserted:
inserted = True;
for line in lines:
if kernel.search(line):
if profile.match(line):
outline = re.sub(oldvolk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], line);
open(dest, 'a').write(outline);
elif puppet.match(line):
outline = re.sub(oldvolk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], line);
open(dest, 'a').write(outline);
args = re.search("(?<=VOLK_PUPPET_PROFILE).*", line)
m_func = args.group(0).split(',')[0];
func = re.search('(?<=' + top + ').*', m_func);
search_kernels.add(re.compile(func.group(0)));
self.convert_kernel(oldvolk_gnsssdr, func.group(0), base, inpath, top);
write_okay = True;
for kernel in search_kernels:
if kernel.search(otherline):
write_okay = False
if write_okay:
open(dest, 'a').write(otherline);
for kernel in search_kernels:
print "Adding kernel %s from module %s"%(kernel.pattern,base)
infile = open(os.path.join(inpath, 'lib/testqa.cc'));
otherinfile = open(os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'lib/testqa.cc'));
dest = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'lib/testqa.cc');
lines = infile.readlines();
otherlines = otherinfile.readlines();
open(dest, 'w+').write('');
inserted = False;
insert = False
for otherline in otherlines:
if (re.match('\s*', otherline) == None or re.match('\s*#.*', otherline) == None):
insert = True;
if insert and not inserted:
inserted = True;
for line in lines:
for kernel in search_kernels:
if kernel.search(line):
if self.volk_gnsssdr_run_tests.match(line):
outline = re.sub(oldvolk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], line);
open(dest, 'a').write(outline);
write_okay = True;
for kernel in search_kernels:
if kernel.search(otherline):
write_okay = False
if write_okay:
open(dest, 'a').write(otherline);

View File

@ -0,0 +1,212 @@
/*
* Copyright 2011-2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include "volk_gnsssdr_machines.h"
#include <volk_gnsssdr/volk_gnsssdr_typedefs.h>
#include <volk_gnsssdr/volk_gnsssdr_cpu.h>
#include "volk_gnsssdr_rank_archs.h"
#include <volk_gnsssdr/volk_gnsssdr.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
static size_t __alignment = 0;
static intptr_t __alignment_mask = 0;
struct volk_gnsssdr_machine *get_machine(void)
{
extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[];
extern unsigned int n_volk_gnsssdr_machines;
static struct volk_gnsssdr_machine *machine = NULL;
if(machine != NULL)
return machine;
else {
unsigned int max_score = 0;
unsigned int i;
struct volk_gnsssdr_machine *max_machine = NULL;
for(i=0; i<n_volk_gnsssdr_machines; i++) {
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
if(volk_gnsssdr_machines[i]->caps > max_score) {
max_score = volk_gnsssdr_machines[i]->caps;
max_machine = volk_gnsssdr_machines[i];
}
}
}
machine = max_machine;
printf("Using Volk machine: %s\n", machine->name);
__alignment = machine->alignment;
__alignment_mask = (intptr_t)(__alignment-1);
return machine;
}
}
void volk_gnsssdr_list_machines(void)
{
extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[];
extern unsigned int n_volk_gnsssdr_machines;
unsigned int i;
for(i=0; i<n_volk_gnsssdr_machines; i++) {
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
printf("%s;", volk_gnsssdr_machines[i]->name);
}
}
printf("\n");
}
const char* volk_gnsssdr_get_machine(void)
{
extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[];
extern unsigned int n_volk_gnsssdr_machines;
static struct volk_gnsssdr_machine *machine = NULL;
if(machine != NULL)
return machine->name;
else {
unsigned int max_score = 0;
unsigned int i;
struct volk_gnsssdr_machine *max_machine = NULL;
for(i=0; i<n_volk_gnsssdr_machines; i++) {
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
if(volk_gnsssdr_machines[i]->caps > max_score) {
max_score = volk_gnsssdr_machines[i]->caps;
max_machine = volk_gnsssdr_machines[i];
}
}
}
machine = max_machine;
return machine->name;
}
}
size_t volk_gnsssdr_get_alignment(void)
{
get_machine(); //ensures alignment is set
return __alignment;
}
bool volk_gnsssdr_is_aligned(const void *ptr)
{
return ((intptr_t)(ptr) & __alignment_mask) == 0;
}
#define LV_HAVE_GENERIC
#define LV_HAVE_DISPATCHER
#for $kern in $kernels
#if $kern.has_dispatcher
#include <volk_gnsssdr/$(kern.name).h> //pulls in the dispatcher
#end if
static inline void __$(kern.name)_d($kern.arglist_full)
{
#if $kern.has_dispatcher
$(kern.name)_dispatcher($kern.arglist_names);
return;
#end if
if (volk_gnsssdr_is_aligned(
#set $num_open_parens = 0
#for $arg_type, $arg_name in $kern.args
#if '*' in $arg_type
VOLK_OR_PTR($arg_name,
#set $num_open_parens += 1
#end if
#end for
0$(')'*$num_open_parens)
)){
$(kern.name)_a($kern.arglist_names);
}
else{
$(kern.name)_u($kern.arglist_names);
}
}
static inline void __init_$(kern.name)(void)
{
const char *name = get_machine()->$(kern.name)_name;
const char **impl_names = get_machine()->$(kern.name)_impl_names;
const int *impl_deps = get_machine()->$(kern.name)_impl_deps;
const bool *alignment = get_machine()->$(kern.name)_impl_alignment;
const size_t n_impls = get_machine()->$(kern.name)_n_impls;
const size_t index_a = volk_gnsssdr_rank_archs(name, impl_names, impl_deps, alignment, n_impls, true/*aligned*/);
const size_t index_u = volk_gnsssdr_rank_archs(name, impl_names, impl_deps, alignment, n_impls, false/*unaligned*/);
$(kern.name)_a = get_machine()->$(kern.name)_impls[index_a];
$(kern.name)_u = get_machine()->$(kern.name)_impls[index_u];
assert($(kern.name)_a);
assert($(kern.name)_u);
$(kern.name) = &__$(kern.name)_d;
}
static inline void __$(kern.name)_a($kern.arglist_full)
{
__init_$(kern.name)();
$(kern.name)_a($kern.arglist_names);
}
static inline void __$(kern.name)_u($kern.arglist_full)
{
__init_$(kern.name)();
$(kern.name)_u($kern.arglist_names);
}
static inline void __$(kern.name)($kern.arglist_full)
{
__init_$(kern.name)();
$(kern.name)($kern.arglist_names);
}
$kern.pname $(kern.name)_a = &__$(kern.name)_a;
$kern.pname $(kern.name)_u = &__$(kern.name)_u;
$kern.pname $(kern.name) = &__$(kern.name);
void $(kern.name)_manual($kern.arglist_full, const char* impl_name)
{
const int index = volk_gnsssdr_get_index(
get_machine()->$(kern.name)_impl_names,
get_machine()->$(kern.name)_n_impls,
impl_name
);
get_machine()->$(kern.name)_impls[index](
$kern.arglist_names
);
}
volk_gnsssdr_func_desc_t $(kern.name)_get_func_desc(void) {
const char **impl_names = get_machine()->$(kern.name)_impl_names;
const int *impl_deps = get_machine()->$(kern.name)_impl_deps;
const bool *alignment = get_machine()->$(kern.name)_impl_alignment;
const size_t n_impls = get_machine()->$(kern.name)_n_impls;
volk_gnsssdr_func_desc_t desc = {
impl_names,
impl_deps,
alignment,
n_impls
};
return desc;
}
#end for

View File

@ -0,0 +1,94 @@
/*
* Copyright 2011-2012 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/
#ifndef INCLUDED_VOLK_RUNTIME
#define INCLUDED_VOLK_RUNTIME
#include <volk_gnsssdr/volk_gnsssdr_typedefs.h>
#include <volk_gnsssdr/volk_gnsssdr_config_fixed.h>
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
#include <stdlib.h>
#include <stdbool.h>
__VOLK_DECL_BEGIN
typedef struct volk_gnsssdr_func_desc
{
const char **impl_names;
const int *impl_deps;
const bool *impl_alignment;
const size_t n_impls;
} volk_gnsssdr_func_desc_t;
//! Prints a list of machines available
VOLK_API void volk_gnsssdr_list_machines(void);
//! Returns the name of the machine this instance will use
VOLK_API const char* volk_gnsssdr_get_machine(void);
//! Get the machine alignment in bytes
VOLK_API size_t volk_gnsssdr_get_alignment(void);
/*!
* The VOLK_OR_PTR macro is a convenience macro
* for checking the alignment of a set of pointers.
* Example usage:
* volk_gnsssdr_is_aligned(VOLK_OR_PTR((VOLK_OR_PTR(p0, p1), p2)))
*/
#define VOLK_OR_PTR(ptr0, ptr1) \
(const void *)(((intptr_t)(ptr0)) | ((intptr_t)(ptr1)))
/*!
* Is the pointer on a machine alignment boundary?
*
* Note: for performance reasons, this function
* is not usable until another volk_gnsssdr API call is made
* which will perform certain initialization tasks.
*
* \param ptr the pointer to some memory buffer
* \return 1 for alignment boundary, else 0
*/
VOLK_API bool volk_gnsssdr_is_aligned(const void *ptr);
#for $kern in $kernels
//! A function pointer to the dispatcher implementation
extern VOLK_API $kern.pname $kern.name;
//! A function pointer to the fastest aligned implementation
extern VOLK_API $kern.pname $(kern.name)_a;
//! A function pointer to the fastest unaligned implementation
extern VOLK_API $kern.pname $(kern.name)_u;
//! Call into a specific implementation given by name
extern VOLK_API void $(kern.name)_manual($kern.arglist_full, const char* impl_name);
//! Get description paramaters for this kernel
extern VOLK_API volk_gnsssdr_func_desc_t $(kern.name)_get_func_desc(void);
#end for
__VOLK_DECL_END
#endif /*INCLUDED_VOLK_RUNTIME*/

Some files were not shown because too many files have changed in this diff Show More