diff --git a/CMakeLists.txt b/CMakeLists.txt index cf9d1bcda..ed856c708 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,26 +16,58 @@ # along with GNSS-SDR. If not, see . # -######################################################################## -if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) - message(FATAL_ERROR "Prevented in-tree build. This is bad practice. Try 'cd build && cmake ../' ") -endif(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) ######################################################################## # Project setup ######################################################################## cmake_minimum_required(VERSION 2.8) +if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}") + message(FATAL_ERROR "Prevented in-tree build. This is bad practice. Try 'cd build && cmake ../' ") +endif() project(gnss-sdr CXX C) - list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules) +file(RELATIVE_PATH RELATIVE_CMAKE_CALL ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) + + +######################################################################## +# Determine optional blocks/libraries to be built (default: not built) +# Enable them here or at the command line by doing 'cmake -DENABLE_XXX=ON ../' +######################################################################## + +option(ENABLE_GN3S "Enable the use of the GN3S dongle as signal source (experimental)" OFF) +option(ENABLE_ARRAY "Enable the use of CTTC's antenna array front-end as signal source (experimental)" OFF) +option(ENABLE_RTLSDR "Enable the use of RTL dongles as signal source (experimental)" OFF) +option(ENABLE_OPENCL "Enable building of processing blocks implemented with OpenCL (experimental)" OFF) +option(ENABLE_GPERFTOOLS "Enable linking to Gperftools libraries (tcmalloc and profiler)" OFF) +option(ENABLE_GENERIC_ARCH "Builds a portable binary" OFF) +option(ENABLE_VOLK_GNSSSDR "Enable building of volk_gnsssdr module: some 
volk protokernels coded by gnss-sdr" OFF) + + +############################### +# GNSS-SDR version information +############################### +# Get the current working branch +execute_process( + COMMAND git rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_BRANCH + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +# Get the latest abbreviated commit hash of the working branch +execute_process( + COMMAND git log -1 --format=%h + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_HASH + OUTPUT_STRIP_TRAILING_WHITESPACE +) -# Set the version information here set(VERSION_INFO_MAJOR_VERSION 0) set(VERSION_INFO_API_COMPAT 0) -set(VERSION_INFO_MINOR_VERSION 3) +set(VERSION_INFO_MINOR_VERSION 3.git-${GIT_BRANCH}-${GIT_COMMIT_HASH}) set(VERSION ${VERSION_INFO_MAJOR_VERSION}.${VERSION_INFO_API_COMPAT}.${VERSION_INFO_MINOR_VERSION}) -file(RELATIVE_PATH RELATIVE_CMAKE_CALL ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) + ######################################################################## # Environment setup @@ -156,10 +188,16 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") endif(${DARWIN_VERSION} MATCHES "10") endif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + #select the release build type by default to get optimization flags if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE "Release") - message(STATUS "Build type not specified: defaulting to Release.") + if(ENABLE_GPERFTOOLS) + set(CMAKE_BUILD_TYPE "RelWithDebInfo") + message(STATUS "Build type not specified: defaulting to RelWithDebInfo.") + else(ENABLE_GPERFTOOLS) + set(CMAKE_BUILD_TYPE "Release") + message(STATUS "Build type not specified: defaulting to Release.") + endif(ENABLE_GPERFTOOLS) endif(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "") @@ -182,6 +220,7 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") endif(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.7) endif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") + 
################################################################################ # Googletest - http://code.google.com/p/googletest/ ################################################################################ @@ -203,7 +242,6 @@ endif(GTEST_DIR) ################################################################################ # Boost - http://www.boost.org ################################################################################ - if(UNIX AND EXISTS "/usr/lib64") list(APPEND BOOST_LIBRARYDIR "/usr/lib64") # Fedora 64-bit fix endif(UNIX AND EXISTS "/usr/lib64") @@ -231,9 +269,7 @@ endif(NOT Boost_FOUND) ################################################################################ # GNU Radio - http://gnuradio.org/redmine/projects/gnuradio/wiki ################################################################################ - find_package(Gnuradio) - if(NOT GNURADIO_RUNTIME_FOUND) message(STATUS "CMake cannot find GNU Radio >= 3.7") if(OS_IS_LINUX) @@ -281,6 +317,40 @@ if(NOT GNURADIO_TRELLIS_FOUND) endif() +############################################################################### +# Volk_gnsssdr module +# To use the volk_gnsssdr module from another target, add: +# 1) include_directories(..${VOLK_GNSSSDR_INCLUDE_DIRS}..) +# 2) target_link_libraries(..${VOLK_GNSSSDR_LIBRARIES}..) +############################################################################### + +if(ENABLE_VOLK_GNSSSDR) + message(STATUS "The volk_gnsssdr module with custom protokernels coded by gnss-sdr will be compiled.") + message(STATUS "You can disable it with 'cmake -DENABLE_VOLK_GNSSSDR=OFF ../'" ) +else() + message(STATUS "The volk_gnsssdr module with custom protokernels coded by gnss-sdr is not enabled. Some configurations that use custom protokernels will not work." ) + message(STATUS "Enable it with 'cmake -D ENABLE_VOLK_GNSSSDR=ON ../'." 
) +endif() + +if(ENABLE_VOLK_GNSSSDR) + set(VOLK_GNSSSDR_BASE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/src/algorithms/libs/volk_gnsssdr) + add_subdirectory(${VOLK_GNSSSDR_BASE_PATH}) + + set(VOLK_GNSSSDR_INCLUDE_DIRS + ${VOLK_GNSSSDR_BASE_PATH}/include + ${CMAKE_CURRENT_BINARY_DIR}/src/algorithms/libs/volk_gnsssdr/include + ) + + set(VOLK_GNSSSDR_LIBRARIES + #Path to libs of volk_gnsssdr target: ${VOLK_GNSSSDR_BASE_PATH}/lib/Debug/libvolk_gnsssdr.dylib + volk_gnsssdr + ) + + message(" * INCLUDES: ${VOLK_GNSSSDR_INCLUDE_DIRS} ") + message(" * LIBS: ${VOLK_GNSSSDR_LIBRARIES} ") + message("-- END OF: Setup volk_gnsssdr as a subproject.") +endif() + ################################################################################ # gflags - http://code.google.com/p/gflags/ @@ -356,7 +426,6 @@ endif(NOT GFlags_FOUND OR LOCAL_GLOG) ################################################################################ # glog - http://code.google.com/p/google-glog/ ################################################################################ - find_package(GLOG) set(glog_RELEASE 0.3.3) if (NOT GLOG_FOUND OR LOCAL_GFLAGS) @@ -458,97 +527,14 @@ endif(NOT GLOG_FOUND OR LOCAL_GFLAGS) - -################################################################################ -# GPerftools - http://code.google.com/p/gperftools/ -################################################################################ - -set(GCC_GPERFTOOLS_FLAGS "") -find_package(Gperftools) -if ( NOT GPERFTOOLS_FOUND ) - message(STATUS "The optional library GPerftools has not been found.") -else( NOT GPERFTOOLS_FOUND ) - message (STATUS "GPerftools library found." 
) - link_libraries(${GPERFTOOLS_PROFILER} ${GPERFTOOLS_TCMALLOC}) -endif( NOT GPERFTOOLS_FOUND ) -list(APPEND CMAKE_CXX_FLAGS ${GCC_GPERFTOOLS_FLAGS}) - - - - -################################################################################ -# Doxygen - http://www.stack.nl/~dimitri/doxygen/index.html -################################################################################ - -find_package(Doxygen) -if(DOXYGEN_FOUND) - message(STATUS "Doxygen found.") - message(STATUS "You can build the documentation with 'make doc'." ) - message(STATUS "When done, point your browser to ${CMAKE_SOURCE_DIR}/html/index.html") - set(HAVE_DOT ${DOXYGEN_DOT_FOUND}) - file(TO_NATIVE_PATH ${CMAKE_SOURCE_DIR} top_srcdir) - file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} top_builddir) - find_package(LATEX) - if (PDFLATEX_COMPILER) - set(GENERATE_PDF_DOCUMENTATION "YES") - set(GNSSSDR_USE_MATHJAX "NO") - else(PDFLATEX_COMPILER) - set(GENERATE_PDF_DOCUMENTATION "NO") - set(GNSSSDR_USE_MATHJAX "YES") - endif(PDFLATEX_COMPILER) - configure_file(${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile.in - ${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile - @ONLY - ) - add_custom_target(doc - ${DOXYGEN_EXECUTABLE} ${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - COMMENT "Generating API documentation with Doxygen." VERBATIM - ) - if(LATEX_COMPILER) - message(STATUS "'make pdfmanual' will generate a manual at ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf") - add_custom_target(pdfmanual - COMMAND ${CMAKE_MAKE_PROGRAM} - COMMAND ${CMAKE_COMMAND} -E copy refman.pdf ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf - COMMAND ${CMAKE_MAKE_PROGRAM} clean - DEPENDS doc - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/docs/latex - COMMENT "Generating PDF manual with Doxygen." 
VERBATIM - ) - endif(LATEX_COMPILER) - message(STATUS "'make doc-clean' will clean the documentation.") - add_custom_target(doc-clean - COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_SOURCE_DIR}/docs/html - COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_SOURCE_DIR}/docs/latex - COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf - COMMENT "Cleaning documentation." VERBATIM - ) -else(DOXYGEN_FOUND) - message(STATUS " Doxygen has not been found in your system.") - message(STATUS " You can get nice code documentation by using it!") - message(STATUS " Get it from http://www.stack.nl/~dimitri/doxygen/index.html") - if(OS_IS_LINUX) - if(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat") - message(" or simply by doing 'sudo yum install doxygen-latex'.") - else(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat") - message(" or simply by doing 'sudo apt-get install doxygen-latex'.") - endif(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat") - endif(OS_IS_LINUX) - if(OS_IS_MACOSX) - message(STATUS " or simply by doing 'sudo port install doxygen +latex'.") - endif(OS_IS_MACOSX) -endif(DOXYGEN_FOUND) - - - ################################################################################ # Armadillo - http://arma.sourceforge.net/ ################################################################################ - if(OS_IS_LINUX) - ############################################# + ############################################################################# # Check that LAPACK is found in the system - ############################################# + # LAPACK is required for matrix decompositions (eg. SVD) and matrix inverse.
+ ############################################################################# find_library(LAPACK lapack) if(NOT LAPACK) message(" The LAPACK library has not been found.") @@ -562,9 +548,11 @@ if(OS_IS_LINUX) endif(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat") message(FATAL_ERROR "LAPACK is required to build gnss-sdr") endif(NOT LAPACK) - ############################################# + ############################################################################# # Check that BLAS is found in the system - ############################################# + # BLAS is used for matrix multiplication. + # Without BLAS, matrix multiplication will still work, but might be slower. + ############################################################################# find_library(BLAS blas) if(NOT BLAS) message(" The BLAS library has not been found.") @@ -641,31 +629,22 @@ if(NOT ARMADILLO_FOUND) endif(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat") message(FATAL_ERROR "The patch command is required to download and build armadillo") endif(NOT PATCH_EXECUTABLE) - set(armadillo_RELEASE 4.300.9) - set(armadillo_MD5 "d51d1beb2a335f3002702d112c4814f3") + set(armadillo_RELEASE 4.400.0) + set(armadillo_MD5 "616744dbc96af1c5d6d32c6c69f6fe94") if(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/download/armadillo-${armadillo_RELEASE}/armadillo-${armadillo_RELEASE}.tar.gz) set(ARMADILLO_PATCH_FILE ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_no.patch) file(WRITE ${ARMADILLO_PATCH_FILE} "") - set(ARMADILLO_PATCH_FILE2 ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_no2.patch) - file(WRITE ${ARMADILLO_PATCH_FILE2} "") else(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/download/armadillo-${armadillo_RELEASE}/armadillo-${armadillo_RELEASE}.tar.gz) - set(ARMADILLO_PATCH_FILE ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_staticlib.patch) - set(ARMADILLO_PATCH_FILE2 
${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_enable_lapack.patch) + set(ARMADILLO_PATCH_FILE ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE}/armadillo_enable_lapack.patch) file(WRITE ${ARMADILLO_PATCH_FILE} -"30c30 -< set(ARMA_USE_LAPACK false) ---- -> set(ARMA_USE_LAPACK true) -312c312 -< add_library( armadillo SHARED \${PROJECT_SOURCE_DIR}/src/wrapper.cpp ) ---- -> add_library( armadillo STATIC \${PROJECT_SOURCE_DIR}/src/wrapper.cpp ) -") - file(WRITE ${ARMADILLO_PATCH_FILE2} "12c12 < // #define ARMA_USE_LAPACK --- -> #define ARMA_USE_LAPACK +> #define ARMA_USE_LAPACK +19c19 +< // #define ARMA_USE_BLAS +--- +> #define ARMA_USE_BLAS ") endif(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/download/armadillo-${armadillo_RELEASE}/armadillo-${armadillo_RELEASE}.tar.gz) ExternalProject_Add( @@ -673,9 +652,9 @@ if(NOT ARMADILLO_FOUND) PREFIX ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE} URL http://sourceforge.net/projects/arma/files/armadillo-${armadillo_RELEASE}.tar.gz DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}/download/armadillo-${armadillo_RELEASE} - URL_MD5 ${armadillo_MD5} - PATCH_COMMAND patch -N /CMakeLists.txt ${ARMADILLO_PATCH_FILE} && patch -N /include/armadillo_bits/config.hpp ${ARMADILLO_PATCH_FILE2} - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + URL_MD5 ${armadillo_MD5} + PATCH_COMMAND patch -N /include/armadillo_bits/config.hpp ${ARMADILLO_PATCH_FILE} + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DBUILD_SHARED_LIBS=OFF BUILD_IN_SOURCE 1 BUILD_COMMAND make UPDATE_COMMAND "" @@ -686,7 +665,14 @@ if(NOT ARMADILLO_FOUND) ExternalProject_Get_Property(armadillo-${armadillo_RELEASE} binary_dir) set(ARMADILLO_INCLUDE_DIRS ${binary_dir}/include ) find_library(LAPACK NAMES lapack HINTS /usr/lib /usr/local/lib /usr/lib64) - set(ARMADILLO_LIBRARIES ${LAPACK} ${GFORTRAN} ${binary_dir}/${CMAKE_FIND_LIBRARY_PREFIXES}armadillo.a) + if(OS_IS_MACOSX) + find_library(BLAS blas) + endif(OS_IS_MACOSX) + 
find_package(OpenBLAS) + if(OPENBLAS_FOUND) + set(BLAS ${OPENBLAS}) + endif(OPENBLAS_FOUND) + set(ARMADILLO_LIBRARIES ${BLAS} ${LAPACK} ${GFORTRAN} ${binary_dir}/${CMAKE_FIND_LIBRARY_PREFIXES}armadillo.a) set(LOCAL_ARMADILLO true CACHE STRING "Armadillo downloaded and built automatically" FORCE) # Save a copy at the thirdparty folder file(COPY ${CMAKE_CURRENT_BINARY_DIR}/armadillo-${armadillo_RELEASE} @@ -700,27 +686,6 @@ endif(NOT ARMADILLO_FOUND) -############################################################################### -# OpenCL -############################################################################### -find_package(OpenCL) -if($ENV{DISABLE_OPENCL}) - set(DISABLE_OPENCL TRUE) -endif($ENV{DISABLE_OPENCL}) -if(DISABLE_OPENCL) - set(OPENCL_FOUND FALSE) -else(DISABLE_OPENCL) - if(OPENCL_FOUND) - message(STATUS "OpenCL has been found and will be used by some processing blocks") - message(STATUS "You can disable OpenCL use by doing 'cmake -DDISABLE_OPENCL=1 ../' ") - endif(OPENCL_FOUND) -endif(DISABLE_OPENCL) -if(NOT OPENCL_FOUND) - message(STATUS "Processing blocks using OpenCL will not be built.") -endif(NOT OPENCL_FOUND) - - - ################################################################################ # OpenSSL - http://www.openssl.org ################################################################################ @@ -742,41 +707,173 @@ if(NOT OPENSSL_FOUND) endif(NOT OPENSSL_FOUND) + +################################################################################ +# Doxygen - http://www.stack.nl/~dimitri/doxygen/index.html (OPTIONAL) +################################################################################ +find_package(Doxygen) +if(DOXYGEN_FOUND) + message(STATUS "Doxygen found.") + message(STATUS "You can build the documentation with 'make doc'." 
) + message(STATUS "When done, point your browser to ${CMAKE_SOURCE_DIR}/docs/html/index.html") + set(HAVE_DOT ${DOXYGEN_DOT_FOUND}) + file(TO_NATIVE_PATH ${CMAKE_SOURCE_DIR} top_srcdir) + file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} top_builddir) + find_package(LATEX) + if (PDFLATEX_COMPILER) + set(GENERATE_PDF_DOCUMENTATION "YES") + set(GNSSSDR_USE_MATHJAX "NO") + else(PDFLATEX_COMPILER) + set(GENERATE_PDF_DOCUMENTATION "NO") + set(GNSSSDR_USE_MATHJAX "YES") + endif(PDFLATEX_COMPILER) + configure_file(${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile.in + ${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile + @ONLY + ) + add_custom_target(doc + ${DOXYGEN_EXECUTABLE} ${CMAKE_SOURCE_DIR}/docs/doxygen/Doxyfile + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + COMMENT "Generating API documentation with Doxygen." VERBATIM + ) + if(LATEX_COMPILER) + message(STATUS "'make pdfmanual' will generate a manual at ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf") + add_custom_target(pdfmanual + COMMAND ${CMAKE_MAKE_PROGRAM} + COMMAND ${CMAKE_COMMAND} -E copy refman.pdf ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf + COMMAND ${CMAKE_MAKE_PROGRAM} clean + DEPENDS doc + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/docs/latex + COMMENT "Generating PDF manual with Doxygen." VERBATIM + ) + endif(LATEX_COMPILER) + message(STATUS "'make doc-clean' will clean the documentation.") + add_custom_target(doc-clean + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_SOURCE_DIR}/docs/html + COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_SOURCE_DIR}/docs/latex + COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_SOURCE_DIR}/docs/GNSS-SDR_manual.pdf + COMMENT "Cleaning documentation." 
VERBATIM + ) +else(DOXYGEN_FOUND) + message(STATUS " Doxygen has not been found in your system.") + message(STATUS " You can get nice code documentation by using it!") + message(STATUS " Get it from http://www.stack.nl/~dimitri/doxygen/index.html") + if(OS_IS_LINUX) + if(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat") + message(" or simply by doing 'sudo yum install doxygen-latex'.") + else(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat") + message(" or simply by doing 'sudo apt-get install doxygen-latex'.") + endif(${LINUX_DISTRIBUTION} MATCHES "Fedora" OR ${LINUX_DISTRIBUTION} MATCHES "Red Hat") + endif(OS_IS_LINUX) + if(OS_IS_MACOSX) + message(STATUS " or simply by doing 'sudo port install doxygen +latex'.") + endif(OS_IS_MACOSX) +endif(DOXYGEN_FOUND) + + + +############################################################################### +# OpenCL (OPTIONAL) +############################################################################### +if(ENABLE_OPENCL) + find_package(OpenCL) + if($ENV{DISABLE_OPENCL}) + set(DISABLE_OPENCL TRUE) + endif($ENV{DISABLE_OPENCL}) + if(DISABLE_OPENCL) + set(OPENCL_FOUND FALSE) + else(DISABLE_OPENCL) + if(OPENCL_FOUND) + message(STATUS "OpenCL has been found and will be used by some processing blocks") + message(STATUS "You can disable OpenCL use by doing 'cmake -DENABLE_OPENCL=OFF ../' ") + endif(OPENCL_FOUND) + endif(DISABLE_OPENCL) + if(ENABLE_GENERIC_ARCH) + set(OPENCL_FOUND FALSE) + message(STATUS "ENABLE_GENERIC_ARCH is set to ON so the use of OpenCL has been disabled.") + endif(ENABLE_GENERIC_ARCH) + if(NOT OPENCL_FOUND) + message(STATUS "Processing blocks using OpenCL will not be built.") + endif(NOT OPENCL_FOUND) +else(ENABLE_OPENCL) + set(OPENCL_FOUND FALSE) +endif(ENABLE_OPENCL) + + + + +################################################################################ +# GPerftools - http://code.google.com/p/gperftools/ (OPTIONAL)
+################################################################################ + +if(ENABLE_GPERFTOOLS) + find_package(Gperftools) + if ( NOT GPERFTOOLS_FOUND ) + message(STATUS "Although ENABLE_GPERFTOOLS has been set to ON, GPerftools has not been found.") + message(STATUS "Binaries will be compiled without 'tcmalloc' and 'profiler' libraries.") + message(STATUS "You can install GPerftools from http://code.google.com/p/gperftools/") + else( NOT GPERFTOOLS_FOUND ) + message(STATUS "GPerftools libraries found." ) + message(STATUS "Binaries will be compiled with 'tcmalloc' and 'profiler' libraries.") + endif( NOT GPERFTOOLS_FOUND ) +endif(ENABLE_GPERFTOOLS) + + + ################################################################################ # Setup of optional drivers ################################################################################ -if( $ENV{GN3S_DRIVER} ) - message(STATUS "GN3S_DRIVER variable found." ) - # copy firmware to install folder - # Build project gr-gn3s -else( $ENV{GN3S_DRIVER} ) - if( GN3S_DRIVER ) - message(STATUS "GN3S driver will be compiled") - else( GNSS_DRIVER ) - message(STATUS "GN3S_DRIVER is not defined." ) - message(STATUS "Define it with 'export GN3S_DRIVER=1' to add support for the GN3S dongle." ) - endif( GN3S_DRIVER ) -endif($ENV{GN3S_DRIVER} ) -if( $ENV{RAW_ARRAY_DRIVER} ) - message(STATUS "RAW_ARRAY_DRIVER variable found." ) +if($ENV{GN3S_DRIVER}) + message(STATUS "GN3S_DRIVER environment variable found." ) + set(ENABLE_GN3S ON) +endif($ENV{GN3S_DRIVER}) +if(GN3S_DRIVER) + set(ENABLE_GN3S ON) +endif(GN3S_DRIVER) +if(ENABLE_GN3S) + message(STATUS "The GN3S driver will be compiled.") + message(STATUS "You can disable it with 'cmake -DENABLE_GN3S=OFF ../'" ) +else(ENABLE_GN3S) + message(STATUS "The (optional and experimental) GN3S driver is not enabled." ) + message(STATUS "Enable it with 'cmake -DENABLE_GN3S=ON ../' to add support for the GN3S dongle." 
) +endif(ENABLE_GN3S) + + +if($ENV{RAW_ARRAY_DRIVER}) + message(STATUS "RAW_ARRAY_DRIVER environment variable found." ) + set(ENABLE_ARRAY ON) +endif($ENV{RAW_ARRAY_DRIVER}) +if(RAW_ARRAY_DRIVER) + set(ENABLE_ARRAY ON) +endif(RAW_ARRAY_DRIVER) +if(ENABLE_ARRAY) + message(STATUS "CTTC's Antenna Array front-end driver will be compiled." ) + message(STATUS "You can disable it with 'cmake -DENABLE_ARRAY=OFF ../'" ) # copy firmware to install folder # Build project gr-dbfcttc -else( $ENV{RAW_ARRAY_DRIVER} ) - if( RAW_ARRAY_DRIVER ) - message(STATUS "RAW_ARRAY_DRIVER driver will be compiled") - else( RAW_ARRAY_DRIVER ) - message(STATUS "RAW_ARRAY_DRIVER is not defined." ) - message(STATUS "Define it with 'export RAW_ARRAY_DRIVER=1' to add support for the CTTC experimental array front-end." ) - endif( RAW_ARRAY_DRIVER ) -endif($ENV{RAW_ARRAY_DRIVER} ) +else(ENABLE_ARRAY) + message(STATUS "The (optional) CTTC's Antenna Array front-end driver is not enabled." ) + message(STATUS "Enable it with 'cmake -DENABLE_ARRAY=ON ../' to add support for the CTTC experimental array front-end." ) +endif(ENABLE_ARRAY) -if( $ENV{RTLSDR_DRIVER} ) - message(STATUS "RTLSDR_DRIVER variable found." ) + +if($ENV{RTLSDR_DRIVER}) + message(STATUS "RTLSDR_DRIVER environment variable found." ) + set(ENABLE_RTLSDR ON) +endif($ENV{RTLSDR_DRIVER}) +if(RTLSDR_DRIVER) + set(ENABLE_RTLSDR ON) +endif(RTLSDR_DRIVER) +if(ENABLE_RTLSDR) + message(STATUS "The driver for RTL-based dongles will be compiled." ) + message(STATUS "You can disable it with 'cmake -DENABLE_RTLSDR=OFF ../'" ) # find libosmosdr (done in src/algorithms/signal_sources/adapters) # find gr-osmosdr (done in src/algorithms/signal_sources/adapters) -endif($ENV{RTLSDR_DRIVER} ) - +else(ENABLE_RTLSDR) + message(STATUS "The (optional) driver for RTL-based dongles is not enabled." ) + message(STATUS "Enable it with 'cmake -DENABLE_RTLSDR=ON ../' to add support for Realtek's RTL2832U-based USB dongles." 
) +endif(ENABLE_RTLSDR) ######################################################################## @@ -802,7 +899,11 @@ if(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32) if(OS_IS_MACOSX) set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -march=corei7 -mfpmath=sse") else(OS_IS_MACOSX) - set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -march=native -mfpmath=sse") + if(ENABLE_GENERIC_ARCH) + set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -mtune=generic") + else(ENABLE_GENERIC_ARCH) + set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -march=native -mfpmath=sse") + endif(ENABLE_GENERIC_ARCH) endif(OS_IS_MACOSX) endif(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32) @@ -811,13 +912,18 @@ if(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32) add_definitions(-fvisibility=hidden) endif() -# Set GPerftools related flags if it is available -# See http://gperftools.googlecode.com/svn/trunk/README -if(GPERFTOOLS_FOUND) - if(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32) - set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free") - endif(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32) -endif(GPERFTOOLS_FOUND) +if(ENABLE_GPERFTOOLS) + # Set GPerftools related flags if it is available + # See http://gperftools.googlecode.com/svn/trunk/README + if(GPERFTOOLS_FOUND) + if(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32) + set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free") + endif(CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(MY_CXX_FLAGS "${MY_CXX_FLAGS} -fno-builtin") + endif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + endif(GPERFTOOLS_FOUND) +endif(ENABLE_GPERFTOOLS) list(APPEND CMAKE_CXX_FLAGS ${MY_CXX_FLAGS}) diff --git a/src/algorithms/libs/volk_gnsssdr/CMakeLists.txt b/src/algorithms/libs/volk_gnsssdr/CMakeLists.txt new file mode 100644 index 000000000..77481beda --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/CMakeLists.txt @@ -0,0 +1,183 @@ +# +# Copyright 2011 Free Software Foundation, Inc.
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +######################################################################## +# Project setup +######################################################################## +cmake_minimum_required(VERSION 2.6) +if(NOT DEFINED CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() +set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "Choose build type: None Debug Release RelWithDebInfo MinSizeRel") +project(volk_gnsssdr) +enable_language(CXX) +enable_language(C) +enable_testing() +set(VERSION 0.1) +set(LIBVER 0.0.0) + +set(CMAKE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) # allows this to be a sub-project +set(CMAKE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) # allows this to be a sub-project +set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) # location for custom "Modules" + +######################################################################## +# Environment setup +######################################################################## +if(NOT DEFINED BOOST_ROOT) + set(BOOST_ROOT ${CMAKE_INSTALL_PREFIX}) +endif() + +if(NOT DEFINED CROSSCOMPILE_MULTILIB) + set(CROSSCOMPILE_MULTILIB "") +endif() +set(CROSSCOMPILE_MULTILIB ${CROSSCOMPILE_MULTILIB} CACHE STRING "Define \"true\" if you have and want to use multiple C development libs installed for cross compile") + + +######################################################################## +# Dependencies setup
+######################################################################## +include(GrPython) #sets PYTHON_EXECUTABLE and PYTHON_DASH_B +VOLK_PYTHON_CHECK_MODULE("python >= 2.5" sys "sys.version.split()[0] >= '2.5'" PYTHON_MIN_VER_FOUND) +VOLK_PYTHON_CHECK_MODULE("Cheetah >= 2.0.0" Cheetah "Cheetah.Version >= '2.0.0'" CHEETAH_FOUND) + +if(NOT PYTHON_MIN_VER_FOUND) + message(FATAL_ERROR "Python 2.5 or greater required to build VOLK") +endif() + +if(NOT CHEETAH_FOUND) + message(FATAL_ERROR "Cheetah templates required to build VOLK") +endif() + +if(MSVC) + if(NOT DEFINED BOOST_ALL_DYN_LINK) + set(BOOST_ALL_DYN_LINK TRUE) + endif() + set(BOOST_ALL_DYN_LINK "${BOOST_ALL_DYN_LINK}" CACHE BOOL "boost enable dynamic linking") + if(BOOST_ALL_DYN_LINK) + add_definitions(-DBOOST_ALL_DYN_LINK) # setup boost auto-linking in msvc + else() + unset(BOOST_REQUIRED_COMPONENTS) # empty components list for static link + endif() +endif() +include(VolkBoost) + +if(NOT Boost_FOUND) + message(FATAL_ERROR "VOLK Requires boost to build") +endif() + +option(ENABLE_ORC "Enable Orc" True) +if(ENABLE_ORC) + find_package(ORC) +else() + message(STATUS "Disabling use of ORC") +endif() + +######################################################################## +# Setup the package config file +######################################################################## +# set variables found in the pc.in file +set(prefix ${CMAKE_INSTALL_PREFIX}) +set(exec_prefix "\${prefix}") +set(libdir "\${exec_prefix}/lib${LIB_SUFFIX}") +set(includedir "\${prefix}/include") + +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr.pc.in + ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr.pc +@ONLY) + +install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr.pc + DESTINATION lib${LIB_SUFFIX}/pkgconfig + COMPONENT "volk_gnsssdr_devel" +) + +######################################################################## +# Install all headers in the include directories
+######################################################################## +set(VOLK_RUNTIME_DIR bin) +set(VOLK_LIBRARY_DIR lib${LIB_SUFFIX}) +set(VOLK_INCLUDE_DIR include) + +install( + DIRECTORY ${CMAKE_SOURCE_DIR}/kernels/volk_gnsssdr + DESTINATION include COMPONENT "volk_gnsssdr_devel" + FILES_MATCHING PATTERN "*.h" +) + +install(FILES + ${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_prefs.h + ${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_complex.h + ${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_common.h + ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr.h + ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_cpu.h + ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_config_fixed.h + ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_typedefs.h + ${CMAKE_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_malloc.h + DESTINATION include/volk_gnsssdr + COMPONENT "volk_gnsssdr_devel" +) + +######################################################################## +# Install cmake search routine for external use +######################################################################## + +if(NOT CMAKE_MODULES_DIR) + set(CMAKE_MODULES_DIR lib${LIB_SUFFIX}/cmake) +endif(NOT CMAKE_MODULES_DIR) + +install( + FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/VolkConfig.cmake + DESTINATION ${CMAKE_MODULES_DIR}/volk_gnsssdr + COMPONENT "volk_gnsssdr_devel" +) + +######################################################################## +# On Apple only, set install name and use rpath correctly, if not already set +######################################################################## +if(APPLE) + if(NOT CMAKE_INSTALL_NAME_DIR) + set(CMAKE_INSTALL_NAME_DIR + ${CMAKE_INSTALL_PREFIX}/${VOLK_LIBRARY_DIR} CACHE + PATH "Library Install Name Destination Directory" FORCE) + endif(NOT CMAKE_INSTALL_NAME_DIR) + if(NOT CMAKE_INSTALL_RPATH) + set(CMAKE_INSTALL_RPATH + ${CMAKE_INSTALL_PREFIX}/${VOLK_LIBRARY_DIR} CACHE + PATH "Library Install RPath" FORCE) + endif(NOT 
CMAKE_INSTALL_RPATH) + if(NOT CMAKE_BUILD_WITH_INSTALL_RPATH) + set(CMAKE_BUILD_WITH_INSTALL_RPATH ON CACHE + BOOL "Do Build Using Library Install RPath" FORCE) + endif(NOT CMAKE_BUILD_WITH_INSTALL_RPATH) +endif(APPLE) + +######################################################################## +# Setup the library +######################################################################## +add_subdirectory(lib) + +######################################################################## +# And the utility apps +######################################################################## +add_subdirectory(apps) +add_subdirectory(python/volk_gnsssdr_modtool) + +######################################################################## +# Print summary +######################################################################## +message(STATUS "Using install prefix: ${CMAKE_INSTALL_PREFIX}") diff --git a/src/algorithms/libs/volk_gnsssdr/apps/CMakeLists.txt b/src/algorithms/libs/volk_gnsssdr/apps/CMakeLists.txt new file mode 100644 index 000000000..3158c4280 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/apps/CMakeLists.txt @@ -0,0 +1,61 @@ +# +# Copyright 2011-2013 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see .
+#
+
+########################################################################
+# Setup profiler
+########################################################################
+if(Boost_FOUND)
+
+  # MSVC builds get one extra include directory (cmake/msvc).
+  if(MSVC)
+    include_directories(${CMAKE_SOURCE_DIR}/cmake/msvc)
+  endif()
+
+  include_directories(
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${CMAKE_CURRENT_BINARY_DIR}
+    ${CMAKE_SOURCE_DIR}/include
+    ${CMAKE_BINARY_DIR}/include
+    ${CMAKE_SOURCE_DIR}/lib
+    ${CMAKE_BINARY_DIR}/lib
+    ${Boost_INCLUDE_DIRS}
+  )
+
+  # volk_gnsssdr_profile: the profiler source plus lib/qa_utils.cc.
+  add_executable(volk_gnsssdr_profile
+    ${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr_profile.cc
+    ${CMAKE_SOURCE_DIR}/lib/qa_utils.cc
+  )
+
+  # volk_gnsssdr-config-info: built from a single source file.
+  add_executable(volk_gnsssdr-config-info volk_gnsssdr-config-info.cc)
+
+  # Both tools link against volk_gnsssdr and Boost and are installed to bin
+  # under the "volk_gnsssdr" component.
+  foreach(volk_app volk_gnsssdr_profile volk_gnsssdr-config-info)
+    target_link_libraries(${volk_app} volk_gnsssdr ${Boost_LIBRARIES})
+    install(
+      TARGETS ${volk_app}
+      DESTINATION bin
+      COMPONENT "volk_gnsssdr"
+    )
+  endforeach()
+
+endif()
diff --git a/src/algorithms/libs/volk_gnsssdr/apps/volk_gnsssdr-config-info.cc b/src/algorithms/libs/volk_gnsssdr/apps/volk_gnsssdr-config-info.cc
new file mode 100644
index 000000000..ec8c09525
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/apps/volk_gnsssdr-config-info.cc
@@ -0,0 +1,96 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2013 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#if HAVE_CONFIG_H +#include +#endif + +#include +#include "volk_gnsssdr/volk_gnsssdr.h" +#include +#include + +namespace po = boost::program_options; + +int +main(int argc, char **argv) +{ + po::options_description desc("Program options: volk_gnsssdr-config-info [options]"); + po::variables_map vm; + + desc.add_options() + ("help,h", "print help message") + ("prefix", "print VOLK installation prefix") + ("builddate", "print VOLK build date (RFC2822 format)") + ("cc", "print VOLK C compiler version") + ("cflags", "print VOLK CFLAGS") + ("all-machines", "print VOLK machines built into library") + ("avail-machines", "print VOLK machines the current platform can use") + ("machine", "print the VOLK machine that will be used") + ("version,v", "print VOLK version") + ; + + try { + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + } + catch (po::error& error){ + std::cerr << "Error: " << error.what() << std::endl << std::endl; + std::cerr << desc << std::endl; + return 1; + } + + if(vm.size() == 0 || vm.count("help")) { + std::cout << desc << std::endl; + return 1; + } + + if(vm.count("prefix")) + std::cout << volk_gnsssdr_prefix() << std::endl; + + if(vm.count("builddate")) + std::cout << volk_gnsssdr_build_date() << std::endl; + + if(vm.count("version")) + std::cout << volk_gnsssdr_version() << std::endl; + + if(vm.count("cc")) + std::cout << volk_gnsssdr_c_compiler() << std::endl; + + if(vm.count("cflags")) + std::cout << volk_gnsssdr_compiler_flags() << std::endl; + + // stick an extra ';' to make output of this and avail-machines the + // same structure for easier parsing + if(vm.count("all-machines")) + std::cout << volk_gnsssdr_available_machines() 
<< ";" << std::endl; + + if(vm.count("avail-machines")) { + volk_gnsssdr_list_machines(); + } + + if(vm.count("machine")) { + std::cout << volk_gnsssdr_get_machine() << std::endl; + } + + return 0; +} diff --git a/src/algorithms/libs/volk_gnsssdr/apps/volk_gnsssdr_profile.cc b/src/algorithms/libs/volk_gnsssdr/apps/volk_gnsssdr_profile.cc new file mode 100644 index 000000000..c9ca8756c --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/apps/volk_gnsssdr_profile.cc @@ -0,0 +1,163 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012-2014 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. 
+ */ + +#include "qa_utils.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace fs = boost::filesystem; + +int main(int argc, char *argv[]) { + // Adding program options + boost::program_options::options_description desc("Options"); + desc.add_options() + ("help,h", "Print help messages") + ("benchmark,b", + boost::program_options::value()->default_value( false ) + ->implicit_value( true ), + "Run all kernels (benchmark mode)") + ("tests-regex,R", + boost::program_options::value(), + "Run tests matching regular expression.") + ; + + // Handle the options that were given + boost::program_options::variables_map vm; + bool benchmark_mode; + std::string kernel_regex; + bool store_results = true; + try { + boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), vm); + boost::program_options::notify(vm); + benchmark_mode = vm.count("benchmark")?vm["benchmark"].as():false; + if ( vm.count("tests-regex" ) ) { + kernel_regex = vm["tests-regex"].as(); + store_results = false; + std::cout << "Warning: using a regexp will not save results to a config" << std::endl; + } + else { + kernel_regex = ".*"; + store_results = true; + } + } catch (boost::program_options::error& error) { + std::cerr << "Error: " << error.what() << std::endl << std::endl; + std::cerr << desc << std::endl; + return 1; + } + /** --help option +*/ + if ( vm.count("help") ) + { + std::cout << "The VOLK profiler." 
<< std::endl + << desc << std::endl; + return 0; + } + + + // Run tests + std::vector results; + + //VOLK_PROFILE(volk_gnsssdr_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex); + //VOLK_PROFILE(volk_gnsssdr_16i_branch_4_state_8, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex); + //VOLK_PROFILE(volk_gnsssdr_16i_max_star_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex); + //VOLK_PROFILE(volk_gnsssdr_16i_max_star_horizontal_16i, 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex); + //VOLK_PROFILE(volk_gnsssdr_16i_permute_and_scalar_add, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex); + //VOLK_PROFILE(volk_gnsssdr_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex); + //VOLK_PROFILE(volk_gnsssdr_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 2046, 10000, &results, benchmark_mode, kernel_regex); + //VOLK_PROFILE(volk_gnsssdr_32fc_s32f_x2_power_spectral_density_32f, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex); + //VOLK_PROFILE(volk_gnsssdr_32f_s32f_32f_fm_detect_32f, 1e-4, 2046, 10000, &results, benchmark_mode, kernel_regex); + //VOLK_PROFILE(volk_gnsssdr_32u_popcnt, 0, 0, 2046, 10000, &results, benchmark_mode, kernel_regex); + //VOLK_PROFILE(volk_gnsssdr_64u_popcnt, 0, 0, 2046, 10000, &results, benchmark_mode, kernel_regex); + //VOLK_PROFILE(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, lv_32fc_t(1.0, 0.5), 204602, 1000, &results, benchmark_mode, kernel_regex); + + // Until we can update the config on a kernel by kernel basis + // do not overwrite volk_gnsssdr_config when using a regex. 
+ + //GNSS-SDR PROTO-KERNELS + //lv_32fc_t sfv = lv_cmake((float)1, (float)2); + //VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, sfv, 204602, 1000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_16i_s32f_convert_32f, 1e-4, 32768.0, 204602, 10000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_32f_accumulator_s32f, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_8i_accumulator_s8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_32f_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_8i_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_8i_max_s8i, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_32f_x2_add_32f, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_8i_x2_add_8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_32fc_conjugate_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_8ic_conjugate_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_32fc_magnitude_squared_32f, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_8ic_magnitude_squared_8i, 1e-4, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_32fc_x2_dot_prod_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_8ic_x2_dot_prod_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_32fc_x2_multiply_32fc, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_8ic_x2_multiply_8ic, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex); + VOLK_PROFILE(volk_gnsssdr_8u_x2_multiply_8u, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex); + if(store_results) { + char path[1024]; + volk_gnsssdr_get_config_path(path); + + const fs::path config_path(path); + + if (not fs::exists(config_path.branch_path())) + { + std::cout << "Creating " << config_path.branch_path() << "..." << std::endl; + fs::create_directories(config_path.branch_path()); + } + + std::cout << "Writing " << config_path << "..." 
<< std::endl; + std::ofstream config(config_path.string().c_str()); + if(!config.is_open()) { //either we don't have write access or we don't have the dir yet + std::cout << "Error opening file " << config_path << std::endl; + } + + config << "\ +#this file is generated by volk_gnsssdr_profile.\n\ +#the function name is followed by the preferred architecture.\n\ +"; + + BOOST_FOREACH(std::string result, results) { + config << result << std::endl; + } + config.close(); + } + else { + std::cout << "Warning: config not generated" << std::endl; + } +} diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/CMakeParseArgumentsCopy.cmake b/src/algorithms/libs/volk_gnsssdr/cmake/CMakeParseArgumentsCopy.cmake new file mode 100644 index 000000000..7ce4c49ae --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/cmake/CMakeParseArgumentsCopy.cmake @@ -0,0 +1,138 @@ +# CMAKE_PARSE_ARGUMENTS( args...) +# +# CMAKE_PARSE_ARGUMENTS() is intended to be used in macros or functions for +# parsing the arguments given to that macro or function. +# It processes the arguments and defines a set of variables which hold the +# values of the respective options. +# +# The argument contains all options for the respective macro, +# i.e. keywords which can be used when calling the macro without any value +# following, like e.g. the OPTIONAL keyword of the install() command. +# +# The argument contains all keywords for this macro +# which are followed by one value, like e.g. DESTINATION keyword of the +# install() command. +# +# The argument contains all keywords for this macro +# which can be followed by more than one value, like e.g. the TARGETS or +# FILES keywords of the install() command. +# +# When done, CMAKE_PARSE_ARGUMENTS() will have defined for each of the +# keywords listed in , and +# a variable composed of the given +# followed by "_" and the name of the respective keyword. +# These variables will then hold the respective value from the argument list. 
+# For the option keywords this will be TRUE or FALSE.
+#
+# All remaining arguments are collected in a variable
+# <prefix>_UNPARSED_ARGUMENTS, this can be checked afterwards to see whether
+# your macro was called with unrecognized parameters.
+#
+# As an example, here is a my_install() macro, which takes similar arguments
+# as the real install() command:
+#
+#   function(MY_INSTALL)
+#     set(options OPTIONAL FAST)
+#     set(oneValueArgs DESTINATION RENAME)
+#     set(multiValueArgs TARGETS CONFIGURATIONS)
+#     cmake_parse_arguments(MY_INSTALL "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )
+#     ...
+#
+# Assume my_install() has been called like this:
+#   my_install(TARGETS foo bar DESTINATION bin OPTIONAL blub)
+#
+# After the cmake_parse_arguments() call the macro will have set the following
+# variables:
+#   MY_INSTALL_OPTIONAL = TRUE
+#   MY_INSTALL_FAST = FALSE (this option was not used when calling my_install())
+#   MY_INSTALL_DESTINATION = "bin"
+#   MY_INSTALL_RENAME = "" (was not used)
+#   MY_INSTALL_TARGETS = "foo;bar"
+#   MY_INSTALL_CONFIGURATIONS = "" (was not used)
+#   MY_INSTALL_UNPARSED_ARGUMENTS = "blub" (no value expected after "OPTIONAL")
+#
+# You can then continue and process these variables.
+#
+# Keywords terminate lists of values, e.g. if directly after a one_value_keyword
+# another recognized keyword follows, this is interpreted as the beginning of
+# the new option.
+# E.g. my_install(TARGETS foo DESTINATION OPTIONAL) would not result in
+# MY_INSTALL_DESTINATION set to "OPTIONAL"; instead MY_INSTALL_DESTINATION
+# would be empty and MY_INSTALL_OPTIONAL would be set to TRUE.
+
+#=============================================================================
+# Copyright 2010 Alexander Neundorf
+#
+# Distributed under the OSI-approved BSD License (the "License");
+# see accompanying file Copyright.txt for details.
+#
+# This software is distributed WITHOUT ANY WARRANTY; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the License for more information.
+#=============================================================================
+# (To distribute this file outside of CMake, substitute the full
+#  License text for the above reference.)
+
+
+if(__CMAKE_PARSE_ARGUMENTS_INCLUDED)
+  return()
+endif()
+set(__CMAKE_PARSE_ARGUMENTS_INCLUDED TRUE)
+
+
+# Parses ${ARGN} against three keyword lists and publishes one
+# ${prefix}_<keyword> variable per keyword, plus ${prefix}_UNPARSED_ARGUMENTS,
+# in the caller's scope.
+function(CMAKE_PARSE_ARGUMENTS prefix _optionNames _singleArgNames _multiArgNames)
+  # Start from a clean slate: value keywords are unset, options are FALSE.
+  foreach(valueKeyword ${_singleArgNames} ${_multiArgNames})
+    set(${prefix}_${valueKeyword})
+  endforeach()
+  foreach(optionKeyword ${_optionNames})
+    set(${prefix}_${optionKeyword} FALSE)
+  endforeach()
+  set(${prefix}_UNPARSED_ARGUMENTS)
+
+  # parseState is FALSE, "SINGLE" or "MULTI"; activeKeyword is the value
+  # keyword that is currently collecting arguments.
+  set(parseState FALSE)
+  set(activeKeyword)
+
+  # Walk the arguments and fill the result variables.
+  foreach(token ${ARGN})
+    # An index of -1 means the token is not a keyword of that kind; any
+    # keyword ends the values belonging to the previous keyword.
+    list(FIND _optionNames "${token}" optionIndex)
+    list(FIND _singleArgNames "${token}" singleArgIndex)
+    list(FIND _multiArgNames "${token}" multiArgIndex)
+
+    if(NOT ${optionIndex} EQUAL -1)
+      # Option keyword: flag it and stop collecting values.
+      set(${prefix}_${token} TRUE)
+      set(parseState FALSE)
+    elseif(NOT ${singleArgIndex} EQUAL -1)
+      # One-value keyword: reset its variable and wait for a single value.
+      set(activeKeyword ${token})
+      set(${prefix}_${activeKeyword})
+      set(parseState "SINGLE")
+    elseif(NOT ${multiArgIndex} EQUAL -1)
+      # Multi-value keyword: reset its variable and collect until the next keyword.
+      set(activeKeyword ${token})
+      set(${prefix}_${activeKeyword})
+      set(parseState "MULTI")
+    elseif("${parseState}" STREQUAL "SINGLE")
+      # Plain value for a one-value keyword; later values are unparsed.
+      set(${prefix}_${activeKeyword} ${token})
+      set(parseState FALSE)
+    elseif("${parseState}" STREQUAL "MULTI")
+      list(APPEND ${prefix}_${activeKeyword} ${token})
+    else()
+      # Plain value with no keyword waiting for it.
+      list(APPEND ${prefix}_UNPARSED_ARGUMENTS ${token})
+    endif()
+
+  endforeach()
+
+  # Propagate the result variables to the caller.
+  foreach(resultName ${_singleArgNames} ${_multiArgNames} ${_optionNames})
+    set(${prefix}_${resultName} ${${prefix}_${resultName}} PARENT_SCOPE)
+  endforeach()
+  set(${prefix}_UNPARSED_ARGUMENTS ${${prefix}_UNPARSED_ARGUMENTS} PARENT_SCOPE)
+
+endfunction()
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/FindORC.cmake b/src/algorithms/libs/volk_gnsssdr/cmake/FindORC.cmake
new file mode 100644
index 000000000..f21513f72
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/FindORC.cmake
@@ -0,0 +1,36 @@
+# Locate ORC 0.4: the orcc compiler, the header directory and the library.
+# pkg-config results are used as hints; ORC_ROOT and the install prefix are
+# searched as well.
+find_package(PkgConfig)
+pkg_check_modules(PC_ORC "orc-0.4 > 0.4.11")
+
+find_program(ORCC_EXECUTABLE orcc
+  HINTS ${PC_ORC_TOOLSDIR}
+  PATHS ${ORC_ROOT}/bin ${CMAKE_INSTALL_PREFIX}/bin
+)
+
+find_path(ORC_INCLUDE_DIR NAMES orc/orc.h
+  HINTS ${PC_ORC_INCLUDEDIR}
+  PATHS ${ORC_ROOT}/include/orc-0.4 ${CMAKE_INSTALL_PREFIX}/include/orc-0.4
+)
+
+find_path(ORC_LIBRARY_DIR NAMES ${CMAKE_SHARED_LIBRARY_PREFIX}orc-0.4${CMAKE_SHARED_LIBRARY_SUFFIX}
+  HINTS ${PC_ORC_LIBDIR}
+  PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} ${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}
+)
+
+find_library(ORC_LIB orc-0.4
+  HINTS ${PC_ORC_LIBRARY_DIRS}
+  PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} ${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}
+)
+
+list(APPEND ORC_LIBRARY ${ORC_LIB})
+
+# Result variables.
+set(ORC_INCLUDE_DIRS ${ORC_INCLUDE_DIR})
+set(ORC_LIBRARIES ${ORC_LIBRARY})
+set(ORC_LIBRARY_DIRS ${ORC_LIBRARY_DIR})
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(ORC "orc files" ORC_LIBRARY ORC_INCLUDE_DIR ORCC_EXECUTABLE)
+
+mark_as_advanced(ORC_INCLUDE_DIR ORC_LIBRARY ORCC_EXECUTABLE)
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/GrPython.cmake b/src/algorithms/libs/volk_gnsssdr/cmake/GrPython.cmake
new file mode 100644
index 000000000..b7b561b7b
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/GrPython.cmake
@@ -0,0 +1,234 @@
+# Copyright 2010-2011,2013 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+
+if(DEFINED __INCLUDED_VOLK_PYTHON_CMAKE)
+    return()
+endif()
+set(__INCLUDED_VOLK_PYTHON_CMAKE TRUE)
+
+########################################################################
+# Setup the python interpreter:
+# This allows the user to specify a specific interpreter,
+# or finds the interpreter via the built-in cmake module.
+########################################################################
+#this allows the user to override PYTHON_EXECUTABLE
+if(PYTHON_EXECUTABLE)
+
+    set(PYTHONINTERP_FOUND TRUE)
+
+#otherwise if not set, try to automatically find it
+else()
+
+    #use the built-in find script
+    find_package(PythonInterp 2)
+
+    #and if that fails use the find program routine
+    if(NOT PYTHONINTERP_FOUND)
+        find_program(PYTHON_EXECUTABLE NAMES python python2 python2.7 python2.6 python2.5)
+        if(PYTHON_EXECUTABLE)
+            set(PYTHONINTERP_FOUND TRUE)
+        endif()
+    endif()
+
+endif()
+
+#make the path to the executable appear in the cmake gui
+set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE} CACHE FILEPATH "python interpreter")
+
+#make sure we can use -B with python (introduced in 2.6)
+if(PYTHON_EXECUTABLE)
+    execute_process(
+        COMMAND ${PYTHON_EXECUTABLE} -B -c ""
+        OUTPUT_QUIET ERROR_QUIET
+        RESULT_VARIABLE PYTHON_HAS_DASH_B_RESULT
+    )
+    if(PYTHON_HAS_DASH_B_RESULT EQUAL 0)
+        set(PYTHON_DASH_B "-B")
+    endif()
+endif()
+
+########################################################################
+# Check for the existence of a python module:
+# - desc a string description of the check
+# - mod the name of the module to import
+# - cmd an additional command to run
+# - have the result variable to set
+#
+# ${have} ends up TRUE when the python snippet exits with status 0 and
+# FALSE otherwise.
+########################################################################
+macro(VOLK_PYTHON_CHECK_MODULE desc mod cmd have)
+    message(STATUS "")
+    message(STATUS "Python checking for ${desc}")
+    execute_process(
+        COMMAND ${PYTHON_EXECUTABLE} -c "
+#########################################
+try: import ${mod}
+except:
+    try: ${mod}
+    except: exit(-1)
+try: assert ${cmd}
+except: exit(-1)
+#########################################"
+        RESULT_VARIABLE ${have}
+    )
+    if(${have} EQUAL 0)
+        message(STATUS "Python checking for ${desc} - found")
+        set(${have} TRUE)
+    else()
+        message(STATUS "Python checking for ${desc} - not found")
+        set(${have} FALSE)
+    endif()
+endmacro()
+
+########################################################################
+# Sets the python installation directory VOLK_PYTHON_DIR
+########################################################################
+execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "
+from distutils import sysconfig
+print sysconfig.get_python_lib(plat_specific=True, prefix='')
+" OUTPUT_VARIABLE VOLK_PYTHON_DIR OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+#quoted on purpose: when the query above produced no output the unquoted
+#form would drop the argument and make file() fail with a usage error
+file(TO_CMAKE_PATH "${VOLK_PYTHON_DIR}" VOLK_PYTHON_DIR)
+
+########################################################################
+# Create an always-built target with a unique name
+# Usage: VOLK_UNIQUE_TARGET(<description> <dependencies list>)
+#
+# The target name is built from the description, the directory relative to
+# the top of the build tree and a short md5 digest, with every non-word
+# character replaced by '_'.
+########################################################################
+function(VOLK_UNIQUE_TARGET desc)
+    file(RELATIVE_PATH reldir ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR})
+    execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import re, hashlib
+unique = hashlib.md5('${reldir}${ARGN}').hexdigest()[:5]
+print(re.sub('\\W', '_', '${desc} ${reldir} ' + unique))"
+    OUTPUT_VARIABLE _target OUTPUT_STRIP_TRAILING_WHITESPACE)
+    add_custom_target(${_target} ALL DEPENDS ${ARGN})
+endfunction()
+
+########################################################################
+# Install python sources (also builds and installs byte-compiled python)
+#
+# Keywords: DESTINATION and COMPONENT take one value, FILES and PROGRAMS
+# take lists. FILES are installed as given together with generated
+# .pyc/.pyo files; PROGRAMS are installed as copies that start with a
+# shebang line.
+########################################################################
+function(VOLK_PYTHON_INSTALL)
+    include(CMakeParseArgumentsCopy)
+    CMAKE_PARSE_ARGUMENTS(VOLK_PYTHON_INSTALL "" "DESTINATION;COMPONENT" "FILES;PROGRAMS" ${ARGN})
+
+    ####################################################################
+    if(VOLK_PYTHON_INSTALL_FILES)
+    ####################################################################
+        install(${ARGN}) #installs regular python files
+
+        #create a list of all generated files
+        unset(pysrcfiles)
+        unset(pycfiles)
+        unset(pyofiles)
+        foreach(pyfile ${VOLK_PYTHON_INSTALL_FILES})
+            get_filename_component(pyfile ${pyfile} ABSOLUTE)
+            list(APPEND pysrcfiles ${pyfile})
+
+            #determine if this file is in the source or binary directory
+            file(RELATIVE_PATH source_rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${pyfile})
+            string(LENGTH "${source_rel_path}" source_rel_path_len)
+            file(RELATIVE_PATH binary_rel_path ${CMAKE_CURRENT_BINARY_DIR} ${pyfile})
+            string(LENGTH "${binary_rel_path}" binary_rel_path_len)
+
+            #and set the generated path appropriately
+            #(the shorter relative path is the one used)
+            if(${source_rel_path_len} GREATER ${binary_rel_path_len})
+                set(pygenfile ${CMAKE_CURRENT_BINARY_DIR}/${binary_rel_path})
+            else()
+                set(pygenfile ${CMAKE_CURRENT_BINARY_DIR}/${source_rel_path})
+            endif()
+            list(APPEND pycfiles ${pygenfile}c)
+            list(APPEND pyofiles ${pygenfile}o)
+
+            #ensure generation path exists
+            get_filename_component(pygen_path ${pygenfile} PATH)
+            file(MAKE_DIRECTORY ${pygen_path})
+
+        endforeach()
+
+        #the command to generate the pyc files
+        add_custom_command(
+            DEPENDS ${pysrcfiles} OUTPUT ${pycfiles}
+            COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_BINARY_DIR}/python_compile_helper.py ${pysrcfiles} ${pycfiles}
+        )
+
+        #the command to generate the pyo files
+        add_custom_command(
+            DEPENDS ${pysrcfiles} OUTPUT ${pyofiles}
+            COMMAND ${PYTHON_EXECUTABLE} -O ${CMAKE_BINARY_DIR}/python_compile_helper.py ${pysrcfiles} ${pyofiles}
+        )
+
+        #create install rule and add generated files to target list
+        set(python_install_gen_targets ${pycfiles} ${pyofiles})
+        install(FILES ${python_install_gen_targets}
+            DESTINATION ${VOLK_PYTHON_INSTALL_DESTINATION}
+            COMPONENT ${VOLK_PYTHON_INSTALL_COMPONENT}
+        )
+
+
+    ####################################################################
+    elseif(VOLK_PYTHON_INSTALL_PROGRAMS)
+    ####################################################################
+        file(TO_NATIVE_PATH ${PYTHON_EXECUTABLE} pyexe_native)
+
+        #when cross compiling the shebang uses env instead of the build
+        #machine's interpreter path
+        if (CMAKE_CROSSCOMPILING)
+           set(pyexe_native "/usr/bin/env python")
+        endif()
+
+        foreach(pyfile ${VOLK_PYTHON_INSTALL_PROGRAMS})
+            get_filename_component(pyfile_name ${pyfile} NAME)
+            get_filename_component(pyfile ${pyfile} ABSOLUTE)
+            string(REPLACE "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" pyexefile "${pyfile}.exe")
+            list(APPEND python_install_gen_targets ${pyexefile})
+
+            get_filename_component(pyexefile_path ${pyexefile} PATH)
+            file(MAKE_DIRECTORY ${pyexefile_path})
+
+            add_custom_command(
+                OUTPUT ${pyexefile} DEPENDS ${pyfile}
+                COMMAND ${PYTHON_EXECUTABLE} -c
+                "open('${pyexefile}','w').write('\#!${pyexe_native}\\n'+open('${pyfile}').read())"
+                COMMENT "Shebangin ${pyfile_name}"
+                VERBATIM
+            )
+
+            #on windows, python files need an extension to execute
+            get_filename_component(pyfile_ext ${pyfile} EXT)
+            if(WIN32 AND NOT pyfile_ext)
+                set(pyfile_name "${pyfile_name}.py")
+            endif()
+
+            install(PROGRAMS ${pyexefile} RENAME ${pyfile_name}
+                DESTINATION ${VOLK_PYTHON_INSTALL_DESTINATION}
+                COMPONENT ${VOLK_PYTHON_INSTALL_COMPONENT}
+            )
+        endforeach()
+
+    endif()
+
+    VOLK_UNIQUE_TARGET("pygen" ${python_install_gen_targets})
+
+endfunction()
+
+########################################################################
+# Write the python helper script that generates byte code files
+# (its arguments are the sources followed by the same number of outputs)
+########################################################################
+file(WRITE ${CMAKE_BINARY_DIR}/python_compile_helper.py "
+import sys, py_compile
+files = sys.argv[1:]
+srcs, gens = files[:len(files)/2], files[len(files)/2:]
+for src, gen in zip(srcs, gens):
+    py_compile.compile(file=src, cfile=gen, doraise=True)
+")
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/VolkBoost.cmake b/src/algorithms/libs/volk_gnsssdr/cmake/VolkBoost.cmake
new file mode 100644
index 000000000..318820e10
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/VolkBoost.cmake
@@ -0,0 +1,98 @@
+# Copyright 2010-2011 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+
+if(DEFINED __INCLUDED_VOLK_BOOST_CMAKE)
+    return()
+endif()
+set(__INCLUDED_VOLK_BOOST_CMAKE TRUE)
+
+########################################################################
+# Setup Boost and handle some system specific things
+########################################################################
+
+set(BOOST_REQUIRED_COMPONENTS
+    filesystem
+    system
+    unit_test_framework
+    program_options
+)
+
+if(UNIX AND NOT BOOST_ROOT AND EXISTS "/usr/lib64")
+    list(APPEND BOOST_LIBRARYDIR "/usr/lib64") #fedora 64-bit fix
+endif()
+
+if(MSVC)
+    set(BOOST_REQUIRED_COMPONENTS ${BOOST_REQUIRED_COMPONENTS} chrono)
+
+    if (NOT DEFINED BOOST_ALL_DYN_LINK)
+        set(BOOST_ALL_DYN_LINK TRUE)
+    endif()
+    set(BOOST_ALL_DYN_LINK "${BOOST_ALL_DYN_LINK}" CACHE BOOL "boost enable dynamic linking")
+    if(BOOST_ALL_DYN_LINK)
+        add_definitions(-DBOOST_ALL_DYN_LINK) #setup boost auto-linking in msvc
+    else()
+        unset(BOOST_REQUIRED_COMPONENTS) #empty components list for static link
+    endif()
+endif()
+
+# This does not allow us to disable specific versions. It is used
+# internally by cmake to know the formation of newer versions. As newer
+# Boost versions beyond what is shown here are produced, we must extend
+# this list. To disable Boost versions, see below.
+#
+# The list has to be set before find_package(Boost) runs, because the find
+# module reads it while searching.
+set(Boost_ADDITIONAL_VERSIONS
+    "1.35.0" "1.35" "1.36.0" "1.36" "1.37.0" "1.37" "1.38.0" "1.38" "1.39.0" "1.39"
+    "1.40.0" "1.40" "1.41.0" "1.41" "1.42.0" "1.42" "1.43.0" "1.43" "1.44.0" "1.44"
+    "1.45.0" "1.45" "1.46.0" "1.46" "1.47.0" "1.47" "1.48.0" "1.48" "1.49.0" "1.49"
+    "1.50.0" "1.50" "1.51.0" "1.51" "1.52.0" "1.52" "1.53.0" "1.53" "1.54.0" "1.54"
+    "1.55.0" "1.55" "1.56.0" "1.56" "1.57.0" "1.57" "1.58.0" "1.58" "1.59.0" "1.59"
+    "1.60.0" "1.60" "1.61.0" "1.61" "1.62.0" "1.62" "1.63.0" "1.63" "1.64.0" "1.64"
+    "1.65.0" "1.65" "1.66.0" "1.66" "1.67.0" "1.67" "1.68.0" "1.68" "1.69.0" "1.69"
+)
+
+find_package(Boost "1.35" COMPONENTS ${BOOST_REQUIRED_COMPONENTS})
+
+# Boost 1.52 disabled, see https://svn.boost.org/trac/boost/ticket/7669
+# Similar problems with Boost 1.46 and 1.47.
+
+option(ENABLE_BAD_BOOST "Enable known bad versions of Boost" OFF)
+if(ENABLE_BAD_BOOST)
+    message(STATUS "Enabling use of known bad versions of Boost.")
+endif()
+
+# For any unsuitable Boost version, add the version number below in
+# the following format: XXYYZZ
+# Where:
+#     XX is the major version ('10' for version 1)
+#     YY is the minor version number ('46' for 1.46)
+#     ZZ is the patch version number (typically just '00')
+set(Boost_NOGO_VERSIONS
+    104600 104601 104700 105200
+    )
+
+# Both operands are quoted: Boost_VERSION is empty when Boost was not found,
+# and an unquoted empty expansion would leave if() without a left operand.
+foreach(ver ${Boost_NOGO_VERSIONS})
+    if("${Boost_VERSION}" EQUAL "${ver}")
+        if(NOT ENABLE_BAD_BOOST)
+            message(STATUS "WARNING: Found a known bad version of Boost (v${Boost_VERSION}). Disabling.")
+            set(Boost_FOUND FALSE)
+        else()
+            message(STATUS "WARNING: Found a known bad version of Boost (v${Boost_VERSION}). Continuing anyway.")
+            set(Boost_FOUND TRUE)
+        endif()
+    endif()
+endforeach()
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/VolkConfig.cmake b/src/algorithms/libs/volk_gnsssdr/cmake/VolkConfig.cmake
new file mode 100644
index 000000000..7d58b1923
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/VolkConfig.cmake
@@ -0,0 +1,26 @@
+INCLUDE(FindPkgConfig)
+PKG_CHECK_MODULES(PC_VOLK volk_gnsssdr)
+
+FIND_PATH(
+    VOLK_INCLUDE_DIRS
+    NAMES volk_gnsssdr/volk_gnsssdr.h
+    HINTS $ENV{VOLK_DIR}/include
+        ${PC_VOLK_INCLUDEDIR}
+    PATHS /usr/local/include
+          /usr/include
+)
+
+FIND_LIBRARY(
+    VOLK_LIBRARIES
+    NAMES volk_gnsssdr
+    HINTS $ENV{VOLK_DIR}/lib
+        ${PC_VOLK_LIBDIR}
+    PATHS /usr/local/lib
+          /usr/local/lib64
+          /usr/lib
+          /usr/lib64
+)
+
+INCLUDE(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(VOLK DEFAULT_MSG VOLK_LIBRARIES VOLK_INCLUDE_DIRS)
+MARK_AS_ADVANCED(VOLK_LIBRARIES VOLK_INCLUDE_DIRS)
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/msvc/config.h b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/config.h
new file mode 100644
index 000000000..43792c783
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/config.h
@@ -0,0 +1,58 @@
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_CONFIG_H_ // [
+#define _MSC_CONFIG_H_
+
+////////////////////////////////////////////////////////////////////////
+// enable inline functions for C code
+////////////////////////////////////////////////////////////////////////
+#ifndef __cplusplus
+#  define inline __inline
+#endif
+
+////////////////////////////////////////////////////////////////////////
+// signed size_t
+////////////////////////////////////////////////////////////////////////
+#include <stddef.h> // ptrdiff_t
+typedef ptrdiff_t ssize_t;
+
+////////////////////////////////////////////////////////////////////////
+// rint functions (these shims round halfway cases away from zero)
+////////////////////////////////////////////////////////////////////////
+#include <math.h> // floor, ceil, floorf, ceilf, HUGE_VAL
+static inline long lrint(double x){return (long)(x > 0.0 ? x + 0.5 : x - 0.5);}
+static inline long lrintf(float x){return (long)(x > 0.0f ? x + 0.5f : x - 0.5f);}
+static inline long long llrint(double x){return (long long)(x > 0.0 ? x + 0.5 : x - 0.5);}
+static inline long long llrintf(float x){return (long long)(x > 0.0f ? x + 0.5f : x - 0.5f);}
+static inline double rint(double x){return (x > 0.0)? floor(x + 0.5) : ceil(x - 0.5);}
+static inline float rintf(float x){return (x > 0.0f)? floorf(x + 0.5f) : ceilf(x - 0.5f);}
+
+////////////////////////////////////////////////////////////////////////
+// math constants
+////////////////////////////////////////////////////////////////////////
+#define INFINITY HUGE_VAL
+
+# define M_E        2.7182818284590452354   /* e */
+# define M_LOG2E    1.4426950408889634074   /* log_2 e */
+# define M_LOG10E   0.43429448190325182765  /* log_10 e */
+# define M_LN2      0.69314718055994530942  /* log_e 2 */
+# define M_LN10     2.30258509299404568402  /* log_e 10 */
+# define M_PI       3.14159265358979323846  /* pi */
+# define M_PI_2     1.57079632679489661923  /* pi/2 */
+# define M_PI_4     0.78539816339744830962  /* pi/4 */
+# define M_1_PI     0.31830988618379067154  /* 1/pi */
+# define M_2_PI     0.63661977236758134308  /* 2/pi */
+# define M_2_SQRTPI 1.12837916709551257390  /* 2/sqrt(pi) */
+# define M_SQRT2    1.41421356237309504880  /* sqrt(2) */
+# define M_SQRT1_2  0.70710678118654752440  /* 1/sqrt(2) */
+
+////////////////////////////////////////////////////////////////////////
+// random and srandom (thin wrappers over rand/srand)
+////////////////////////////////////////////////////////////////////////
+#include <stdlib.h> // rand, srand
+static inline long int random (void) { return rand(); }
+static inline void srandom (unsigned int seed) { srand(seed); }
+
+#endif // _MSC_CONFIG_H_ ]
diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/msvc/inttypes.h b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/inttypes.h
new file mode 100644
index 000000000..0a1b60fc1
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/inttypes.h
@@ -0,0 +1,301 @@
+// ISO C9x compliant inttypes.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
+//
+// Copyright (c) 2006 Alexander Chemeris
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" 
+#endif // _MSC_VER ]
+
+#ifndef _MSC_INTTYPES_H_ // [
+#define _MSC_INTTYPES_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <stdint.h> // intmax_t, used by imaxdiv_t below
+
+// 7.8 Format conversion of integer types
+
+typedef struct {
+    intmax_t quot;
+    intmax_t rem;
+} imaxdiv_t;
+
+// 7.8.1 Macros for format specifiers
+
+// The fprintf macros for signed integers are:
+#define PRId8       "d"
+#define PRIi8       "i"
+#define PRIdLEAST8  "d"
+#define PRIiLEAST8  "i"
+#define PRIdFAST8   "d"
+#define PRIiFAST8   "i"
+
+#define PRId16      "hd"
+#define PRIi16      "hi"
+#define PRIdLEAST16 "hd"
+#define PRIiLEAST16 "hi"
+#define PRIdFAST16  "hd"
+#define PRIiFAST16  "hi"
+
+#define PRId32      "I32d"
+#define PRIi32      "I32i"
+#define PRIdLEAST32 "I32d"
+#define PRIiLEAST32 "I32i"
+#define PRIdFAST32  "I32d"
+#define PRIiFAST32  "I32i"
+
+#define PRId64      "I64d"
+#define PRIi64      "I64i"
+#define PRIdLEAST64 "I64d"
+#define PRIiLEAST64 "I64i"
+#define PRIdFAST64  "I64d"
+#define PRIiFAST64  "I64i"
+
+#define PRIdMAX     "I64d"
+#define PRIiMAX     "I64i"
+
+#define PRIdPTR     "Id"
+#define PRIiPTR     "Ii"
+
+// The fprintf macros for unsigned integers are:
+#define PRIo8       "o"
+#define PRIu8       "u"
+#define PRIx8       "x"
+#define PRIX8       "X"
+#define PRIoLEAST8  "o"
+#define PRIuLEAST8  "u"
+#define PRIxLEAST8  "x"
+#define PRIXLEAST8  "X"
+#define PRIoFAST8   "o"
+#define PRIuFAST8   "u"
+#define PRIxFAST8   "x"
+#define PRIXFAST8   "X"
+
+#define PRIo16      "ho"
+#define PRIu16      "hu"
+#define PRIx16      "hx"
+#define PRIX16      "hX"
+#define PRIoLEAST16 "ho"
+#define PRIuLEAST16 "hu"
+#define PRIxLEAST16 "hx"
+#define PRIXLEAST16 "hX"
+#define PRIoFAST16  "ho"
+#define PRIuFAST16  "hu"
+#define PRIxFAST16  "hx"
+#define PRIXFAST16  "hX"
+
+#define PRIo32      "I32o"
+#define PRIu32      "I32u"
+#define PRIx32      "I32x"
+#define PRIX32      "I32X"
+#define PRIoLEAST32 "I32o"
+#define PRIuLEAST32 "I32u"
+#define PRIxLEAST32 "I32x"
+#define PRIXLEAST32 "I32X"
+#define PRIoFAST32  "I32o"
+#define PRIuFAST32  "I32u"
+#define PRIxFAST32  "I32x"
+#define PRIXFAST32  "I32X"
+
+#define PRIo64      "I64o"
+#define PRIu64      "I64u"
+#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define 
SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + + +#endif // _MSC_INTTYPES_H_ ] diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdbool.h b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdbool.h new file mode 100644 index 000000000..ca4581d37 --- /dev/null +++ 
b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdbool.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2005, 2006 Apple Computer, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + +#ifndef STDBOOL_WIN32_H +#define STDBOOL_WIN32_H + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef __cplusplus + +typedef unsigned char bool; + +#define true 1 +#define false 0 + +#ifndef CASSERT +#define CASSERT(exp, name) typedef int dummy##name [(exp) ? 
1 : -1]; +#endif + +CASSERT(sizeof(bool) == 1, bool_is_one_byte) +CASSERT(true, true_is_true) +CASSERT(!false, false_is_false) + +#endif + +#endif diff --git a/src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdint.h b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdint.h new file mode 100644 index 000000000..108bc8982 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/cmake/msvc/stdint.h @@ -0,0 +1,251 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2008 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
+// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
+// or compiler give many errors like this:
+// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#ifdef __cplusplus
+extern "C" {
+#endif
+#  include <wchar.h>
+#ifdef __cplusplus
+}
+#endif
+
+// Define _W64 macros to mark types changing their size, like intptr_t.
+#ifndef _W64
+#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
+#     define _W64 __w64
+#  else
+#     define _W64
+#  endif
+#endif
+
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+
+// Visual Studio 6 and Embedded Visual C++ 4 doesn't
+// realize that, e.g. char has the same size as __int8
+// so we give up on __intX for them.
+#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define 
UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// 
WCHAR_MIN and WCHAR_MAX are also defined in +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +#ifndef INTMAX_C +#define INTMAX_C INT64_C +#endif +#ifndef UINTMAX_C +#define UINTMAX_C UINT64_C +#endif + +#endif // __STDC_CONSTANT_MACROS ] + + +#endif // _MSC_STDINT_H_ ] diff --git a/src/algorithms/libs/volk_gnsssdr/gen/archs.xml b/src/algorithms/libs/volk_gnsssdr/gen/archs.xml new file mode 100644 index 000000000..e570fe5d2 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/gen/archs.xml @@ -0,0 +1,204 @@ + + + + + + + + -maltivec + 16 + + + + + -mfloat-abi=softfp + + + + -mfloat-abi=hard + + + + -mfpu=neon + -funsafe-math-optimizations + 16 + + + + + -m32 + + + + + 0x80000001 + + + 3 + 0x80000001 + 29 + + -m64 + -m64 + + + + + 3 + 0x80000001 + 31 + + -m3dnow + -m3dnow + 8 + + + + + 3 + 0x80000001 + 5 + + -msse4.2 + -msse4.2 + 16 + + + + + 2 + 0x00000001 + 23 + + -mpopcnt + -mpopcnt + /arch:AVX + + + + + 3 + 0x00000001 + 23 + + -mmmx + -mmmx + /arch:SSE + 8 + + + + + 3 + 0x00000001 + 25 + + -msse + -msse + /arch:SSE + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + xmmintrin.h + 16 + + + + + 3 + 0x00000001 + 26 + + -msse2 + -msse2 + /arch:SSE2 + 16 + + + + + + + + + + + + 2 + 0x00000001 + 0 + + -msse3 + -msse3 + /arch:AVX + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + 
pmmintrin.h + 16 + + + + + 2 + 0x00000001 + 9 + + -mssse3 + -mssse3 + /arch:AVX + 16 + + + + + 2 + 0x80000001 + 6 + + -msse4a + -msse4a + 16 + + + + + 2 + 0x00000001 + 19 + + -msse4.1 + -msse4.1 + /arch:AVX + 16 + + + + + 2 + 0x00000001 + 20 + + -msse4.2 + -msse4.2 + /arch:AVX + 16 + + + + + 2 + 0x00000001 + 28 + + + + 2 + 0x00000001 + 27 + + + + -mavx + -mavx + /arch:AVX + 32 + + + diff --git a/src/algorithms/libs/volk_gnsssdr/gen/machines.xml b/src/algorithms/libs/volk_gnsssdr/gen/machines.xml new file mode 100644 index 000000000..357bf7519 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/gen/machines.xml @@ -0,0 +1,55 @@ + + + +generic orc| + + + + + +generic neon softfp|hardfp orc| + + + + +generic 32|64| mmx| sse sse2 orc| + + + +generic 32|64 mmx sse sse2 sse3 orc| + + + +generic 32|64 mmx sse sse2 sse3 ssse3 orc| + + + +generic 32|64 mmx sse sse2 sse3 sse4_a popcount orc| + + + +generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1 orc| + + + +generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount orc| + + + + +generic 32|64| mmx| sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount avx orc| + + + +generic altivec + + + diff --git a/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_arch_defs.py b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_arch_defs.py new file mode 100644 index 000000000..3c75e1374 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_arch_defs.py @@ -0,0 +1,85 @@ +# +# Copyright 2012 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+archs = list()
+arch_dict = dict()
+
+class arch_class:
+    """One <arch> entry of archs.xml: name, compiler flags, checks, alignment."""
+    def __init__(self, flags, checks, **kwargs):
+        for key, cast, failval in (
+            ('name', str, None),
+            ('environment', str, None),
+            ('include', str, None),
+            ('alignment', int, 1)
+        ):
+            # an absent or unconvertible attribute falls back to failval
+            try: setattr(self, key, cast(kwargs[key]))
+            except (KeyError, TypeError, ValueError): setattr(self, key, failval)
+        self.checks = checks
+        assert(self.name)
+        self._flags = flags
+
+    def is_supported(self, compiler):
+        """True when the arch lists no flags at all, or lists some for compiler."""
+        return not self._flags or compiler in self._flags
+
+    def get_flags(self, compiler):
+        """Return the flags registered for compiler, or an empty list."""
+        return self._flags.get(compiler, list())
+
+    def __repr__(self): return self.name
+
+def register_arch(**kwargs):
+    """Create an arch_class from kwargs and record it in archs and arch_dict."""
+    arch = arch_class(**kwargs)
+    archs.append(arch)
+    arch_dict[arch.name] = arch
+
+########################################################################
+# register the arches
+########################################################################
+#TODO skip the XML and put it here
+from xml.dom import minidom
+import os
+gendir = os.path.dirname(__file__)
+archs_xml = minidom.parse(os.path.join(gendir, 'archs.xml')).getElementsByTagName('arch')
+for arch_xml in archs_xml:
+    kwargs = dict()
+    for attr in arch_xml.attributes.keys():
+        kwargs[attr] = arch_xml.attributes[attr].value
+    for node in arch_xml.childNodes:
+        try:
+            name = node.tagName
+            val = arch_xml.getElementsByTagName(name)[0].firstChild.data
+            kwargs[name] = val
+        except AttributeError: pass #skip nodes without a tagName or text data
+    checks = list()
+    for check_xml in arch_xml.getElementsByTagName("check"):
+        name = check_xml.attributes["name"].value
+        params = [p.firstChild.data for p in check_xml.getElementsByTagName("param")]
+        checks.append([name, params])
+    flags = dict()
+    for flag_xml in arch_xml.getElementsByTagName("flag"):
+        name = flag_xml.attributes["compiler"].value
+        flags.setdefault(name, list()).append(flag_xml.firstChild.data)
+    #force kwargs keys to be of type str, not unicode for py25
+    kwargs = dict((str(k), v) for k, v in kwargs.items())
+    register_arch(flags=flags, checks=checks, **kwargs)
+
+if __name__ == '__main__':
+    print(archs)
diff --git a/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_compile_utils.py b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_compile_utils.py
new file mode 100644
index 000000000..05de9a546
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_compile_utils.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+#
+# Copyright 2012 Free Software Foundation, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+import optparse
+import volk_gnsssdr_arch_defs
+import volk_gnsssdr_machine_defs
+
+def do_arch_flags_list(compiler):
+    """Print 'name,flag,...' for each arch supported by compiler, ';'-separated."""
+    output = list()
+    for arch in volk_gnsssdr_arch_defs.archs:
+        if not arch.is_supported(compiler): continue
+        fields = [arch.name] + arch.get_flags(compiler)
+        output.append(','.join(fields))
+    print(';'.join(output))
+
+def do_machines_list(arch_names):
+    """Print, ';'-separated, the machines whose archs are all in arch_names."""
+    available = set(arch_names)
+    output = list()
+    for machine in volk_gnsssdr_machine_defs.machines:
+        if set(machine.arch_names).issubset(available):
+            output.append(machine.name)
+    print(';'.join(output))
+
+def do_machine_flags_list(compiler, machine_name):
+    """Print the space-separated flags of every arch in the named machine."""
+    output = list()
+    machine = volk_gnsssdr_machine_defs.machine_dict[machine_name]
+    for arch in machine.archs:
+        output.extend(arch.get_flags(compiler))
+    print(' '.join(output))
+
+def main():
+    """Parse the command line and run the listing selected by --mode."""
+    parser = optparse.OptionParser()
+    parser.add_option('--mode', type='string')
+    parser.add_option('--compiler', type='string')
+    parser.add_option('--archs', type='string')
+    parser.add_option('--machine', type='string')
+    (opts, args) = parser.parse_args()
+
+    def required(name):
+        #report a missing option as a usage error instead of a traceback
+        value = getattr(opts, name)
+        if value is None: parser.error('--%s is required with --mode=%s'%(name, opts.mode))
+        return value
+
+    if opts.mode == 'arch_flags': return do_arch_flags_list(required('compiler').lower())
+    if opts.mode == 'machines': return do_machines_list(required('archs').split(';'))
+    if opts.mode == 'machine_flags': return do_machine_flags_list(required('compiler').lower(), required('machine'))
+
+if __name__ == '__main__': main()
diff --git a/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_kernel_defs.py b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_kernel_defs.py
new file mode 100644
index 000000000..b3f03f627
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_kernel_defs.py
@@ -0,0 +1,209 @@
+#
+# Copyright 2011-2012 Free Software Foundation, Inc.
+# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Radio; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 51 Franklin Street, +# Boston, MA 02110-1301, USA. +# + +import os +import re +import sys +import glob + +######################################################################## +# Strip comments from a c/cpp file. +# Input is code string, output is code string without comments. +# http://stackoverflow.com/questions/241327/python-snippet-to-remove-c-and-c-comments +######################################################################## +def comment_remover(text): + def replacer(match): + s = match.group(0) + if s.startswith('/'): + return "" + else: + return s + pattern = re.compile( + r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', + re.DOTALL | re.MULTILINE + ) + return re.sub(pattern, replacer, text) + +######################################################################## +# Split code into nested sections according to ifdef preprocessor macros +######################################################################## +def split_into_nested_ifdef_sections(code): + sections = list() + section = '' + header = 'text' + in_section_depth = 0 + for i, line in enumerate(code.splitlines()): + m = re.match('^(\s*)#(\s*)(\w+)(.*)$', line) + line_is = 'normal' + if m: + p0, p1, fcn, stuff = m.groups() + if fcn in ('if', 'ifndef', 'ifdef'): line_is = 'if' + if fcn in ('else', 
'elif'): line_is = 'else' + if fcn in ('endif',): line_is = 'end' + + if line_is == 'if': in_section_depth += 1 + if line_is == 'end': in_section_depth -= 1 + + if in_section_depth == 1 and line_is == 'if': + sections.append((header, section)) + section = '' + header = line + continue + + if in_section_depth == 1 and line_is == 'else': + sections.append((header, section)) + section = '' + header = line + continue + + if in_section_depth == 0 and line_is == 'end': + sections.append((header, section)) + section = '' + header = 'text' + continue + + section += line + '\n' + + sections.append((header, section)) #and pack remainder into sections + sections = [sec for sec in sections if sec[1].strip()] #filter empty sections + + #recurse into non-text sections to fill subsections + for i, (header, section) in enumerate(sections): + if header == 'text': continue + sections[i] = (header, split_into_nested_ifdef_sections(section)) + + return sections + +######################################################################## +# Recursive print of sections to test code above +######################################################################## +def print_sections(sections, indent = ' '): + for header, body in sections: + if header == 'text': + print indent, ('\n'+indent).join(body.splitlines()) + continue + print indent.replace(' ', '-') + '>', header + print_sections(body, indent + ' ') + +######################################################################## +# Flatten a section to just body text +######################################################################## +def flatten_section_text(sections): + output = '' + for hdr, bdy in sections: + if hdr != 'text': output += flatten_section_text(bdy) + else: output += bdy + return output + +######################################################################## +# Extract kernel info from section, represent as an implementation +######################################################################## +class 
impl_class: + def __init__(self, kern_name, header, body): + #extract LV_HAVE_* + self.deps = set(map(str.lower, re.findall('LV_HAVE_(\w+)', header))) + #extract function suffix and args + body = flatten_section_text(body) + try: + fcn_matcher = re.compile('^.*(%s\\w*)\\s*\\((.*)$'%kern_name, re.DOTALL | re.MULTILINE) + body = body.split('{')[0].rsplit(')', 1)[0] #get the part before the open ){ bracket + m = fcn_matcher.match(body) + impl_name, the_rest = m.groups() + self.name = impl_name.replace(kern_name+'_', '') + self.args = list() + fcn_args = the_rest.split(',') + for fcn_arg in fcn_args: + arg_matcher = re.compile('^\s*(.*\\W)\s*(\w+)\s*$', re.DOTALL | re.MULTILINE) + m = arg_matcher.match(fcn_arg) + arg_type, arg_name = m.groups() + self.args.append((arg_type, arg_name)) + except Exception as ex: + raise Exception, 'I cant parse the function prototype from: %s in %s\n%s'%(kern_name, body, ex) + + assert self.name + self.is_aligned = self.name.startswith('a_') + + def __repr__(self): + return self.name + +######################################################################## +# Get sets of LV_HAVE_* from the code +######################################################################## +def extract_lv_haves(code): + haves = list() + for line in code.splitlines(): + if not line.strip().startswith('#'): continue + have_set = set(map(str.lower, re.findall('LV_HAVE_(\w+)', line))) + if have_set: haves.append(have_set) + return haves + +######################################################################## +# Represent a processing kernel, parse from file +######################################################################## +class kernel_class: + def __init__(self, kernel_file): + self.name = os.path.splitext(os.path.basename(kernel_file))[0] + self.pname = self.name.replace('volk_gnsssdr_', 'p_') + code = open(kernel_file, 'r').read() + code = comment_remover(code) + sections = split_into_nested_ifdef_sections(code) + self._impls = list() + for header, 
section in sections: + if 'ifndef' not in header.lower(): continue + for sub_hdr, body in section: + if 'if' not in sub_hdr.lower(): continue + if 'LV_HAVE_' not in sub_hdr: continue + self._impls.append(impl_class( + kern_name=self.name, header=sub_hdr, body=body, + )) + assert(self._impls) + self.has_dispatcher = False + for impl in self._impls: + if impl.name == 'dispatcher': + self._impls.remove(impl) + self.has_dispatcher = True + break + self.args = self._impls[0].args + self.arglist_types = ', '.join([a[0] for a in self.args]) + self.arglist_full = ', '.join(['%s %s'%a for a in self.args]) + self.arglist_names = ', '.join([a[1] for a in self.args]) + + def get_impls(self, archs): + archs = set(archs) + impls = list() + for impl in self._impls: + if impl.deps.intersection(archs) == impl.deps: + impls.append(impl) + return impls + + def __repr__(self): + return self.name + +######################################################################## +# Extract information from the VOLK kernels +######################################################################## +__file__ = os.path.abspath(__file__) +srcdir = os.path.dirname(os.path.dirname(__file__)) +kernel_files = glob.glob(os.path.join(srcdir, "kernels", "volk_gnsssdr", "*.h")) +kernels = map(kernel_class, kernel_files) + +if __name__ == '__main__': + print kernels diff --git a/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_machine_defs.py b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_machine_defs.py new file mode 100644 index 000000000..174106634 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_machine_defs.py @@ -0,0 +1,74 @@ +# +# Copyright 2012 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +from volk_gnsssdr_arch_defs import arch_dict + +machines = list() +machine_dict = dict() + +class machine_class: + def __init__(self, name, archs): + self.name = name + self.archs = list() + self.arch_names = list() + for arch_name in archs: + if not arch_name: continue + arch = arch_dict[arch_name] + self.archs.append(arch) + self.arch_names.append(arch_name) + self.alignment = max(map(lambda a: a.alignment, self.archs)) + + def __repr__(self): return self.name + +def register_machine(name, archs): + for i, arch_name in enumerate(archs): + if '|' in arch_name: #handle special arch names with the '|' + for arch_sub in arch_name.split('|'): + if arch_sub: + register_machine(name+'_'+arch_sub, archs[:i] + [arch_sub] + archs[i+1:]) + else: + register_machine(name, archs[:i] + archs[i+1:]) + return + machine = machine_class(name=name, archs=archs) + machines.append(machine) + machine_dict[machine.name] = machine + +######################################################################## +# register the machines +######################################################################## +#TODO skip the XML and put it here +from xml.dom import minidom +import os +gendir = os.path.dirname(__file__) +machines_xml = minidom.parse(os.path.join(gendir, 'machines.xml')).getElementsByTagName('machine') +for machine_xml in machines_xml: + kwargs = dict() + for attr in machine_xml.attributes.keys(): + kwargs[attr] = machine_xml.attributes[attr].value + for node in machine_xml.childNodes: + try: + name = node.tagName + val = machine_xml.getElementsByTagName(name)[0].firstChild.data + kwargs[name] = val + except: pass + 
kwargs['archs'] = kwargs['archs'].split() + #force kwargs keys to be of type str, not unicode for py25 + kwargs = dict((str(k), v) for k, v in kwargs.iteritems()) + register_machine(**kwargs) + +if __name__ == '__main__': + print machines diff --git a/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_tmpl_utils.py b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_tmpl_utils.py new file mode 100644 index 000000000..c4577af62 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/gen/volk_gnsssdr_tmpl_utils.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# +# Copyright 2012 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Radio; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 51 Franklin Street, +# Boston, MA 02110-1301, USA. 
+# + +import os +import re +import sys +import optparse +import volk_gnsssdr_arch_defs +import volk_gnsssdr_machine_defs +import volk_gnsssdr_kernel_defs +from Cheetah import Template + +def __escape_pre_processor(code): + out = list() + for line in code.splitlines(): + m = re.match('^(\s*)#(\s*)(\w+)(.*)$', line) + if m: + p0, p1, fcn, stuff = m.groups() + conly = fcn in ('include', 'define', 'ifdef', 'ifndef', 'endif', 'elif', 'pragma') + both = fcn in ('if', 'else') + istmpl = '$' in stuff + if 'defined' in stuff: istmpl = False + if conly or (both and not istmpl): + line = '%s\\#%s%s%s'%(p0, p1, fcn, stuff) + out.append(line) + return '\n'.join(out) + +def __parse_tmpl(_tmpl, **kwargs): + defs = { + 'archs': volk_gnsssdr_arch_defs.archs, + 'arch_dict': volk_gnsssdr_arch_defs.arch_dict, + 'machines': volk_gnsssdr_machine_defs.machines, + 'machine_dict': volk_gnsssdr_machine_defs.machine_dict, + 'kernels': volk_gnsssdr_kernel_defs.kernels, + } + defs.update(kwargs) + _tmpl = __escape_pre_processor(_tmpl) + _tmpl = """ + +/* this file was generated by volk_gnsssdr template utils, do not edit! */ + +""" + _tmpl + return str(Template.Template(_tmpl, defs)) + +def main(): + parser = optparse.OptionParser() + parser.add_option('--input', type='string') + parser.add_option('--output', type='string') + (opts, args) = parser.parse_args() + + output = __parse_tmpl(open(opts.input).read(), args=args) + if opts.output: open(opts.output, 'w').write(output) + else: print output + +if __name__ == '__main__': main() diff --git a/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/constants.h b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/constants.h new file mode 100644 index 000000000..f08960557 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/constants.h @@ -0,0 +1,39 @@ +/* -*- c++ -*- */ +/* + * Copyright 2006,2009,2013 Free Software Foundation, Inc. 
+ * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_VOLK_CONSTANTS_H +#define INCLUDED_VOLK_CONSTANTS_H + +#include + +__VOLK_DECL_BEGIN + +VOLK_API char* volk_gnsssdr_prefix(); +VOLK_API char* volk_gnsssdr_build_date(); +VOLK_API char* volk_gnsssdr_version(); +VOLK_API char* volk_gnsssdr_c_compiler(); +VOLK_API char* volk_gnsssdr_compiler_flags(); +VOLK_API char* volk_gnsssdr_available_machines(); + +__VOLK_DECL_END + +#endif /* INCLUDED_VOLK_CONSTANTS_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_common.h b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_common.h new file mode 100644 index 000000000..c48057cd9 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_common.h @@ -0,0 +1,96 @@ +#ifndef INCLUDED_LIBVOLK_COMMON_H +#define INCLUDED_LIBVOLK_COMMON_H + +//////////////////////////////////////////////////////////////////////// +// Cross-platform attribute macros +//////////////////////////////////////////////////////////////////////// +#if defined __GNUC__ +# define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x))) +# define __VOLK_ATTR_UNUSED __attribute__((unused)) +# define __VOLK_ATTR_INLINE __attribute__((always_inline)) +# define 
__VOLK_ATTR_DEPRECATED __attribute__((deprecated)) +# if __GNUC__ >= 4 +# define __VOLK_ATTR_EXPORT __attribute__((visibility("default"))) +# define __VOLK_ATTR_IMPORT __attribute__((visibility("default"))) +# else +# define __VOLK_ATTR_EXPORT +# define __VOLK_ATTR_IMPORT +# endif +#elif _MSC_VER +# define __VOLK_ATTR_ALIGNED(x) __declspec(align(x)) +# define __VOLK_ATTR_UNUSED +# define __VOLK_ATTR_INLINE __forceinline +# define __VOLK_ATTR_DEPRECATED __declspec(deprecated) +# define __VOLK_ATTR_EXPORT __declspec(dllexport) +# define __VOLK_ATTR_IMPORT __declspec(dllimport) +#else +# define __VOLK_ATTR_ALIGNED(x) +# define __VOLK_ATTR_UNUSED +# define __VOLK_ATTR_INLINE +# define __VOLK_ATTR_DEPRECATED +# define __VOLK_ATTR_EXPORT +# define __VOLK_ATTR_IMPORT +#endif + +//////////////////////////////////////////////////////////////////////// +// Ignore annoying warnings in MSVC +//////////////////////////////////////////////////////////////////////// +#if defined(_MSC_VER) +# pragma warning(disable: 4244) //'conversion' conversion from 'type1' to 'type2', possible loss of data +# pragma warning(disable: 4305) //'identifier' : truncation from 'type1' to 'type2' +#endif + +//////////////////////////////////////////////////////////////////////// +// C-linkage declaration macros +// FIXME: due to the usage of complex.h, require gcc for c-linkage +//////////////////////////////////////////////////////////////////////// +#if defined(__cplusplus) && (__GNUC__) +# define __VOLK_DECL_BEGIN extern "C" { +# define __VOLK_DECL_END } +#else +# define __VOLK_DECL_BEGIN +# define __VOLK_DECL_END +#endif + +//////////////////////////////////////////////////////////////////////// +// Define VOLK_API for library symbols +// http://gcc.gnu.org/wiki/Visibility +//////////////////////////////////////////////////////////////////////// +#ifdef volk_gnsssdr_EXPORTS +# define VOLK_API __VOLK_ATTR_EXPORT +#else +# define VOLK_API __VOLK_ATTR_IMPORT +#endif + 
+//////////////////////////////////////////////////////////////////////// +// The bit128 union used by some +//////////////////////////////////////////////////////////////////////// +#include + +#ifdef LV_HAVE_SSE +#include +#endif + +#ifdef LV_HAVE_SSE2 +#include +#endif + +union bit128{ + uint16_t i16[8]; + uint32_t i[4]; + float f[4]; + double d[2]; + + #ifdef LV_HAVE_SSE + __m128 float_vec; + #endif + + #ifdef LV_HAVE_SSE2 + __m128i int_vec; + __m128d double_vec; + #endif +}; + +#define bit128_p(x) ((union bit128 *)(x)) + +#endif /*INCLUDED_LIBVOLK_COMMON_H*/ diff --git a/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_complex.h b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_complex.h new file mode 100644 index 000000000..5bd925044 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_complex.h @@ -0,0 +1,86 @@ +#ifndef INCLUDE_VOLK_COMPLEX_H +#define INCLUDE_VOLK_COMPLEX_H + +/*! + * \brief Provide typedefs and operators for all complex types in C and C++. + * + * The typedefs encompass all signed integer and floating point types. + * Each operator function is intended to work across all data types. + * Under C++, these operators are defined as inline templates. + * Under C, these operators are defined as preprocessor macros. + * The use of macros makes the operators agnostic to the type. 
+ * + * The following operator functions are defined: + * - lv_cmake - make a complex type from components + * - lv_creal - get the real part of the complex number + * - lv_cimag - get the imaginary part of the complex number + * - lv_conj - take the conjugate of the complex number + */ + +#ifdef __cplusplus + +#include +#include + +typedef std::complex lv_8sc_t; +typedef std::complex lv_16sc_t; +typedef std::complex lv_32sc_t; +typedef std::complex lv_64sc_t; +typedef std::complex lv_32fc_t; +typedef std::complex lv_64fc_t; + +template inline std::complex lv_cmake(const T &r, const T &i){ + return std::complex(r, i); +} + +template inline typename T::value_type lv_creal(const T &x){ + return x.real(); +} + +template inline typename T::value_type lv_cimag(const T &x){ + return x.imag(); +} + +template inline T lv_conj(const T &x){ + return std::conj(x); +} + +#else /* __cplusplus */ + +#include + +typedef char complex lv_8sc_t; +typedef short complex lv_16sc_t; +typedef long complex lv_32sc_t; +typedef long long complex lv_64sc_t; +typedef float complex lv_32fc_t; +typedef double complex lv_64fc_t; + +#define lv_cmake(r, i) ((r) + _Complex_I*(i)) + +// When GNUC is available, use the complex extensions. +// The extensions always return the correct value type. +// http://gcc.gnu.org/onlinedocs/gcc/Complex.html +#ifdef __GNUC__ + +#define lv_creal(x) (__real__(x)) + +#define lv_cimag(x) (__imag__(x)) + +#define lv_conj(x) (~(x)) + +// When not available, use the c99 complex function family, +// which always returns double regardless of the input type. 
+#else /* __GNUC__ */ + +#define lv_creal(x) (creal(x)) + +#define lv_cimag(x) (cimag(x)) + +#define lv_conj(x) (conj(x)) + +#endif /* __GNUC__ */ + +#endif /* __cplusplus */ + +#endif /* INCLUDE_VOLK_COMPLEX_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_malloc.h b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_malloc.h new file mode 100644 index 000000000..7136bc135 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_malloc.h @@ -0,0 +1,66 @@ +/* -*- c -*- */ +/* + * Copyright 2014 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_VOLK_MALLOC_H +#define INCLUDED_VOLK_MALLOC_H + +#include +#include + +__VOLK_DECL_BEGIN + +/*! + * \brief Allocate \p size bytes of data aligned to \p alignment. + * + * \details + * Because we don't have a standard method to allocate buffers in + * memory that are guaranteed to be on an alignment, VOLK handles this + * itself. The volk_gnsssdr_malloc function behaves like malloc in that it + * returns a pointer to the allocated memory. 
However, it also takes + * in an alignment specification, which is usually something like 16 or + * 32 to ensure that the aligned memory is located on a particular + * byte boundary for use with SIMD. + * + * Internally, the volk_gnsssdr_malloc first checks if the compiler is C11 + * compliant and uses the new aligned_alloc method. If not, it checks + * if the system is POSIX compliant and uses posix_memalign. If that + * fails, volk_gnsssdr_malloc handles the memory allocation and alignment + * internally. + * + * Because of the ways in which volk_gnsssdr_malloc may allocate memory, it is + * important to always free volk_gnsssdr_malloc pointers using volk_gnsssdr_free. + * + * \param size The number of bytes to allocate. + * \param alignment The byte alignment of the allocated memory. + * \return pointer to aligned memory. + */ +VOLK_API void *volk_gnsssdr_malloc(size_t size, size_t alignment); + +/*! + * \brief Frees memory allocated by volk_gnsssdr_malloc. + * \param aptr The aligned pointer allocated by volk_gnsssdr_malloc. 
+ */ +VOLK_API void volk_gnsssdr_free(void *aptr); + +__VOLK_DECL_END + +#endif /* INCLUDED_VOLK_MALLOC_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_prefs.h b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_prefs.h new file mode 100644 index 000000000..6e13fc07a --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_prefs.h @@ -0,0 +1,28 @@ +#ifndef INCLUDED_VOLK_PREFS_H +#define INCLUDED_VOLK_PREFS_H + +#include +#include + +__VOLK_DECL_BEGIN + +typedef struct volk_gnsssdr_arch_pref +{ + char name[128]; //name of the kernel + char impl_a[128]; //best aligned impl + char impl_u[128]; //best unaligned impl +} volk_gnsssdr_arch_pref_t; + +//////////////////////////////////////////////////////////////////////// +// get path to volk_gnsssdr_config profiling info +//////////////////////////////////////////////////////////////////////// +VOLK_API void volk_gnsssdr_get_config_path(char *); + +//////////////////////////////////////////////////////////////////////// +// load prefs into global prefs struct +//////////////////////////////////////////////////////////////////////// +VOLK_API size_t volk_gnsssdr_load_preferences(volk_gnsssdr_arch_pref_t **); + +__VOLK_DECL_END + +#endif //INCLUDED_VOLK_PREFS_H diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/README.txt b/src/algorithms/libs/volk_gnsssdr/kernels/README.txt new file mode 100644 index 000000000..69ee93d06 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/README.txt @@ -0,0 +1,67 @@ +######################################################################## +# How to create custom kernel dispatchers +######################################################################## +A kernel dispatcher is a kernel implementation that calls other kernel implementations. 
+By default, a dispatcher is generated by the build system for every kernel such that: + * the best aligned implementation is called when all pointer arguments are aligned, + * and otherwise the best unaligned implementation is called. + +The author of a VOLK kernel may create a custom dispatcher, +to be called in place of the automatically generated one. +A custom dispatcher may be useful to handle head and tail cases, +or to implement different alignment and bounds checking logic. + +######################################################################## +# Code for an example dispatcher w/ tail case +######################################################################## +#include + +#ifdef LV_HAVE_DISPATCHER + +static inline void volk_gnsssdr_32f_x2_add_32f_dispatcher(float* cVector, const float* aVector, const float* bVector, unsigned int num_points) +{ + const unsigned int num_points_r = num_points%4; + const unsigned int num_points_x = num_points - num_points_r; + + if (volk_gnsssdr_is_aligned(VOLK_OR_PTR(cVector, VOLK_OR_PTR(aVector, bVector)))) + { + volk_gnsssdr_32f_x2_add_32f_a(cVector, aVector, bVector, num_points_x); + } + else + { + volk_gnsssdr_32f_x2_add_32f_u(cVector, aVector, bVector, num_points_x); + } + + volk_gnsssdr_32f_x2_add_32f_g(cVector+num_points_x, aVector+num_points_x, bVector+num_points_x, num_points_r); +} + +#endif //LV_HAVE_DISPATCHER + +######################################################################## +# Code for an example dispatcher w/ tail case and accumulator +######################################################################## +#include + +#ifdef LV_HAVE_DISPATCHER + +static inline void volk_gnsssdr_32f_x2_dot_prod_32f_dispatcher(float * result, const float * input, const float * taps, unsigned int num_points) +{ + const unsigned int num_points_r = num_points%16; + const unsigned int num_points_x = num_points - num_points_r; + + if (volk_gnsssdr_is_aligned(VOLK_OR_PTR(input, taps))) + { + 
volk_gnsssdr_32f_x2_dot_prod_32f_a(result, input, taps, num_points_x); + } + else + { + volk_gnsssdr_32f_x2_dot_prod_32f_u(result, input, taps, num_points_x); + } + + float result_tail = 0; + volk_gnsssdr_32f_x2_dot_prod_32f_g(&result_tail, input+num_points_x, taps+num_points_x, num_points_r); + + *result += result_tail; +} + +#endif //LV_HAVE_DISPATCHER diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16i_s32f_convert_32f.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16i_s32f_convert_32f.h new file mode 100644 index 000000000..ccb13171c --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16i_s32f_convert_32f.h @@ -0,0 +1,241 @@ +#ifndef INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H +#define INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H + +#include +#include + +#ifdef LV_HAVE_SSE4_1 +#include + + /*! + \brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value + \param inputVector The 16 bit input data buffer + \param outputVector The floating point output data buffer + \param scalar The value divided against each point in the output buffer + \param num_points The number of data values to be converted + \note Output buffer does NOT need to be properly aligned + */ +static inline void volk_gnsssdr_16i_s32f_convert_32f_u_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int eighthPoints = num_points / 8; + + float* outputVectorPtr = outputVector; + __m128 invScalar = _mm_set_ps1(1.0/scalar); + int16_t* inputPtr = (int16_t*)inputVector; + __m128i inputVal; + __m128i inputVal2; + __m128 ret; + + for(;number < eighthPoints; number++){ + + // Load the 8 values + inputVal = _mm_loadu_si128((__m128i*)inputPtr); + + // Shift the input data to the right by 64 bits ( 8 bytes ) + inputVal2 = 
_mm_srli_si128(inputVal, 8); + + // Convert the lower 4 values into 32 bit words + inputVal = _mm_cvtepi16_epi32(inputVal); + inputVal2 = _mm_cvtepi16_epi32(inputVal2); + + ret = _mm_cvtepi32_ps(inputVal); + ret = _mm_mul_ps(ret, invScalar); + _mm_storeu_ps(outputVectorPtr, ret); + outputVectorPtr += 4; + + ret = _mm_cvtepi32_ps(inputVal2); + ret = _mm_mul_ps(ret, invScalar); + _mm_storeu_ps(outputVectorPtr, ret); + + outputVectorPtr += 4; + + inputPtr += 8; + } + + number = eighthPoints * 8; + for(; number < num_points; number++){ + outputVector[number] =((float)(inputVector[number])) / scalar; + } +} +#endif /* LV_HAVE_SSE4_1 */ + +#ifdef LV_HAVE_SSE +#include + + /*! + \brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value + \param inputVector The 16 bit input data buffer + \param outputVector The floating point output data buffer + \param scalar The value divided against each point in the output buffer + \param num_points The number of data values to be converted + \note Output buffer does NOT need to be properly aligned + */ +static inline void volk_gnsssdr_16i_s32f_convert_32f_u_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + float* outputVectorPtr = outputVector; + __m128 invScalar = _mm_set_ps1(1.0/scalar); + int16_t* inputPtr = (int16_t*)inputVector; + __m128 ret; + + for(;number < quarterPoints; number++){ + ret = _mm_set_ps((float)(inputPtr[3]), (float)(inputPtr[2]), (float)(inputPtr[1]), (float)(inputPtr[0])); + + ret = _mm_mul_ps(ret, invScalar); + _mm_storeu_ps(outputVectorPtr, ret); + + inputPtr += 4; + outputVectorPtr += 4; + } + + number = quarterPoints * 4; + for(; number < num_points; number++){ + outputVector[number] = (float)(inputVector[number]) / scalar; + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC + /*! 
+ \brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value + \param inputVector The 16 bit input data buffer + \param outputVector The floating point output data buffer + \param scalar The value divided against each point in the output buffer + \param num_points The number of data values to be converted + \note Output buffer does NOT need to be properly aligned + */ +static inline void volk_gnsssdr_16i_s32f_convert_32f_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ + float* outputVectorPtr = outputVector; + const int16_t* inputVectorPtr = inputVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar; + } +} +#endif /* LV_HAVE_GENERIC */ + + + + +#endif /* INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_u_H */ +#ifndef INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H +#define INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H + +#include +#include + +#ifdef LV_HAVE_SSE4_1 +#include + + /*! 
+ \brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value + \param inputVector The 16 bit input data buffer + \param outputVector The floating point output data buffer + \param scalar The value divided against each point in the output buffer + \param num_points The number of data values to be converted + */ +static inline void volk_gnsssdr_16i_s32f_convert_32f_a_sse4_1(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int eighthPoints = num_points / 8; + + float* outputVectorPtr = outputVector; + __m128 invScalar = _mm_set_ps1(1.0/scalar); + int16_t* inputPtr = (int16_t*)inputVector; + __m128i inputVal; + __m128i inputVal2; + __m128 ret; + + for(;number < eighthPoints; number++){ + + // Load the 8 values + inputVal = _mm_loadu_si128((__m128i*)inputPtr); + + // Shift the input data to the right by 64 bits ( 8 bytes ) + inputVal2 = _mm_srli_si128(inputVal, 8); + + // Convert the lower 4 values into 32 bit words + inputVal = _mm_cvtepi16_epi32(inputVal); + inputVal2 = _mm_cvtepi16_epi32(inputVal2); + + ret = _mm_cvtepi32_ps(inputVal); + ret = _mm_mul_ps(ret, invScalar); + _mm_storeu_ps(outputVectorPtr, ret); + outputVectorPtr += 4; + + ret = _mm_cvtepi32_ps(inputVal2); + ret = _mm_mul_ps(ret, invScalar); + _mm_storeu_ps(outputVectorPtr, ret); + + outputVectorPtr += 4; + + inputPtr += 8; + } + + number = eighthPoints * 8; + for(; number < num_points; number++){ + outputVector[number] =((float)(inputVector[number])) / scalar; + } +} +#endif /* LV_HAVE_SSE4_1 */ + +#ifdef LV_HAVE_SSE +#include + + /*! 
+ \brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value + \param inputVector The 16 bit input data buffer + \param outputVector The floating point output data buffer + \param scalar The value divided against each point in the output buffer + \param num_points The number of data values to be converted + */ +static inline void volk_gnsssdr_16i_s32f_convert_32f_a_sse(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + float* outputVectorPtr = outputVector; + __m128 invScalar = _mm_set_ps1(1.0/scalar); + int16_t* inputPtr = (int16_t*)inputVector; + __m128 ret; + + for(;number < quarterPoints; number++){ + ret = _mm_set_ps((float)(inputPtr[3]), (float)(inputPtr[2]), (float)(inputPtr[1]), (float)(inputPtr[0])); + + ret = _mm_mul_ps(ret, invScalar); + _mm_storeu_ps(outputVectorPtr, ret); + + inputPtr += 4; + outputVectorPtr += 4; + } + + number = quarterPoints * 4; + for(; number < num_points; number++){ + outputVector[number] = (float)(inputVector[number]) / scalar; + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC + /*! 
+ \brief Converts the input 16 bit integer data into floating point data, and divides the each floating point output data point by the scalar value + \param inputVector The 16 bit input data buffer + \param outputVector The floating point output data buffer + \param scalar The value divided against each point in the output buffer + \param num_points The number of data values to be converted + */ +static inline void volk_gnsssdr_16i_s32f_convert_32f_a_generic(float* outputVector, const int16_t* inputVector, const float scalar, unsigned int num_points){ + float* outputVectorPtr = outputVector; + const int16_t* inputVectorPtr = inputVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ /* portable path: one cast and one division per sample */ + *outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar; + } +} +#endif /* LV_HAVE_GENERIC */ + + + + +#endif /* INCLUDED_volk_gnsssdr_16i_s32f_convert_32f_a_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_accumulator_s32f.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_accumulator_s32f.h new file mode 100644 index 000000000..82f1b3efd --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_accumulator_s32f.h @@ -0,0 +1,68 @@ +#ifndef INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H +#define INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H + +#include +#include +#include + +#ifdef LV_HAVE_SSE +#include +/*!
+ \brief Accumulates the values in the input buffer + \param result The accumulated result + \param inputBuffer The buffer of data to be accumulated + \param num_points The number of values in inputBuffer to be accumulated +*/ +static inline void volk_gnsssdr_32f_accumulator_s32f_a_sse(float* result, const float* inputBuffer, unsigned int num_points){ + float returnValue = 0; + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + const float* aPtr = inputBuffer; + __VOLK_ATTR_ALIGNED(16) float tempBuffer[4]; + + __m128 accumulator = _mm_setzero_ps(); + __m128 aVal = _mm_setzero_ps(); + + for(;number < quarterPoints; number++){ + aVal = _mm_load_ps(aPtr); /* aligned load: inputBuffer must be 16-byte aligned */ + accumulator = _mm_add_ps(accumulator, aVal); /* four independent running sums, one per lane */ + aPtr += 4; + } + _mm_store_ps(tempBuffer,accumulator); // Spill the four lane sums to tempBuffer so they can be added as scalars + returnValue = tempBuffer[0]; + returnValue += tempBuffer[1]; + returnValue += tempBuffer[2]; + returnValue += tempBuffer[3]; + + number = quarterPoints * 4; /* scalar tail: the num_points % 4 values the vector loop did not cover */ + for(;number < num_points; number++){ + returnValue += (*aPtr++); + } + *result = returnValue; +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC +/*!
+ \brief Accumulates the values in the input buffer + \param result The accumulated result + \param inputBuffer The buffer of data to be accumulated + \param num_points The number of values in inputBuffer to be accumulated +*/ +static inline void volk_gnsssdr_32f_accumulator_s32f_generic(float* result, const float* inputBuffer, unsigned int num_points){ + const float* aPtr = inputBuffer; + unsigned int number = 0; + float returnValue = 0; + + for(;number < num_points; number++){ /* plain left-to-right sum; *result is 0 when num_points is 0 */ + returnValue += (*aPtr++); + } + *result = returnValue; +} +#endif /* LV_HAVE_GENERIC */ + + + + +#endif /* INCLUDED_volk_gnsssdr_32f_accumulator_s32f_a_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_index_max_16u.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_index_max_16u.h new file mode 100644 index 000000000..c815609b2 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_index_max_16u.h @@ -0,0 +1,149 @@ +#ifndef INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H +#define INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE4_1 +#include + +static inline void volk_gnsssdr_32f_index_max_16u_a_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) { /* Writes to target[0] the index of a largest value in src0[0..num_points-1]; target is left untouched when num_points is 0 */ + if(num_points > 0){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + float* inputPtr = (float*)src0; + + __m128 indexIncrementValues = _mm_set1_ps(4); + __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4); /* lanes start at -4..-1 so the first += 4 inside the loop yields indexes 0..3 */ + + float max = src0[0]; + float index = 0; /* indexes are tracked as floats, matching the __m128 index lanes */ + __m128 maxValues = _mm_set1_ps(max); + __m128 maxValuesIndex = _mm_setzero_ps(); + __m128 compareResults; + __m128 currentValues; + + __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4]; + + for(;number < quarterPoints; number++){ + + currentValues = _mm_load_ps(inputPtr); inputPtr += 4; /* aligned load: src0 must be 16-byte aligned */ + currentIndexes = _mm_add_ps(currentIndexes, 
indexIncrementValues); + + compareResults = _mm_cmpgt_ps(maxValues, currentValues); /* mask is set in lanes where the running maximum is still strictly greater */ + + maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults); /* keep the old index where the mask is set, otherwise take the current index */ + maxValues = _mm_blendv_ps(currentValues, maxValues, compareResults); + } + + // Reduce the four per-lane maxima and their indexes to a single result + _mm_store_ps(maxValuesBuffer, maxValues); + _mm_store_ps(maxIndexesBuffer, maxValuesIndex); + + for(number = 0; number < 4; number++){ + if(maxValuesBuffer[number] > max){ + index = maxIndexesBuffer[number]; + max = maxValuesBuffer[number]; + } + } + + number = quarterPoints * 4; /* scalar tail: the num_points % 4 values the vector loop did not cover */ + for(;number < num_points; number++){ + if(src0[number] > max){ + index = number; + max = src0[number]; + } + } + target[0] = (unsigned int)index; + } +} + +#endif /*LV_HAVE_SSE4_1*/ + +#ifdef LV_HAVE_SSE +#include + +static inline void volk_gnsssdr_32f_index_max_16u_a_sse(unsigned int* target, const float* src0, unsigned int num_points) { /* Writes to target[0] the index of a largest value in src0[0..num_points-1]; target is left untouched when num_points is 0 */ + if(num_points > 0){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + float* inputPtr = (float*)src0; + + __m128 indexIncrementValues = _mm_set1_ps(4); + __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4); /* lanes start at -4..-1 so the first += 4 inside the loop yields indexes 0..3 */ + + float max = src0[0]; + float index = 0; /* indexes are tracked as floats, matching the __m128 index lanes */ + __m128 maxValues = _mm_set1_ps(max); + __m128 maxValuesIndex = _mm_setzero_ps(); + __m128 compareResults; + __m128 currentValues; + + __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4]; + __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4]; + + for(;number < quarterPoints; number++){ + + currentValues = _mm_load_ps(inputPtr); inputPtr += 4; /* aligned load: src0 must be 16-byte aligned */ + currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues); + + compareResults = _mm_cmpgt_ps(maxValues, currentValues); /* mask is set in lanes where the running maximum is still strictly greater */ + + maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex) , _mm_andnot_ps(compareResults, currentIndexes)); /* bitwise select: keep maxValuesIndex where the mask is set, take currentIndexes elsewhere */ + + maxValues = _mm_or_ps(_mm_and_ps(compareResults, maxValues) , _mm_andnot_ps(compareResults, currentValues)); /* same select applied to the values */ + } + + // Reduce the four per-lane maxima and their indexes to a single result + 
_mm_store_ps(maxValuesBuffer, maxValues); + _mm_store_ps(maxIndexesBuffer, maxValuesIndex); + + for(number = 0; number < 4; number++){ + if(maxValuesBuffer[number] > max){ + index = maxIndexesBuffer[number]; + max = maxValuesBuffer[number]; + } + } + + number = quarterPoints * 4; /* scalar tail: the num_points % 4 values the vector loop did not cover */ + for(;number < num_points; number++){ + if(src0[number] > max){ + index = number; + max = src0[number]; + } + } + target[0] = (unsigned int)index; + } +} + +#endif /*LV_HAVE_SSE*/ + +#ifdef LV_HAVE_GENERIC +static inline void volk_gnsssdr_32f_index_max_16u_generic(unsigned int* target, const float* src0, unsigned int num_points) { /* Scalar reference: the strict > keeps the first occurrence of the maximum; target is left untouched when num_points is 0 */ + if(num_points > 0){ + float max = src0[0]; + unsigned int index = 0; + + unsigned int i = 1; + + for(; i < num_points; ++i) { + + if(src0[i] > max){ + index = i; + max = src0[i]; + } + + } + target[0] = index; + } +} + +#endif /*LV_HAVE_GENERIC*/ + + +#endif /*INCLUDED_volk_gnsssdr_32f_index_max_16u_a_H*/ diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_x2_add_32f.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_x2_add_32f.h new file mode 100644 index 000000000..ee647b2d7 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_x2_add_32f.h @@ -0,0 +1,147 @@ +#ifndef INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H +#define INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H + +#include +#include + +#ifdef LV_HAVE_SSE +#include +/*!
+ \brief Adds the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be added + \param bVector One of the vectors to be added + \param num_points The number of values in aVector and bVector to be added together and stored into cVector +*/ +static inline void volk_gnsssdr_32f_x2_add_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + float* cPtr = cVector; + const float* aPtr = aVector; + const float* bPtr= bVector; + + __m128 aVal, bVal, cVal; + for(;number < quarterPoints; number++){ /* four sums per iteration, using unaligned loads and stores */ + + aVal = _mm_loadu_ps(aPtr); + bVal = _mm_loadu_ps(bPtr); + + cVal = _mm_add_ps(aVal, bVal); + + _mm_storeu_ps(cPtr,cVal); // Store the four sums into cVector + + aPtr += 4; + bPtr += 4; + cPtr += 4; + } + + number = quarterPoints * 4; /* scalar tail: the num_points % 4 values the vector loop did not cover */ + for(;number < num_points; number++){ + *cPtr++ = (*aPtr++) + (*bPtr++); + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC +/*!
+ \brief Adds the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be added + \param bVector One of the vectors to be added + \param num_points The number of values in aVector and bVector to be added together and stored into cVector +*/ +static inline void volk_gnsssdr_32f_x2_add_32f_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + float* cPtr = cVector; + const float* aPtr = aVector; + const float* bPtr= bVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ /* portable path: one addition per element */ + *cPtr++ = (*aPtr++) + (*bPtr++); + } +} +#endif /* LV_HAVE_GENERIC */ + +#endif /* INCLUDED_volk_gnsssdr_32f_x2_add_32f_u_H */ +#ifndef INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H +#define INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H + +#include +#include + +#ifdef LV_HAVE_SSE +#include +/*! + \brief Adds the two input vectors and stores the result in the third vector (aligned variant: uses aligned SSE loads and stores, so all three buffers must be 16-byte aligned) + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be added + \param bVector One of the vectors to be added + \param num_points The number of values in aVector and bVector to be added together and stored into cVector +*/ +static inline void volk_gnsssdr_32f_x2_add_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + float* cPtr = cVector; + const float* aPtr = aVector; + const float* bPtr= bVector; + + __m128 aVal, bVal, cVal; + for(;number < quarterPoints; number++){ + + aVal = _mm_load_ps(aPtr); + bVal = _mm_load_ps(bPtr); + + cVal = _mm_add_ps(aVal, bVal); + + _mm_store_ps(cPtr,cVal); // Store the four sums into cVector + + aPtr += 4; + bPtr += 4; + cPtr += 4; + } + + number = quarterPoints * 4; + for(;number < num_points; number++){ + *cPtr++ = (*aPtr++) + (*bPtr++); + } 
+} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC +/*! + \brief Adds the two input vectors and stores the result in the third vector (portable scalar implementation) + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be added + \param bVector One of the vectors to be added + \param num_points The number of values in aVector and bVector to be added together and stored into cVector +*/ +static inline void volk_gnsssdr_32f_x2_add_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ + float* cPtr = cVector; + const float* aPtr = aVector; + const float* bPtr= bVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *cPtr++ = (*aPtr++) + (*bPtr++); + } +} +#endif /* LV_HAVE_GENERIC */ + +#ifdef LV_HAVE_ORC +/*! + \brief Adds the two input vectors and stores the result in the third vector by forwarding to the externally defined ORC implementation + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be added + \param bVector One of the vectors to be added + \param num_points The number of values in aVector and bVector to be added together and stored into cVector +*/ +extern void volk_gnsssdr_32f_x2_add_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points); +static inline void volk_gnsssdr_32f_x2_add_32f_u_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){ /* NOTE(review): named _u_orc but declared in the _a_H section and forwards to _a_orc_impl - confirm the naming is intended */ + volk_gnsssdr_32f_x2_add_32f_a_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ + + +#endif /* INCLUDED_volk_gnsssdr_32f_x2_add_32f_a_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_conjugate_32fc.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_conjugate_32fc.h new file mode 100644 index 000000000..a3b8848aa --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_conjugate_32fc.h @@ -0,0 +1,127 @@ +#ifndef
INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H +#define INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*! + \brief Takes the conjugate of a complex vector, two complex values per iteration, using unaligned loads and stores. + \param cVector The vector where the results will be stored + \param aVector Vector to be conjugated + \param num_points The number of complex values in aVector to be conjugated and stored into cVector + */ +static inline void volk_gnsssdr_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + __m128 x; + lv_32fc_t* c = cVector; + const lv_32fc_t* a = aVector; + + __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); /* -0.f carries only the sign bit, so XOR with this mask flips the sign of lanes 1 and 3 (the imaginary parts) */ + + for(;number < halfPoints; number++){ + + x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi + + x = _mm_xor_ps(x, conjugator); // flip the sign of both imaginary parts + + _mm_storeu_ps((float*)c,x); // Store the two conjugated values into cVector + + a += 2; + c += 2; + } + + if((num_points % 2) != 0) { + *c = lv_conj(*a); + } +} +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_GENERIC + /*! + \brief Takes the conjugate of a complex vector.
+ \param cVector The vector where the results will be stored + \param aVector Vector to be conjugated + \param num_points The number of complex values in aVector to be conjugated and stored into cVector + */ +static inline void volk_gnsssdr_32fc_conjugate_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){ + lv_32fc_t* cPtr = cVector; + const lv_32fc_t* aPtr = aVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ /* portable path: one lv_conj per element */ + *cPtr++ = lv_conj(*aPtr++); + } +} +#endif /* LV_HAVE_GENERIC */ + + +#endif /* INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_u_H */ +#ifndef INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H +#define INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*! + \brief Takes the conjugate of a complex vector, two complex values per iteration, using aligned loads and stores (aVector and cVector must be 16-byte aligned). + \param cVector The vector where the results will be stored + \param aVector Vector to be conjugated + \param num_points The number of complex values in aVector to be conjugated and stored into cVector + */ +static inline void volk_gnsssdr_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + __m128 x; + lv_32fc_t* c = cVector; + const lv_32fc_t* a = aVector; + + __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); /* -0.f carries only the sign bit, so XOR with this mask flips the sign of lanes 1 and 3 (the imaginary parts) */ + + for(;number < halfPoints; number++){ + + x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi + + x = _mm_xor_ps(x, conjugator); // flip the sign of both imaginary parts + + _mm_store_ps((float*)c,x); // Store the two conjugated values into cVector + + a += 2; + c += 2; + } + + if((num_points % 2) != 0) { + *c = lv_conj(*a); + } +} +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_GENERIC + /*! + \brief Takes the conjugate of a complex vector.
+ \param cVector The vector where the results will be stored + \param aVector Vector to be conjugated + \param num_points The number of complex values in aVector to be conjugated and stored into cVector + */ +static inline void volk_gnsssdr_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, unsigned int num_points){ + lv_32fc_t* cPtr = cVector; + const lv_32fc_t* aPtr = aVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ /* portable path: one lv_conj per element */ + *cPtr++ = lv_conj(*aPtr++); + } +} +#endif /* LV_HAVE_GENERIC */ + +#endif /* INCLUDED_volk_gnsssdr_32fc_conjugate_32fc_a_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_magnitude_squared_32f.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_magnitude_squared_32f.h new file mode 100644 index 000000000..ce28f866e --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_magnitude_squared_32f.h @@ -0,0 +1,228 @@ +#ifndef INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_u_H +#define INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_u_H + +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_gnsssdr_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + const float* complexVectorPtr = (float*)complexVector; /* the code reads the complex input as interleaved floats: real, imag, real, imag, ... */ + float* magnitudeVectorPtr = magnitudeVector; + + __m128 cplxValue1, cplxValue2, result; + for(;number < quarterPoints; number++){ /* four complex inputs (two registers) in, four magnitudes out per iteration */ + cplxValue1 = _mm_loadu_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue2 = _mm_loadu_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values + cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values + + result = _mm_hadd_ps(cplxValue1, cplxValue2); // Horizontal add pairs each I^2 with its Q^2, giving four magnitudes in input order + + _mm_storeu_ps(magnitudeVectorPtr, result); + magnitudeVectorPtr += 4; + } + + number = quarterPoints * 4; + for(; number < num_points; number++){ + float val1Real = *complexVectorPtr++; + float val1Imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag); + } +} +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_SSE +#include + /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_gnsssdr_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + const float* complexVectorPtr = (float*)complexVector; /* the code reads the complex input as interleaved floats: real, imag, real, imag, ... */ + float* magnitudeVectorPtr = magnitudeVector; + + __m128 cplxValue1, cplxValue2, iValue, qValue, result; + for(;number < quarterPoints; number++){ /* four complex inputs (two registers) in, four magnitudes out per iteration */ + cplxValue1 = _mm_loadu_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue2 = _mm_loadu_ps(complexVectorPtr); + complexVectorPtr += 4; + + // Gather the even lanes of both registers: i1i2i3i4 + iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0)); + // Gather the odd lanes of both registers: q1q2q3q4 + qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1)); + + iValue = _mm_mul_ps(iValue, iValue); // Square the I values + qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values + + result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values + + _mm_storeu_ps(magnitudeVectorPtr, result); + magnitudeVectorPtr += 4; + } + + number = quarterPoints * 4; + for(; number < num_points; number++){ + float val1Real = *complexVectorPtr++; + float val1Imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag); + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC + /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_gnsssdr_32fc_magnitude_squared_32f_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + const float* complexVectorPtr = (float*)complexVector; /* the code reads the complex input as interleaved floats: real, imag, real, imag, ... */ + float* magnitudeVectorPtr = magnitudeVector; + unsigned int number = 0; + for(number = 0; number < num_points; number++){ + const float real = *complexVectorPtr++; + const float imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = (real*real) + (imag*imag); + } +} +#endif /* LV_HAVE_GENERIC */ + +#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_u_H */ +#ifndef INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_a_H +#define INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_a_H + +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + const float* complexVectorPtr = (float*)complexVector; /* the code reads the complex input as interleaved floats: real, imag, real, imag, ... */ + float* magnitudeVectorPtr = magnitudeVector; + + __m128 cplxValue1, cplxValue2, result; + for(;number < quarterPoints; number++){ /* aligned loads and stores: both buffers must be 16-byte aligned */ + cplxValue1 = _mm_load_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue2 = _mm_load_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values + cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values + + result = _mm_hadd_ps(cplxValue1, cplxValue2); // Horizontal add pairs each I^2 with its Q^2, giving four magnitudes in input order + + _mm_store_ps(magnitudeVectorPtr, result); + magnitudeVectorPtr += 4; + } + + number = quarterPoints * 4; + for(; number < num_points; number++){ + float val1Real = *complexVectorPtr++; + float val1Imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag); + } +} +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_SSE +#include + /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int quarterPoints = num_points / 4; + + const float* complexVectorPtr = (float*)complexVector; /* the code reads the complex input as interleaved floats: real, imag, real, imag, ... */ + float* magnitudeVectorPtr = magnitudeVector; + + __m128 cplxValue1, cplxValue2, iValue, qValue, result; + for(;number < quarterPoints; number++){ /* aligned loads and stores: both buffers must be 16-byte aligned */ + cplxValue1 = _mm_load_ps(complexVectorPtr); + complexVectorPtr += 4; + + cplxValue2 = _mm_load_ps(complexVectorPtr); + complexVectorPtr += 4; + + // Gather the even lanes of both registers: i1i2i3i4 + iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0)); + // Gather the odd lanes of both registers: q1q2q3q4 + qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1)); + + iValue = _mm_mul_ps(iValue, iValue); // Square the I values + qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values + + result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values + + _mm_store_ps(magnitudeVectorPtr, result); + magnitudeVectorPtr += 4; + } + + number = quarterPoints * 4; + for(; number < num_points; number++){ + float val1Real = *complexVectorPtr++; + float val1Imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag); + } +} +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_GENERIC + /*!
+ \brief Calculates the magnitude squared of the complexVector and stores the results in the magnitudeVector + \param complexVector The vector containing the complex input values + \param magnitudeVector The vector containing the real output values + \param num_points The number of complex values in complexVector to be calculated and stored into cVector + */ +static inline void volk_gnsssdr_32fc_magnitude_squared_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){ + const float* complexVectorPtr = (float*)complexVector; /* the code reads the complex input as interleaved floats: real, imag, real, imag, ... */ + float* magnitudeVectorPtr = magnitudeVector; + unsigned int number = 0; + for(number = 0; number < num_points; number++){ + const float real = *complexVectorPtr++; + const float imag = *complexVectorPtr++; + *magnitudeVectorPtr++ = (real*real) + (imag*imag); + } +} +#endif /* LV_HAVE_GENERIC */ + +#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_squared_32f_a_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32fc_multiply_32fc.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32fc_multiply_32fc.h new file mode 100644 index 000000000..d5135d89f --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_s32fc_multiply_32fc.h @@ -0,0 +1,178 @@ +#ifndef INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_u_H +#define INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_u_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include +/*!
+ \brief Multiplies the input vector by a scalar and stores the results in the third vector + \param cVector The vector where the results will be stored + \param aVector The vector to be multiplied + \param scalar The complex scalar to multiply aVector + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + __m128 x, yl, yh, z, tmp1, tmp2; + lv_32fc_t* c = cVector; + const lv_32fc_t* a = aVector; + + // Broadcast the scalar's real part (yl) and imaginary part (yh) to all four lanes + yl = _mm_set_ps1(lv_creal(scalar)); + yh = _mm_set_ps1(lv_cimag(scalar)); + + for(;number < halfPoints; number++){ + + x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*sr,ai*sr,br*sr,bi*sr (s = scalar) + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*si,ar*si,bi*si,br*si + + z = _mm_addsub_ps(tmp1,tmp2); // ar*sr-ai*si, ai*sr+ar*si, br*sr-bi*si, bi*sr+br*si + + _mm_storeu_ps((float*)c,z); // Store the two products into cVector + + a += 2; + c += 2; + } + + if((num_points % 2) != 0) { + *c = (*a) * scalar; + } +} +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_GENERIC +/*!
+ \brief Multiplies the input vector by a scalar and stores the results in the third vector + \param cVector The vector where the results will be stored + \param aVector The vector to be multiplied + \param scalar The complex scalar to multiply aVector + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector +*/ +static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ + lv_32fc_t* cPtr = cVector; + const lv_32fc_t* aPtr = aVector; + unsigned int number = num_points; + + // unwrap loop + while (number >= 8){ + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + number -= 8; + } + + // clean up any remaining + while (number-- > 0) + *cPtr++ = *aPtr++ * scalar; +} +#endif /* LV_HAVE_GENERIC */ + + +#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H */ +#ifndef INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_a_H +#define INCLUDED_volk_gnsssdr_32fc_s32fc_multiply_32fc_a_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*! 
+ \brief Multiplies the two input complex vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + __m128 x, yl, yh, z, tmp1, tmp2; + lv_32fc_t* c = cVector; + const lv_32fc_t* a = aVector; + + // Set up constant scalar vector + yl = _mm_set_ps1(lv_creal(scalar)); + yh = _mm_set_ps1(lv_cimag(scalar)); + + for(;number < halfPoints; number++){ + + x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + + _mm_store_ps((float*)c,z); // Store the results back into the C container + + a += 2; + c += 2; + } + + if((num_points % 2) != 0) { + *c = (*a) * scalar; + } +} +#endif /* LV_HAVE_SSE */ + + +#ifdef LV_HAVE_GENERIC + /*! 
+ \brief Multiplies every element of the input complex vector by a complex scalar and stores the results in the output vector + \param cVector The vector where the results will be stored + \param aVector The vector to be multiplied by the scalar + \param scalar The complex scalar that multiplies every element of aVector + \param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector + */ +static inline void volk_gnsssdr_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){ + lv_32fc_t* cPtr = cVector; + const lv_32fc_t* aPtr = aVector; + unsigned int number = num_points; + + // manually unrolled loop: 8 products per iteration + while (number >= 8){ + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + *cPtr++ = (*aPtr++) * scalar; + number -= 8; + } + + // clean up any remaining (fewer than 8) points + while (number-- > 0) + *cPtr++ = *aPtr++ * scalar; +} +#endif /* LV_HAVE_GENERIC */ + + + + + +#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_dot_prod_32fc.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_dot_prod_32fc.h new file mode 100644 index 000000000..08a10aa6e --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_dot_prod_32fc.h @@ -0,0 +1,763 @@ +#ifndef INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H +#define INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H + +#include +#include +#include +#include + + +#ifdef LV_HAVE_GENERIC + + +static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) { + + float * res = (float*) result; + float * in = (float*) input; + float * tp = (float*) taps; + unsigned int 
n_2_ccomplex_blocks = num_points/2; + unsigned int isodd = num_points & 1; + + float sum0[2] = {0,0}; + float sum1[2] = {0,0}; + unsigned int i = 0; + + for(i = 0; i < n_2_ccomplex_blocks; ++i) { + sum0[0] += in[0] * tp[0] - in[1] * tp[1]; + sum0[1] += in[0] * tp[1] + in[1] * tp[0]; + sum1[0] += in[2] * tp[2] - in[3] * tp[3]; + sum1[1] += in[2] * tp[3] + in[3] * tp[2]; + + in += 4; + tp += 4; + } + + res[0] = sum0[0] + sum1[0]; + res[1] = sum0[1] + sum1[1]; + + // Cleanup if we had an odd number of points + for(i = 0; i < isodd; ++i) { + *result += input[num_points - 1] * taps[num_points - 1]; + } +} + +#endif /*LV_HAVE_GENERIC*/ + + + +#if LV_HAVE_SSE && LV_HAVE_64 + +static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) { + /* Complex dot product of input and taps (unaligned loads); num_bytes is 64-bit because the asm reads the whole %rcx register */ + const unsigned long long num_bytes = (unsigned long long)num_points*8; + unsigned int isodd = num_points & 1; + + asm + ( + "# ccomplex_dotprod_generic (float* result, const float *input,\n\t" + "# const float *taps, unsigned num_bytes)\n\t" + "# float sum0 = 0;\n\t" + "# float sum1 = 0;\n\t" + "# float sum2 = 0;\n\t" + "# float sum3 = 0;\n\t" + "# do {\n\t" + "# sum0 += input[0] * taps[0] - input[1] * taps[1];\n\t" + "# sum1 += input[0] * taps[1] + input[1] * taps[0];\n\t" + "# sum2 += input[2] * taps[2] - input[3] * taps[3];\n\t" + "# sum3 += input[2] * taps[3] + input[3] * taps[2];\n\t" + "# input += 4;\n\t" + "# taps += 4; \n\t" + "# } while (--n_2_ccomplex_blocks != 0);\n\t" + "# result[0] = sum0 + sum2;\n\t" + "# result[1] = sum1 + sum3;\n\t" + "# TODO: prefetch and better scheduling\n\t" + " xor %%r9, %%r9\n\t" + " xor %%r10, %%r10\n\t" + " movq %%rcx, %%rax\n\t" + " movq %%rcx, %%r8\n\t" + " movq %[rsi], %%r9\n\t" + " movq %[rdx], %%r10\n\t" + " xorps %%xmm6, %%xmm6 # zero accumulators\n\t" + " movups 0(%%r9), %%xmm0\n\t" + " xorps %%xmm7, %%xmm7 # zero accumulators\n\t" + " movups 0(%%r10), %%xmm2\n\t" + " shr $5, %%rax # rax = n_2_ccomplex_blocks / 2\n\t" + " 
shr $4, %%r8\n\t" + " jmp .%=L1_test\n\t" + " # 4 taps / loop\n\t" + " # something like ?? cycles / loop\n\t" + ".%=Loop1: \n\t" + "# complex prod: C += A * B, w/ temp Z & Y (or B), xmmPN=$0x8000000080000000\n\t" + "# movups (%%r9), %%xmmA\n\t" + "# movups (%%r10), %%xmmB\n\t" + "# movups %%xmmA, %%xmmZ\n\t" + "# shufps $0xb1, %%xmmZ, %%xmmZ # swap internals\n\t" + "# mulps %%xmmB, %%xmmA\n\t" + "# mulps %%xmmZ, %%xmmB\n\t" + "# # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t" + "# xorps %%xmmPN, %%xmmA\n\t" + "# movups %%xmmA, %%xmmZ\n\t" + "# unpcklps %%xmmB, %%xmmA\n\t" + "# unpckhps %%xmmB, %%xmmZ\n\t" + "# movups %%xmmZ, %%xmmY\n\t" + "# shufps $0x44, %%xmmA, %%xmmZ # b01000100\n\t" + "# shufps $0xee, %%xmmY, %%xmmA # b11101110\n\t" + "# addps %%xmmZ, %%xmmA\n\t" + "# addps %%xmmA, %%xmmC\n\t" + "# A=xmm0, B=xmm2, Z=xmm4\n\t" + "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t" + " movups 16(%%r9), %%xmm1\n\t" + " movups %%xmm0, %%xmm4\n\t" + " mulps %%xmm2, %%xmm0\n\t" + " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t" + " movups 16(%%r10), %%xmm3\n\t" + " movups %%xmm1, %%xmm5\n\t" + " addps %%xmm0, %%xmm6\n\t" + " mulps %%xmm3, %%xmm1\n\t" + " shufps $0xb1, %%xmm5, %%xmm5 # swap internals\n\t" + " addps %%xmm1, %%xmm6\n\t" + " mulps %%xmm4, %%xmm2\n\t" + " movups 32(%%r9), %%xmm0\n\t" + " addps %%xmm2, %%xmm7\n\t" + " mulps %%xmm5, %%xmm3\n\t" + " add $32, %%r9\n\t" + " movups 32(%%r10), %%xmm2\n\t" + " addps %%xmm3, %%xmm7\n\t" + " add $32, %%r10\n\t" + ".%=L1_test:\n\t" + " dec %%rax\n\t" + " jge .%=Loop1\n\t" + " # We've handled the bulk of multiplies up to here.\n\t" + " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t" + " # If so, we've got 2 more taps to do.\n\t" + " and $1, %%r8\n\t" + " je .%=Leven\n\t" + " # The count was odd, do 2 more taps.\n\t" + " # Note that we've already got mm0/mm2 preloaded\n\t" + " # from the main loop.\n\t" + " movups %%xmm0, %%xmm4\n\t" + " mulps %%xmm2, %%xmm0\n\t" + " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t" + 
" addps %%xmm0, %%xmm6\n\t" + " mulps %%xmm4, %%xmm2\n\t" + " addps %%xmm2, %%xmm7\n\t" + ".%=Leven:\n\t" + " # neg inversor\n\t" + " xorps %%xmm1, %%xmm1\n\t" + " mov $0x80000000, %%r9\n\t" + " movd %%r9, %%xmm1\n\t" + " shufps $0x11, %%xmm1, %%xmm1 # b00010001 # 0 -0 0 -0\n\t" + " # pfpnacc\n\t" + " xorps %%xmm1, %%xmm6\n\t" + " movups %%xmm6, %%xmm2\n\t" + " unpcklps %%xmm7, %%xmm6\n\t" + " unpckhps %%xmm7, %%xmm2\n\t" + " movups %%xmm2, %%xmm3\n\t" + " shufps $0x44, %%xmm6, %%xmm2 # b01000100\n\t" + " shufps $0xee, %%xmm3, %%xmm6 # b11101110\n\t" + " addps %%xmm2, %%xmm6\n\t" + " # xmm6 = r1 i2 r3 i4\n\t" + " movhlps %%xmm6, %%xmm4 # xmm4 = r3 i4 ?? ??\n\t" + " addps %%xmm4, %%xmm6 # xmm6 = r1+r3 i2+i4 ?? ??\n\t" + " movlps %%xmm6, (%[rdi]) # store low 2x32 bits (complex) to memory\n\t" + : + :[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" (result) + :"rax", "r8", "r9", "r10", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "cc", "memory" /* every register the asm writes, the flags, and the store through %[rdi] */ + ); + + + if(isodd) { + *result += input[num_points - 1] * taps[num_points - 1]; + } + + return; + +} + +#endif /* LV_HAVE_SSE && LV_HAVE_64 */ + + + + +#ifdef LV_HAVE_SSE3 + +#include + +static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) { + + lv_32fc_t dotProduct; + memset(&dotProduct, 0x0, 2*sizeof(float)); + + unsigned int number = 0; + const unsigned int halfPoints = num_points/2; + unsigned int isodd = num_points & 1; + + __m128 x, y, yl, yh, z, tmp1, tmp2, dotProdVal; + + const lv_32fc_t* a = input; + const lv_32fc_t* b = taps; + + dotProdVal = _mm_setzero_ps(); + + for(;number < halfPoints; number++){ + + x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi + y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // 
Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + + dotProdVal = _mm_add_ps(dotProdVal, z); // Add the complex multiplication results together + + a += 2; + b += 2; + } + + __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2]; + + _mm_storeu_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector + + dotProduct += ( dotProductVector[0] + dotProductVector[1] ); + + if(isodd) { + dotProduct += input[num_points - 1] * taps[num_points - 1]; + } + + *result = dotProduct; +} + +#endif /*LV_HAVE_SSE3*/ + +#ifdef LV_HAVE_SSE4_1 + +#include + +static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_u_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) { + + unsigned int i = 0; + const unsigned int qtr_points = num_points/4; + const unsigned int isodd = num_points & 3; + + __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1; + float *p_input, *p_taps; + __m64 *p_result; + + p_result = (__m64*)result; + p_input = (float*)input; + p_taps = (float*)taps; + + static const __m128i neg = {0x000000000000000080000000}; + + real0 = _mm_setzero_ps(); + real1 = _mm_setzero_ps(); + im0 = _mm_setzero_ps(); + im1 = _mm_setzero_ps(); + + for(; i < qtr_points; ++i) { + xmm0 = _mm_loadu_ps(p_input); + xmm1 = _mm_loadu_ps(p_taps); + + p_input += 4; + p_taps += 4; + + xmm2 = _mm_loadu_ps(p_input); + xmm3 = _mm_loadu_ps(p_taps); + + p_input += 4; + p_taps += 4; + + xmm4 = _mm_unpackhi_ps(xmm0, xmm2); + xmm5 = _mm_unpackhi_ps(xmm1, xmm3); + xmm0 = _mm_unpacklo_ps(xmm0, xmm2); + xmm2 = _mm_unpacklo_ps(xmm1, xmm3); + + //imaginary vector from input + xmm1 = _mm_unpackhi_ps(xmm0, xmm4); + //real vector from input + xmm3 = _mm_unpacklo_ps(xmm0, xmm4); + //imaginary vector from taps + xmm0 = _mm_unpackhi_ps(xmm2, xmm5); + //real vector from taps + xmm2 = 
_mm_unpacklo_ps(xmm2, xmm5); + + xmm4 = _mm_dp_ps(xmm3, xmm2, 0xf1); + xmm5 = _mm_dp_ps(xmm1, xmm0, 0xf1); + + xmm6 = _mm_dp_ps(xmm3, xmm0, 0xf2); + xmm7 = _mm_dp_ps(xmm1, xmm2, 0xf2); + + real0 = _mm_add_ps(xmm4, real0); + real1 = _mm_add_ps(xmm5, real1); + im0 = _mm_add_ps(xmm6, im0); + im1 = _mm_add_ps(xmm7, im1); + } + + real1 = _mm_xor_ps(real1, bit128_p(&neg)->float_vec); + + im0 = _mm_add_ps(im0, im1); + real0 = _mm_add_ps(real0, real1); + + im0 = _mm_add_ps(im0, real0); + + _mm_storel_pi(p_result, im0); + + for(i = num_points-isodd; i < num_points; i++) { + *result += input[i] * taps[i]; + } +} + +#endif /*LV_HAVE_SSE4_1*/ + + + + +#endif /*INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_u_H*/ +#ifndef INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H +#define INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H + +#include +#include +#include +#include + + +#ifdef LV_HAVE_GENERIC + + +static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) { + + const unsigned int num_bytes = num_points*8; + + float * res = (float*) result; + float * in = (float*) input; + float * tp = (float*) taps; + unsigned int n_2_ccomplex_blocks = num_bytes >> 4; + unsigned int isodd = num_points & 1; + + float sum0[2] = {0,0}; + float sum1[2] = {0,0}; + unsigned int i = 0; + + for(i = 0; i < n_2_ccomplex_blocks; ++i) { + sum0[0] += in[0] * tp[0] - in[1] * tp[1]; + sum0[1] += in[0] * tp[1] + in[1] * tp[0]; + sum1[0] += in[2] * tp[2] - in[3] * tp[3]; + sum1[1] += in[2] * tp[3] + in[3] * tp[2]; + + in += 4; + tp += 4; + } + + res[0] = sum0[0] + sum1[0]; + res[1] = sum0[1] + sum1[1]; + + for(i = 0; i < isodd; ++i) { + *result += input[num_points - 1] * taps[num_points - 1]; + } +} + +#endif /*LV_HAVE_GENERIC*/ + + +#if LV_HAVE_SSE && LV_HAVE_64 + + +static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) 
{ + /* Complex dot product of input and taps (aligned movaps loads); num_bytes is 64-bit because the asm reads the whole %rcx register */ + const unsigned long long num_bytes = (unsigned long long)num_points*8; + unsigned int isodd = num_points & 1; + + asm + ( + "# ccomplex_dotprod_generic (float* result, const float *input,\n\t" + "# const float *taps, unsigned num_bytes)\n\t" + "# float sum0 = 0;\n\t" + "# float sum1 = 0;\n\t" + "# float sum2 = 0;\n\t" + "# float sum3 = 0;\n\t" + "# do {\n\t" + "# sum0 += input[0] * taps[0] - input[1] * taps[1];\n\t" + "# sum1 += input[0] * taps[1] + input[1] * taps[0];\n\t" + "# sum2 += input[2] * taps[2] - input[3] * taps[3];\n\t" + "# sum3 += input[2] * taps[3] + input[3] * taps[2];\n\t" + "# input += 4;\n\t" + "# taps += 4; \n\t" + "# } while (--n_2_ccomplex_blocks != 0);\n\t" + "# result[0] = sum0 + sum2;\n\t" + "# result[1] = sum1 + sum3;\n\t" + "# TODO: prefetch and better scheduling\n\t" + " xor %%r9, %%r9\n\t" + " xor %%r10, %%r10\n\t" + " movq %%rcx, %%rax\n\t" + " movq %%rcx, %%r8\n\t" + " movq %[rsi], %%r9\n\t" + " movq %[rdx], %%r10\n\t" + " xorps %%xmm6, %%xmm6 # zero accumulators\n\t" + " movaps 0(%%r9), %%xmm0\n\t" + " xorps %%xmm7, %%xmm7 # zero accumulators\n\t" + " movaps 0(%%r10), %%xmm2\n\t" + " shr $5, %%rax # rax = n_2_ccomplex_blocks / 2\n\t" + " shr $4, %%r8\n\t" + " jmp .%=L1_test\n\t" + " # 4 taps / loop\n\t" + " # something like ?? 
cycles / loop\n\t" + ".%=Loop1: \n\t" + "# complex prod: C += A * B, w/ temp Z & Y (or B), xmmPN=$0x8000000080000000\n\t" + "# movaps (%%r9), %%xmmA\n\t" + "# movaps (%%r10), %%xmmB\n\t" + "# movaps %%xmmA, %%xmmZ\n\t" + "# shufps $0xb1, %%xmmZ, %%xmmZ # swap internals\n\t" + "# mulps %%xmmB, %%xmmA\n\t" + "# mulps %%xmmZ, %%xmmB\n\t" + "# # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t" + "# xorps %%xmmPN, %%xmmA\n\t" + "# movaps %%xmmA, %%xmmZ\n\t" + "# unpcklps %%xmmB, %%xmmA\n\t" + "# unpckhps %%xmmB, %%xmmZ\n\t" + "# movaps %%xmmZ, %%xmmY\n\t" + "# shufps $0x44, %%xmmA, %%xmmZ # b01000100\n\t" + "# shufps $0xee, %%xmmY, %%xmmA # b11101110\n\t" + "# addps %%xmmZ, %%xmmA\n\t" + "# addps %%xmmA, %%xmmC\n\t" + "# A=xmm0, B=xmm2, Z=xmm4\n\t" + "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t" + " movaps 16(%%r9), %%xmm1\n\t" + " movaps %%xmm0, %%xmm4\n\t" + " mulps %%xmm2, %%xmm0\n\t" + " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t" + " movaps 16(%%r10), %%xmm3\n\t" + " movaps %%xmm1, %%xmm5\n\t" + " addps %%xmm0, %%xmm6\n\t" + " mulps %%xmm3, %%xmm1\n\t" + " shufps $0xb1, %%xmm5, %%xmm5 # swap internals\n\t" + " addps %%xmm1, %%xmm6\n\t" + " mulps %%xmm4, %%xmm2\n\t" + " movaps 32(%%r9), %%xmm0\n\t" + " addps %%xmm2, %%xmm7\n\t" + " mulps %%xmm5, %%xmm3\n\t" + " add $32, %%r9\n\t" + " movaps 32(%%r10), %%xmm2\n\t" + " addps %%xmm3, %%xmm7\n\t" + " add $32, %%r10\n\t" + ".%=L1_test:\n\t" + " dec %%rax\n\t" + " jge .%=Loop1\n\t" + " # We've handled the bulk of multiplies up to here.\n\t" + " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t" + " # If so, we've got 2 more taps to do.\n\t" + " and $1, %%r8\n\t" + " je .%=Leven\n\t" + " # The count was odd, do 2 more taps.\n\t" + " # Note that we've already got mm0/mm2 preloaded\n\t" + " # from the main loop.\n\t" + " movaps %%xmm0, %%xmm4\n\t" + " mulps %%xmm2, %%xmm0\n\t" + " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t" + " addps %%xmm0, %%xmm6\n\t" + " mulps %%xmm4, %%xmm2\n\t" + " addps %%xmm2, %%xmm7\n\t" + 
".%=Leven:\n\t" + " # neg inversor\n\t" + " xorps %%xmm1, %%xmm1\n\t" + " mov $0x80000000, %%r9\n\t" + " movd %%r9, %%xmm1\n\t" + " shufps $0x11, %%xmm1, %%xmm1 # b00010001 # 0 -0 0 -0\n\t" + " # pfpnacc\n\t" + " xorps %%xmm1, %%xmm6\n\t" + " movaps %%xmm6, %%xmm2\n\t" + " unpcklps %%xmm7, %%xmm6\n\t" + " unpckhps %%xmm7, %%xmm2\n\t" + " movaps %%xmm2, %%xmm3\n\t" + " shufps $0x44, %%xmm6, %%xmm2 # b01000100\n\t" + " shufps $0xee, %%xmm3, %%xmm6 # b11101110\n\t" + " addps %%xmm2, %%xmm6\n\t" + " # xmm6 = r1 i2 r3 i4\n\t" + " movhlps %%xmm6, %%xmm4 # xmm4 = r3 i4 ?? ??\n\t" + " addps %%xmm4, %%xmm6 # xmm6 = r1+r3 i2+i4 ?? ??\n\t" + " movlps %%xmm6, (%[rdi]) # store low 2x32 bits (complex) to memory\n\t" + : + :[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" (result) + :"rax", "r8", "r9", "r10", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "cc", "memory" /* every register the asm writes, the flags, and the store through %[rdi] */ + ); + + + if(isodd) { + *result += input[num_points - 1] * taps[num_points - 1]; + } + + return; + +} + +#endif /* LV_HAVE_SSE && LV_HAVE_64 */ + +#if LV_HAVE_SSE && LV_HAVE_32 + +static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) { + + volk_gnsssdr_32fc_x2_dot_prod_32fc_a_generic(result, input, taps, num_points); + +#if 0 + const unsigned int num_bytes = num_points*8; + unsigned int isodd = num_points & 1; + + asm volatile + ( + " #pushl %%ebp\n\t" + " #movl %%esp, %%ebp\n\t" + " movl 12(%%ebp), %%eax # input\n\t" + " movl 16(%%ebp), %%edx # taps\n\t" + " movl 20(%%ebp), %%ecx # n_bytes\n\t" + " xorps %%xmm6, %%xmm6 # zero accumulators\n\t" + " movaps 0(%%eax), %%xmm0\n\t" + " xorps %%xmm7, %%xmm7 # zero accumulators\n\t" + " movaps 0(%%edx), %%xmm2\n\t" + " shrl $5, %%ecx # ecx = n_2_ccomplex_blocks / 2\n\t" + " jmp .%=L1_test\n\t" + " # 4 taps / loop\n\t" + " # something like ?? 
cycles / loop\n\t" + ".%=Loop1: \n\t" + "# complex prod: C += A * B, w/ temp Z & Y (or B), xmmPN=$0x8000000080000000\n\t" + "# movaps (%%eax), %%xmmA\n\t" + "# movaps (%%edx), %%xmmB\n\t" + "# movaps %%xmmA, %%xmmZ\n\t" + "# shufps $0xb1, %%xmmZ, %%xmmZ # swap internals\n\t" + "# mulps %%xmmB, %%xmmA\n\t" + "# mulps %%xmmZ, %%xmmB\n\t" + "# # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t" + "# xorps %%xmmPN, %%xmmA\n\t" + "# movaps %%xmmA, %%xmmZ\n\t" + "# unpcklps %%xmmB, %%xmmA\n\t" + "# unpckhps %%xmmB, %%xmmZ\n\t" + "# movaps %%xmmZ, %%xmmY\n\t" + "# shufps $0x44, %%xmmA, %%xmmZ # b01000100\n\t" + "# shufps $0xee, %%xmmY, %%xmmA # b11101110\n\t" + "# addps %%xmmZ, %%xmmA\n\t" + "# addps %%xmmA, %%xmmC\n\t" + "# A=xmm0, B=xmm2, Z=xmm4\n\t" + "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t" + " movaps 16(%%eax), %%xmm1\n\t" + " movaps %%xmm0, %%xmm4\n\t" + " mulps %%xmm2, %%xmm0\n\t" + " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t" + " movaps 16(%%edx), %%xmm3\n\t" + " movaps %%xmm1, %%xmm5\n\t" + " addps %%xmm0, %%xmm6\n\t" + " mulps %%xmm3, %%xmm1\n\t" + " shufps $0xb1, %%xmm5, %%xmm5 # swap internals\n\t" + " addps %%xmm1, %%xmm6\n\t" + " mulps %%xmm4, %%xmm2\n\t" + " movaps 32(%%eax), %%xmm0\n\t" + " addps %%xmm2, %%xmm7\n\t" + " mulps %%xmm5, %%xmm3\n\t" + " addl $32, %%eax\n\t" + " movaps 32(%%edx), %%xmm2\n\t" + " addps %%xmm3, %%xmm7\n\t" + " addl $32, %%edx\n\t" + ".%=L1_test:\n\t" + " decl %%ecx\n\t" + " jge .%=Loop1\n\t" + " # We've handled the bulk of multiplies up to here.\n\t" + " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t" + " # If so, we've got 2 more taps to do.\n\t" + " movl 20(%%ebp), %%ecx # n_2_ccomplex_blocks\n\t" + " shrl $4, %%ecx\n\t" + " andl $1, %%ecx\n\t" + " je .%=Leven\n\t" + " # The count was odd, do 2 more taps.\n\t" + " # Note that we've already got mm0/mm2 preloaded\n\t" + " # from the main loop.\n\t" + " movaps %%xmm0, %%xmm4\n\t" + " mulps %%xmm2, %%xmm0\n\t" + " shufps $0xb1, %%xmm4, %%xmm4 # swap internals\n\t" + " 
addps %%xmm0, %%xmm6\n\t" + " mulps %%xmm4, %%xmm2\n\t" + " addps %%xmm2, %%xmm7\n\t" + ".%=Leven:\n\t" + " # neg inversor\n\t" + " movl 8(%%ebp), %%eax \n\t" + " xorps %%xmm1, %%xmm1\n\t" + " movl $0x80000000, (%%eax)\n\t" + " movss (%%eax), %%xmm1\n\t" + " shufps $0x11, %%xmm1, %%xmm1 # b00010001 # 0 -0 0 -0\n\t" + " # pfpnacc\n\t" + " xorps %%xmm1, %%xmm6\n\t" + " movaps %%xmm6, %%xmm2\n\t" + " unpcklps %%xmm7, %%xmm6\n\t" + " unpckhps %%xmm7, %%xmm2\n\t" + " movaps %%xmm2, %%xmm3\n\t" + " shufps $0x44, %%xmm6, %%xmm2 # b01000100\n\t" + " shufps $0xee, %%xmm3, %%xmm6 # b11101110\n\t" + " addps %%xmm2, %%xmm6\n\t" + " # xmm6 = r1 i2 r3 i4\n\t" + " #movl 8(%%ebp), %%eax # @result\n\t" + " movhlps %%xmm6, %%xmm4 # xmm4 = r3 i4 ?? ??\n\t" + " addps %%xmm4, %%xmm6 # xmm6 = r1+r3 i2+i4 ?? ??\n\t" + " movlps %%xmm6, (%%eax) # store low 2x32 bits (complex) to memory\n\t" + " #popl %%ebp\n\t" + : + : + : "eax", "ecx", "edx" + ); + + + int getem = num_bytes % 16; + + if(isodd) { + *result += (input[num_points - 1] * taps[num_points - 1]); + } + + return; +#endif +} + +#endif /* LV_HAVE_SSE && LV_HAVE_32 */ + +#ifdef LV_HAVE_SSE3 + +#include + +static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) { + + const unsigned int num_bytes = num_points*8; + unsigned int isodd = num_points & 1; + + lv_32fc_t dotProduct; + memset(&dotProduct, 0x0, 2*sizeof(float)); + + unsigned int number = 0; + const unsigned int halfPoints = num_bytes >> 4; + + __m128 x, y, yl, yh, z, tmp1, tmp2, dotProdVal; + + const lv_32fc_t* a = input; + const lv_32fc_t* b = taps; + + dotProdVal = _mm_setzero_ps(); + + for(;number < halfPoints; number++){ + + x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi + y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = 
_mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + + dotProdVal = _mm_add_ps(dotProdVal, z); // Add the complex multiplication results together + + a += 2; + b += 2; + } + + __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2]; + + _mm_store_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector + + dotProduct += ( dotProductVector[0] + dotProductVector[1] ); + + if(isodd) { + dotProduct += input[num_points - 1] * taps[num_points - 1]; + } + + *result = dotProduct; +} + +#endif /*LV_HAVE_SSE3*/ + +#ifdef LV_HAVE_SSE4_1 + +#include + +static inline void volk_gnsssdr_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) { + + unsigned int i = 0; + const unsigned int qtr_points = num_points/4; + const unsigned int isodd = num_points & 3; + + __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, im1; + float *p_input, *p_taps; + __m64 *p_result; + + static const __m128i neg = {0x000000000000000080000000}; + + p_result = (__m64*)result; + p_input = (float*)input; + p_taps = (float*)taps; + + real0 = _mm_setzero_ps(); + real1 = _mm_setzero_ps(); + im0 = _mm_setzero_ps(); + im1 = _mm_setzero_ps(); + + for(; i < qtr_points; ++i) { + xmm0 = _mm_load_ps(p_input); + xmm1 = _mm_load_ps(p_taps); + + p_input += 4; + p_taps += 4; + + xmm2 = _mm_load_ps(p_input); + xmm3 = _mm_load_ps(p_taps); + + p_input += 4; + p_taps += 4; + + xmm4 = _mm_unpackhi_ps(xmm0, xmm2); + xmm5 = _mm_unpackhi_ps(xmm1, xmm3); + xmm0 = _mm_unpacklo_ps(xmm0, xmm2); + xmm2 = _mm_unpacklo_ps(xmm1, xmm3); + + //imaginary vector from input + xmm1 = _mm_unpackhi_ps(xmm0, xmm4); + //real vector from input + xmm3 = _mm_unpacklo_ps(xmm0, xmm4); + //imaginary vector from 
taps + xmm0 = _mm_unpackhi_ps(xmm2, xmm5); + //real vector from taps + xmm2 = _mm_unpacklo_ps(xmm2, xmm5); + + xmm4 = _mm_dp_ps(xmm3, xmm2, 0xf1); + xmm5 = _mm_dp_ps(xmm1, xmm0, 0xf1); + + xmm6 = _mm_dp_ps(xmm3, xmm0, 0xf2); + xmm7 = _mm_dp_ps(xmm1, xmm2, 0xf2); + + real0 = _mm_add_ps(xmm4, real0); + real1 = _mm_add_ps(xmm5, real1); + im0 = _mm_add_ps(xmm6, im0); + im1 = _mm_add_ps(xmm7, im1); + } + + real1 = _mm_xor_ps(real1, bit128_p(&neg)->float_vec); + + im0 = _mm_add_ps(im0, im1); + real0 = _mm_add_ps(real0, real1); + + im0 = _mm_add_ps(im0, real0); + + _mm_storel_pi(p_result, im0); + + for(i = num_points-isodd; i < num_points; i++) { + *result += input[i] * taps[i]; + } +} + +#endif /*LV_HAVE_SSE4_1*/ + +#endif /*INCLUDED_volk_gnsssdr_32fc_x2_dot_prod_32fc_a_H*/ diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_multiply_32fc.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_multiply_32fc.h new file mode 100644 index 000000000..e2b17c401 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x2_multiply_32fc.h @@ -0,0 +1,170 @@ +#ifndef INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H +#define INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*! 
+ \brief Multiplies the two input complex vectors element by element and stores the products in the third vector (unaligned loads and stores) + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_gnsssdr_32fc_x2_multiply_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + __m128 x, y, yl, yh, z, tmp1, tmp2; + lv_32fc_t* c = cVector; + const lv_32fc_t* a = aVector; + const lv_32fc_t* b = bVector; + + for(;number < halfPoints; number++){ + + x = _mm_loadu_ps((float*)a); // Load two complex values of aVector as ar,ai,br,bi + y = _mm_loadu_ps((float*)b); // Load two complex values of bVector as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + + _mm_storeu_ps((float*)c,z); // Store the two complex products into the output vector + + a += 2; + b += 2; + c += 2; + } + + if((num_points % 2) != 0) { + *c = (*a) * (*b); + } +} +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_GENERIC + /*! 
+ \brief Multiplies the two input complex vectors element by element and stores the products in the third vector (plain C loop, one product per iteration) + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_gnsssdr_32fc_x2_multiply_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + lv_32fc_t* cPtr = cVector; + const lv_32fc_t* aPtr = aVector; + const lv_32fc_t* bPtr= bVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *cPtr++ = (*aPtr++) * (*bPtr++); + } +} +#endif /* LV_HAVE_GENERIC */ + + +#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H */ +#ifndef INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H +#define INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include + /*! 
+ \brief Multiplies the two input complex vectors element by element and stores the products in the third vector (aligned loads and stores) + \param cVector The vector where the results will be stored (written with the aligned store _mm_store_ps) + \param aVector One of the vectors to be multiplied (read with the aligned load _mm_load_ps) + \param bVector One of the vectors to be multiplied (read with the aligned load _mm_load_ps) + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_gnsssdr_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + __m128 x, y, yl, yh, z, tmp1, tmp2; + lv_32fc_t* c = cVector; + const lv_32fc_t* a = aVector; + const lv_32fc_t* b = bVector; + for(;number < halfPoints; number++){ + + x = _mm_load_ps((float*)a); // Load two complex values of aVector as ar,ai,br,bi + y = _mm_load_ps((float*)b); // Load two complex values of bVector as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + + _mm_store_ps((float*)c,z); // Store the two complex products into the output vector + + a += 2; + b += 2; + c += 2; + } + + if((num_points % 2) != 0) { + *c = (*a) * (*b); + } +} +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_GENERIC + /*! 
+ \brief Multiplies the two input complex vectors element by element and stores the products in the third vector (plain C loop, one product per iteration) + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_gnsssdr_32fc_x2_multiply_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + lv_32fc_t* cPtr = cVector; + const lv_32fc_t* aPtr = aVector; + const lv_32fc_t* bPtr= bVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *cPtr++ = (*aPtr++) * (*bPtr++); + } +} +#endif /* LV_HAVE_GENERIC */ + +#ifdef LV_HAVE_ORC + /*! + \brief Multiplies the two input complex vectors and stores their results in the third vector by forwarding all arguments unchanged to volk_gnsssdr_32fc_x2_multiply_32fc_a_orc_impl, which is only declared (extern) here + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +extern void volk_gnsssdr_32fc_x2_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points); +static inline void volk_gnsssdr_32fc_x2_multiply_32fc_u_orc(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ + volk_gnsssdr_32fc_x2_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ + + + + + +#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3.h new file mode 100644 index 000000000..7e05be9cf --- /dev/null +++ 
b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3.h @@ -0,0 +1,409 @@ +#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H +#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H + +#include +#include +#include +#include +#include + +/*! + * TODO: Code the SSE4 version and benchmark it + */ +#ifdef LV_HAVE_SSE3 +#include + + + /*! + \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation + \param input The input signal input + \param carrier The carrier signal input + \param E_code Early PRN code replica input + \param P_code Early PRN code replica input + \param L_code Early PRN code replica input + \param E_out Early correlation output + \param P_out Early correlation output + \param L_out Early correlation output + \param num_points The number of complex values in vectors + */ +static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_sse3(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points) +{ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + lv_32fc_t dotProduct_E; + memset(&dotProduct_E, 0x0, 2*sizeof(float)); + lv_32fc_t dotProduct_P; + memset(&dotProduct_P, 0x0, 2*sizeof(float)); + lv_32fc_t dotProduct_L; + memset(&dotProduct_L, 0x0, 2*sizeof(float)); + + // Aux vars + __m128 x, y, yl, yh, z, tmp1, tmp2, z_E, z_P, z_L; + + z_E = _mm_setzero_ps(); + z_P = _mm_setzero_ps(); + z_L = _mm_setzero_ps(); + + //input and output vectors + //lv_32fc_t* _input_BB = input_BB; + const lv_32fc_t* _input = input; + const lv_32fc_t* _carrier = carrier; + const lv_32fc_t* _E_code = E_code; + const lv_32fc_t* _P_code = P_code; + const lv_32fc_t* _L_code = L_code; + + for(;number < halfPoints; number++) + { + // carrier wipe-off (vector point-to-point product) + x = 
_mm_loadu_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi + y = _mm_loadu_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + + //_mm_storeu_ps((float*)_input_BB,z); // Store the results back into the _input_BB container + + // correlation E,P,L (3x vector scalar product) + // Early + //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi + x = z; + + y = _mm_load_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + + z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together + + // Prompt + //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi + y = _mm_load_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di 
+ + z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together + + // Late + //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi + y = _mm_load_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + + z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together + + /*pointer increment*/ + _carrier += 2; + _input += 2; + //_input_BB += 2; + _E_code += 2; + _P_code += 2; + _L_code +=2; + } + + __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2]; + //__VOLK_ATTR_ALIGNED(16) lv_32fc_t _input_BB; + + _mm_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector + _mm_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector + _mm_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector + + dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] ); + dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] ); + dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] ); + + if((num_points % 2) != 0) + { + //_input_BB = (*_input) * (*_carrier); + dotProduct_E += (*_input) * (*_E_code)*(*_carrier); + dotProduct_P += (*_input) * (*_P_code)*(*_carrier); + dotProduct_L += (*_input) * (*_L_code)*(*_carrier); + } + + *E_out = dotProduct_E; + *P_out = dotProduct_P; + *L_out = dotProduct_L; +} + 
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+    \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+    \param E_out Early correlation output
+    \param P_out Prompt correlation output
+    \param L_out Late correlation output
+    \param input The input signal input
+    \param carrier The carrier signal input
+    \param E_code Early PRN code replica input
+    \param P_code Prompt PRN code replica input
+    \param L_code Late PRN code replica input
+    \param num_points The number of complex values in vectors
+*/
+static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
+{
+    // Accumulate in locals and store once at the end, so the outputs are not
+    // read-modified-written through the pointers on every iteration.
+    lv_32fc_t acc_E = lv_cmake(0, 0);
+    lv_32fc_t acc_P = lv_cmake(0, 0);
+    lv_32fc_t acc_L = lv_cmake(0, 0);
+
+    // The index has the same type as num_points: no signed/unsigned comparison
+    // and no signed overflow for very long vectors.
+    for(unsigned int i = 0; i < num_points; ++i)
+        {
+            // Perform the carrier wipe-off
+            const lv_32fc_t bb_signal_sample = input[i] * carrier[i];
+
+            // Correlate the baseband sample with each code replica
+            acc_E += bb_signal_sample * E_code[i];
+            acc_P += bb_signal_sample * P_code[i];
+            acc_L += bb_signal_sample * L_code[i];
+        }
+
+    *E_out = acc_E;
+    *P_out = acc_P;
+    *L_out = acc_L;
+}
+
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_u_H */
+
+
+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H
+#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H
+
+/* NOTE(review): the header names of the #include directives below are missing in this copy of the patch; restore them from the upstream file. */
+#include
+#include
+#include
+#include
+#include
+
+#ifdef LV_HAVE_SSE3
+#include
+/*!
+    \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+    \param E_out Early correlation output
+    \param P_out Prompt correlation output
+    \param L_out Late correlation output
+    \param input The input signal input (read with aligned 128-bit loads)
+    \param carrier The carrier signal input (read with aligned 128-bit loads)
+    \param E_code Early PRN code replica input (read with aligned 128-bit loads)
+    \param P_code Prompt PRN code replica input (read with aligned 128-bit loads)
+    \param L_code Late PRN code replica input (read with aligned 128-bit loads)
+    \param num_points The number of complex values in vectors
+*/
+static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_sse3(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
+{
+    unsigned int number = 0;
+    const unsigned int halfPoints = num_points / 2;
+
+    lv_32fc_t dotProduct_E;
+    memset(&dotProduct_E, 0x0, 2*sizeof(float));
+    lv_32fc_t dotProduct_P;
+    memset(&dotProduct_P, 0x0, 2*sizeof(float));
+    lv_32fc_t dotProduct_L;
+    memset(&dotProduct_L, 0x0, 2*sizeof(float));
+
+    // Aux vars
+    __m128 x, y, yl, yh, z, tmp1, tmp2, z_E, z_P, z_L;
+    __m128 bb_signal_sample, bb_signal_sample_shuffled;
+
+    // Each accumulator holds two partial complex sums: re0,im0,re1,im1
+    z_E = _mm_setzero_ps();
+    z_P = _mm_setzero_ps();
+    z_L = _mm_setzero_ps();
+
+    //input vectors
+    const lv_32fc_t* _input = input;
+    const lv_32fc_t* _carrier = carrier;
+    const lv_32fc_t* _E_code = E_code;
+    const lv_32fc_t* _P_code = P_code;
+    const lv_32fc_t* _L_code = L_code;
+
+    // Two complex samples are processed per iteration
+    for(;number < halfPoints; number++)
+        {
+            // carrier wipe-off (vector point-to-point product)
+            x = _mm_load_ps((const float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi
+            y = _mm_load_ps((const float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+            yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+            yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+            tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+            x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+            tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+            bb_signal_sample = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+            // The swapped copy is computed once and shared by the three
+            // correlators, instead of swapping the register back and forth
+            // before every product.
+            bb_signal_sample_shuffled = _mm_shuffle_ps(bb_signal_sample,bb_signal_sample,0xB1); // Re-arrange bb_signal_sample to be ai,ar,bi,br
+
+            // correlation E,P,L (3x vector scalar product)
+            // Early
+            y = _mm_load_ps((const float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+            yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+            yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+            tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+            tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+            z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+            z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together
+
+            // Prompt
+            y = _mm_load_ps((const float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+            yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+            yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+            tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+            tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+            z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+            z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together
+
+            // Late
+            y = _mm_load_ps((const float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+            yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+            yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+            tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+            tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+            z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+            z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together
+
+            /*pointer increment*/
+            _carrier += 2;
+            _input += 2;
+            _E_code += 2;
+            _P_code += 2;
+            _L_code += 2;
+        }
+
+    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
+    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
+    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
+
+    _mm_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector
+    _mm_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector
+    _mm_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector
+
+    // Fold the two partial sums of each accumulator into one complex value
+    dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
+    dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
+    dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );
+
+    if((num_points % 2) != 0)
+        {
+            // Odd sample left over: wipe off the carrier once and then
+            // correlate, in the same operation order as the vector loop above.
+            const lv_32fc_t bb_signal_sample_tail = (*_input) * (*_carrier);
+            dotProduct_E += bb_signal_sample_tail * (*_E_code);
+            dotProduct_P += bb_signal_sample_tail * (*_P_code);
+            dotProduct_L += bb_signal_sample_tail * (*_L_code);
+        }
+
+    *E_out = dotProduct_E;
+    *P_out = dotProduct_P;
+    *L_out = dotProduct_L;
+}
+
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+    \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation
+    \param E_out Early correlation output
+    \param P_out Prompt correlation output
+    \param L_out Late correlation output
+    \param input The input signal input
+    \param carrier The carrier signal input
+    \param E_code Early PRN code replica input
+    \param P_code Prompt PRN code replica input
+    \param L_code Late PRN code replica input
+    \param num_points The number of complex values in vectors
+*/
+static inline void volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, unsigned int num_points)
+{
+    // Accumulate in locals and store once at the end, so the outputs are not
+    // read-modified-written through the pointers on every iteration.
+    lv_32fc_t acc_E = lv_cmake(0, 0);
+    lv_32fc_t acc_P = lv_cmake(0, 0);
+    lv_32fc_t acc_L = lv_cmake(0, 0);
+
+    // The index has the same type as num_points: no signed/unsigned comparison
+    // and no signed overflow for very long vectors.
+    for(unsigned int i = 0; i < num_points; ++i)
+        {
+            // Perform the carrier wipe-off
+            const lv_32fc_t bb_signal_sample = input[i] * carrier[i];
+
+            // Correlate the baseband sample with each code replica
+            acc_E += bb_signal_sample * E_code[i];
+            acc_P += bb_signal_sample * P_code[i];
+            acc_L += bb_signal_sample * L_code[i];
+        }
+
+    *E_out = acc_E;
+    *P_out = acc_P;
+    *L_out = acc_L;
+}
+
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3_a_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5.h
new file mode 100644
index 000000000..378cea204
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5.h
@@ -0,0 +1,524 @@
+/*!
+ * \file volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5
+ * \brief Volk protokernel: performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation with 64 bits vectors
+ * \authors
    + *
  • Javier Arribas, 2011. jarribas(at)cttc.es + *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com + *
+ * + * Volk protokernel that performs the carrier wipe-off mixing and the + * VE, Early, Prompt, Late and VL correlation with 64 bits vectors (32 bits the + * real part and 32 bits the imaginary part): + * - The carrier wipe-off is done by multiplying the input signal by the + * carrier (multiplication of 64 bits vectors). It returns the input + * signal in base band (BB) + * - VE values are calculated by multiplying the input signal in BB by the + * VE code (multiplication of 64 bits vectors), accumulating the results + * - Early values are calculated by multiplying the input signal in BB by the + * early code (multiplication of 64 bits vectors), accumulating the results + * - Prompt values are calculated by multiplying the input signal in BB by the + * prompt code (multiplication of 64 bits vectors), accumulating the results + * - Late values are calculated by multiplying the input signal in BB by the + * late code (multiplication of 64 bits vectors), accumulating the results + * - VL values are calculated by multiplying the input signal in BB by the + * VL code (multiplication of 64 bits vectors), accumulating the results + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H
+#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H
+
+/* NOTE(review): the header names of the #include directives below are missing in this copy of the patch; restore them from the upstream file. */
+#include
+#include
+#include
+#include
+#include
+
+/*!
+ * TODO: Code the SSE4 version and benchmark it
+ */
+#ifdef LV_HAVE_SSE3
+#include
+
+
+ /*!
+    \brief Carrier wipe-off followed by the Very Early, Early, Prompt, Late and Very Late correlations (SSE3, unaligned loads)
+    \param VE_out Very Early (VE) correlation output
+    \param E_out Early correlation output
+    \param P_out Prompt correlation output
+    \param L_out Late correlation output
+    \param VL_out Very Late (VL) correlation output
+    \param input The input signal
+    \param carrier The carrier signal used for the wipe-off
+    \param VE_code VE PRN code replica input
+    \param E_code Early PRN code replica input
+    \param P_code Prompt PRN code replica input
+    \param L_code Late PRN code replica input
+    \param VL_code VL PRN code replica input
+    \param num_points The number of complex values in vectors
+  */
+static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_sse3(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
+{
+    const unsigned int halfPoints = num_points / 2;
+    unsigned int number;
+
+    // Scalar results, cleared byte-wise
+    lv_32fc_t dotProduct_VE;
+    lv_32fc_t dotProduct_E;
+    lv_32fc_t dotProduct_P;
+    lv_32fc_t dotProduct_L;
+    lv_32fc_t dotProduct_VL;
+    memset(&dotProduct_VE, 0x0, 2*sizeof(float));
+    memset(&dotProduct_E, 0x0, 2*sizeof(float));
+    memset(&dotProduct_P, 0x0, 2*sizeof(float));
+    memset(&dotProduct_L, 0x0, 2*sizeof(float));
+    memset(&dotProduct_VL, 0x0, 2*sizeof(float));
+
+    // Vector accumulators: two partial complex sums each (re0,im0,re1,im1)
+    __m128 z_VE = _mm_setzero_ps();
+    __m128 z_E = _mm_setzero_ps();
+    __m128 z_P = _mm_setzero_ps();
+    __m128 z_L = _mm_setzero_ps();
+    __m128 z_VL = _mm_setzero_ps();
+
+    // Read cursors over the input vectors
+    const lv_32fc_t* _input = input;
+    const lv_32fc_t* _carrier = carrier;
+    const lv_32fc_t* _VE_code = VE_code;
+    const lv_32fc_t* _E_code = E_code;
+    const lv_32fc_t* _P_code = P_code;
+    const lv_32fc_t* _L_code = L_code;
+    const lv_32fc_t* _VL_code = VL_code;
+
+    // Two complex samples per iteration
+    for(number = 0; number < halfPoints; number++)
+        {
+            __m128 code;
+
+            // Carrier wipe-off: complex product input * carrier.
+            // moveldup/movehdup replicate the real/imaginary parts of the
+            // second factor, the 0xB1 shuffle swaps re/im of the first one and
+            // addsub combines both partial products into (re, im) pairs.
+            const __m128 in = _mm_loadu_ps((const float*)_input);
+            const __m128 car = _mm_loadu_ps((const float*)_carrier);
+            const __m128 bb_signal_sample = _mm_addsub_ps(_mm_mul_ps(in, _mm_moveldup_ps(car)),
+                                                          _mm_mul_ps(_mm_shuffle_ps(in, in, 0xB1), _mm_movehdup_ps(car)));
+            // re/im swapped copy of the baseband samples, shared by the five correlators
+            const __m128 bb_signal_sample_shuffled = _mm_shuffle_ps(bb_signal_sample, bb_signal_sample, 0xB1);
+
+            // Very Early
+            code = _mm_loadu_ps((const float*)_VE_code);
+            z_VE = _mm_add_ps(z_VE, _mm_addsub_ps(_mm_mul_ps(bb_signal_sample, _mm_moveldup_ps(code)),
+                                                  _mm_mul_ps(bb_signal_sample_shuffled, _mm_movehdup_ps(code))));
+
+            // Early
+            code = _mm_loadu_ps((const float*)_E_code);
+            z_E = _mm_add_ps(z_E, _mm_addsub_ps(_mm_mul_ps(bb_signal_sample, _mm_moveldup_ps(code)),
+                                                _mm_mul_ps(bb_signal_sample_shuffled, _mm_movehdup_ps(code))));
+
+            // Prompt
+            code = _mm_loadu_ps((const float*)_P_code);
+            z_P = _mm_add_ps(z_P, _mm_addsub_ps(_mm_mul_ps(bb_signal_sample, _mm_moveldup_ps(code)),
+                                                _mm_mul_ps(bb_signal_sample_shuffled, _mm_movehdup_ps(code))));
+
+            // Late
+            code = _mm_loadu_ps((const float*)_L_code);
+            z_L = _mm_add_ps(z_L, _mm_addsub_ps(_mm_mul_ps(bb_signal_sample, _mm_moveldup_ps(code)),
+                                                _mm_mul_ps(bb_signal_sample_shuffled, _mm_movehdup_ps(code))));
+
+            // Very Late
+            code = _mm_loadu_ps((const float*)_VL_code);
+            z_VL = _mm_add_ps(z_VL, _mm_addsub_ps(_mm_mul_ps(bb_signal_sample, _mm_moveldup_ps(code)),
+                                                  _mm_mul_ps(bb_signal_sample_shuffled, _mm_movehdup_ps(code))));
+
+            // Advance every cursor by the two samples just consumed
+            _carrier += 2;
+            _input += 2;
+            _VE_code += 2;
+            _E_code += 2;
+            _P_code += 2;
+            _L_code += 2;
+            _VL_code += 2;
+        }
+
+    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VE[2];
+    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2];
+    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2];
+    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2];
+    __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VL[2];
+
+    // Spill the accumulators so the two partial sums can be added as complex values
+    _mm_storeu_ps((float*)dotProductVector_VE, z_VE);
+    _mm_storeu_ps((float*)dotProductVector_E, z_E);
+    _mm_storeu_ps((float*)dotProductVector_P, z_P);
+    _mm_storeu_ps((float*)dotProductVector_L, z_L);
+    _mm_storeu_ps((float*)dotProductVector_VL, z_VL);
+
+    dotProduct_VE += ( dotProductVector_VE[0] + dotProductVector_VE[1] );
+    dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] );
+    dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] );
+    dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] );
+    dotProduct_VL += ( dotProductVector_VL[0] + dotProductVector_VL[1] );
+
+    // An odd num_points leaves one sample that the vector loop did not process
+    if((num_points % 2) != 0)
+        {
+            dotProduct_VE += (*_input) * (*_VE_code)*(*_carrier);
+            dotProduct_E += (*_input) * (*_E_code)*(*_carrier);
+            dotProduct_P += (*_input) * (*_P_code)*(*_carrier);
+            dotProduct_L += (*_input) * (*_L_code)*(*_carrier);
+            dotProduct_VL += (*_input) * (*_VL_code)*(*_carrier);
+        }
+
+    *VE_out = dotProduct_VE;
+    *E_out = dotProduct_E;
+    *P_out = dotProduct_P;
+    *L_out = dotProduct_L;
+    *VL_out = dotProduct_VL;
+}
+
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+    \brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation
+    \param VE_out VE correlation output
+    \param E_out Early correlation output
+    \param P_out Prompt correlation output
+    \param L_out Late correlation output
+    \param VL_out VL correlation output
+    \param input The input signal input
+    \param carrier The carrier signal input
+    \param VE_code VE PRN code replica input
+    \param E_code Early PRN code replica input
+    \param P_code Prompt PRN code replica input
+    \param L_code Late PRN code replica input
+    \param VL_code VL PRN code replica input
+    \param num_points The number of complex values in vectors
+*/
+static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points)
+{
+    // Accumulate in locals and store once at the end, so the outputs are not
+    // read-modified-written through the pointers on every iteration.
+    lv_32fc_t acc_VE = lv_cmake(0, 0);
+    lv_32fc_t acc_E = lv_cmake(0, 0);
+    lv_32fc_t acc_P = lv_cmake(0, 0);
+    lv_32fc_t acc_L = lv_cmake(0, 0);
+    lv_32fc_t acc_VL = lv_cmake(0, 0);
+
+    // perform VE, Early, Prompt, Late and VL correlation
+    // The index has the same type as num_points: no signed/unsigned comparison
+    // and no signed overflow for very long vectors.
+    for(unsigned int i = 0; i < num_points; ++i)
+        {
+            // Perform the carrier wipe-off
+            const lv_32fc_t bb_signal_sample = input[i] * carrier[i];
+
+            // Correlate the baseband sample with each code replica
+            acc_VE += bb_signal_sample * VE_code[i];
+            acc_E += bb_signal_sample * E_code[i];
+            acc_P += bb_signal_sample * P_code[i];
+            acc_L += bb_signal_sample * L_code[i];
+            acc_VL += bb_signal_sample * VL_code[i];
+        }
+
+    *VE_out = acc_VE;
+    *E_out = acc_E;
+    *P_out = acc_P;
+    *L_out = acc_L;
+    *VL_out = acc_VL;
+}
+
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_u_H */
+
+
+#ifndef INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H
+#define INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H
+
+/* NOTE(review): the header names of the #include directives below are missing in this copy of the patch; restore them from the upstream file. */
+#include
+#include
+#include
+#include
+#include
+
+#ifdef LV_HAVE_SSE3
+#include
+/*!
+ \brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation + \param input The input signal input + \param carrier The carrier signal input + \param VE_code VE PRN code replica input + \param E_code Early PRN code replica input + \param P_code Early PRN code replica input + \param L_code Early PRN code replica input + \param VL_code VL PRN code replica input + \param VE_out VE correlation output + \param E_out Early correlation output + \param P_out Early correlation output + \param L_out Early correlation output + \param VL_out VL correlation output + \param num_points The number of complex values in vectors + */ +static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_sse3(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points) +{ + unsigned int number = 0; + const unsigned int halfPoints = num_points / 2; + + lv_32fc_t dotProduct_VE; + memset(&dotProduct_VE, 0x0, 2*sizeof(float)); + lv_32fc_t dotProduct_E; + memset(&dotProduct_E, 0x0, 2*sizeof(float)); + lv_32fc_t dotProduct_P; + memset(&dotProduct_P, 0x0, 2*sizeof(float)); + lv_32fc_t dotProduct_L; + memset(&dotProduct_L, 0x0, 2*sizeof(float)); + lv_32fc_t dotProduct_VL; + memset(&dotProduct_VL, 0x0, 2*sizeof(float)); + + // Aux vars + __m128 x, y, yl, yh, z, tmp1, tmp2, z_VE, z_E, z_P, z_L, z_VL; + __m128 bb_signal_sample, bb_signal_sample_shuffled; + + z_VE = _mm_setzero_ps(); + z_E = _mm_setzero_ps(); + z_P = _mm_setzero_ps(); + z_L = _mm_setzero_ps(); + z_VL = _mm_setzero_ps(); + + //input and output vectors + const lv_32fc_t* _input = input; + const lv_32fc_t* _carrier = carrier; + const lv_32fc_t* _VE_code = VE_code; + const lv_32fc_t* _E_code = E_code; + const lv_32fc_t* _P_code = P_code; + const lv_32fc_t* _L_code = L_code; + 
const lv_32fc_t* _VL_code = VL_code; + + for(;number < halfPoints; number++) + { + // carrier wipe-off (vector point-to-point product) + x = _mm_load_ps((float*)_input); // Load the ar + ai, br + bi as ar,ai,br,bi + y = _mm_load_ps((float*)_carrier); // Load the cr + ci, dr + di as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + + x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br + + tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + bb_signal_sample = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + bb_signal_sample_shuffled = _mm_shuffle_ps(bb_signal_sample,bb_signal_sample,0xB1); // Re-arrange bb_signal_sample to be ai,ar,bi,br + + // correlation VE,E,P,L,VL (5x vector scalar product) + // VE + y = _mm_load_ps((float*)_VE_code); // Load the cr + ci, dr + di as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + z_VE = _mm_add_ps(z_VE, z); // Add the complex multiplication results together + + // Early + y = _mm_load_ps((float*)_E_code); // Load the cr + ci, dr + di as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + z_E = _mm_add_ps(z_E, z); // Add the complex multiplication results together + + // Prompt + y = 
_mm_load_ps((float*)_P_code); // Load the cr + ci, dr + di as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + z_P = _mm_add_ps(z_P, z); // Add the complex multiplication results together + + // Late + y = _mm_load_ps((float*)_L_code); // Load the cr + ci, dr + di as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + z_L = _mm_add_ps(z_L, z); // Add the complex multiplication results together + + // VL + //x = _mm_load_ps((float*)_input_BB); // Load the ar + ai, br + bi as ar,ai,br,bi + y = _mm_load_ps((float*)_VL_code); // Load the cr + ci, dr + di as cr,ci,dr,di + + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + + tmp1 = _mm_mul_ps(bb_signal_sample,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + tmp2 = _mm_mul_ps(bb_signal_sample_shuffled,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + + z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di + z_VL = _mm_add_ps(z_VL, z); // Add the complex multiplication results together + + /*pointer increment*/ + _carrier += 2; + _input += 2; + _VE_code += 2; + _E_code += 2; + _P_code += 2; + _L_code +=2; + _VL_code +=2; + } + + __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VE[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_E[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_P[2]; + 
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_L[2]; + __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector_VL[2]; + + _mm_store_ps((float*)dotProductVector_VE,z_VE); // Store the results back into the dot product vector + _mm_store_ps((float*)dotProductVector_E,z_E); // Store the results back into the dot product vector + _mm_store_ps((float*)dotProductVector_P,z_P); // Store the results back into the dot product vector + _mm_store_ps((float*)dotProductVector_L,z_L); // Store the results back into the dot product vector + _mm_store_ps((float*)dotProductVector_VL,z_VL); // Store the results back into the dot product vector + + dotProduct_VE += ( dotProductVector_VE[0] + dotProductVector_VE[1] ); + dotProduct_E += ( dotProductVector_E[0] + dotProductVector_E[1] ); + dotProduct_P += ( dotProductVector_P[0] + dotProductVector_P[1] ); + dotProduct_L += ( dotProductVector_L[0] + dotProductVector_L[1] ); + dotProduct_VL += ( dotProductVector_VL[0] + dotProductVector_VL[1] ); + + if((num_points % 2) != 0) + { + dotProduct_VE += (*_input) * (*_VE_code)*(*_carrier); + dotProduct_E += (*_input) * (*_E_code)*(*_carrier); + dotProduct_P += (*_input) * (*_P_code)*(*_carrier); + dotProduct_L += (*_input) * (*_L_code)*(*_carrier); + dotProduct_VL += (*_input) * (*_VL_code)*(*_carrier); + } + + *VE_out = dotProduct_VE; + *E_out = dotProduct_E; + *P_out = dotProduct_P; + *L_out = dotProduct_L; + *VL_out = dotProduct_VL; + +} + +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_GENERIC +/*! 
+ \brief Performs the carrier wipe-off mixing and the VE, Early, Prompt, Late and VL correlation + \param input The input signal input + \param carrier The carrier signal input + \param VE_code VE PRN code replica input + \param E_code Early PRN code replica input + \param P_code Early PRN code replica input + \param L_code Early PRN code replica input + \param VL_code VL PRN code replica input + \param VE_out VE correlation output + \param E_out Early correlation output + \param P_out Early correlation output + \param L_out Early correlation output + \param VL_out VL correlation output + \param num_points The number of complex values in vectors + */ +static inline void volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_generic(lv_32fc_t* VE_out, lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, lv_32fc_t* VL_out, const lv_32fc_t* input, const lv_32fc_t* carrier, const lv_32fc_t* VE_code, const lv_32fc_t* E_code, const lv_32fc_t* P_code, const lv_32fc_t* L_code, const lv_32fc_t* VL_code, unsigned int num_points) +{ + lv_32fc_t bb_signal_sample; + + bb_signal_sample = lv_cmake(0, 0); + + *VE_out = 0; + *E_out = 0; + *P_out = 0; + *L_out = 0; + *VL_out = 0; + // perform Early, Prompt and Late correlation + for(int i=0; i < num_points; ++i) + { + //Perform the carrier wipe-off + bb_signal_sample = input[i] * carrier[i]; + // Now get early, late, and prompt values for each + *VE_out += bb_signal_sample * VE_code[i]; + *E_out += bb_signal_sample * E_code[i]; + *P_out += bb_signal_sample * P_code[i]; + *L_out += bb_signal_sample * L_code[i]; + *VL_out += bb_signal_sample * VL_code[i]; + } +} + +#endif /* LV_HAVE_GENERIC */ + +#endif /* INCLUDED_gnsssdr_volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5_a_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h new file mode 100644 index 000000000..c9079b652 --- /dev/null +++ 
b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h @@ -0,0 +1,183 @@ +/*! + * \file volk_gnsssdr_8i_accumulator_s8i.h + * \brief Volk protokernel: 8 bits (char) scalar accumulator + * \authors
    + *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com + *
+ * + * Volk protokernel that implements an accumulator of char values + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see . + * + * ------------------------------------------------------------------------- + */ + +#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H +#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H + +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include +/*! 
+ \brief Accumulates the values in the input buffer + \param result The accumulated result + \param inputBuffer The buffer of data to be accumulated + \param num_points The number of values in inputBuffer to be accumulated + */ +static inline void volk_gnsssdr_8i_accumulator_s8i_u_sse3(char* result, const char* inputBuffer, unsigned int num_points){ + char returnValue = 0; + const unsigned int sse_iters = num_points / 16; + + const char* aPtr = inputBuffer; + + __VOLK_ATTR_ALIGNED(16) char tempBuffer[16]; + __m128i accumulator = _mm_setzero_si128(); + __m128i aVal = _mm_setzero_si128(); + + for(unsigned int number = 0; number < sse_iters; number++){ + aVal = _mm_lddqu_si128((__m128i*)aPtr); + accumulator = _mm_add_epi8(accumulator, aVal); + aPtr += 16; + } + _mm_storeu_si128((__m128i*)tempBuffer,accumulator); + + for(int i = 0; i<16; ++i){ + returnValue += tempBuffer[i]; + } + + for(int i = 0; i<(num_points % 16); ++i){ + returnValue += (*aPtr++); + } + + *result = returnValue; +} +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_GENERIC +/*! + \brief Accumulates the values in the input buffer + \param result The accumulated result + \param inputBuffer The buffer of data to be accumulated + \param num_points The number of values in inputBuffer to be accumulated + */ +static inline void volk_gnsssdr_8i_accumulator_s8i_generic(char* result, const char* inputBuffer, unsigned int num_points){ + const char* aPtr = inputBuffer; + char returnValue = 0; + + for(unsigned int number = 0;number < num_points; number++){ + returnValue += (*aPtr++); + } + *result = returnValue; +} +#endif /* LV_HAVE_GENERIC */ + +#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H */ + + +#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H +#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H + +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include +/*! 
+ \brief Accumulates the values in the input buffer + \param result The accumulated result + \param inputBuffer The buffer of data to be accumulated + \param num_points The number of values in inputBuffer to be accumulated + */ +static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const char* inputBuffer, unsigned int num_points){ + char returnValue = 0; + const unsigned int sse_iters = num_points / 16; + + const char* aPtr = inputBuffer; + + __VOLK_ATTR_ALIGNED(16) char tempBuffer[16]; + __m128i accumulator = _mm_setzero_si128(); + __m128i aVal = _mm_setzero_si128(); + + for(unsigned int number = 0; number < sse_iters; number++){ + aVal = _mm_load_si128((__m128i*)aPtr); + accumulator = _mm_add_epi8(accumulator, aVal); + aPtr += 16; + } + _mm_store_si128((__m128i*)tempBuffer,accumulator); + + for(int i = 0; i<16; ++i){ + returnValue += tempBuffer[i]; + } + + for(int i = 0; i<(num_points % 16); ++i){ + returnValue += (*aPtr++); + } + + *result = returnValue; +} +#endif /* LV_HAVE_SSE3 */ + +#ifdef LV_HAVE_GENERIC +/*! + \brief Accumulates the values in the input buffer + \param result The accumulated result + \param inputBuffer The buffer of data to be accumulated + \param num_points The number of values in inputBuffer to be accumulated + */ +static inline void volk_gnsssdr_8i_accumulator_s8i_a_generic(char* result, const char* inputBuffer, unsigned int num_points){ + const char* aPtr = inputBuffer; + char returnValue = 0; + + for(unsigned int number = 0;number < num_points; number++){ + returnValue += (*aPtr++); + } + *result = returnValue; +} +#endif /* LV_HAVE_GENERIC */ + +#ifdef LV_HAVE_ORC +/*! 
+ \brief Accumulates the values in the input buffer + \param result The accumulated result + \param inputBuffer The buffer of data to be accumulated + \param num_points The number of values in inputBuffer to be accumulated + */ +extern void volk_gnsssdr_8i_accumulator_s8i_a_orc_impl(short* result, const char* inputBuffer, unsigned int num_points); +static inline void volk_gnsssdr_8i_accumulator_s8i_u_orc(char* result, const char* inputBuffer, unsigned int num_points){ + + short res = 0; + char* resc = (char*)&res; + resc++; + + volk_gnsssdr_8i_accumulator_s8i_a_orc_impl(&res, inputBuffer, num_points); + + *result = *resc; +} +#endif /* LV_HAVE_ORC */ + +#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H */ + diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h new file mode 100644 index 000000000..0bb85a1dc --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h @@ -0,0 +1,493 @@ +/*! + * \file volk_gnsssdr_8i_index_max_16u.h + * \brief Volk protokernel: calculates the index of the maximum value in a group of 8 bits (char) scalars + * \authors
    + *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com + *
+ * + * Volk protokernel that returns the index of the maximum value of a group of 8 bits (char) scalars + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see . + * + * ------------------------------------------------------------------------- + */ + +#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H +#define INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H + +#include +#include +#include + +#ifdef LV_HAVE_AVX +#include "immintrin.h" +/*! 
+ \brief Returns the index of the max value in src0 + \param target The index of the max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, const char* src0, unsigned int num_points) { + if(num_points > 0){ + const unsigned int sse_iters = num_points / 32; + + char* basePtr = (char*)src0; + char* inputPtr = (char*)src0; + char max = src0[0]; + unsigned int index = 0; + __VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32]; + __m256i ones, compareResults, currentValues; + __m128i compareResultslo, compareResultshi, maxValues, lo, hi; + + ones = _mm256_set1_epi8(0xFF); + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) + { + currentValues = _mm256_lddqu_si256((__m256i*)inputPtr); + + lo = _mm256_castsi256_si128(currentValues); + hi = _mm256_extractf128_si256(currentValues,1); + + compareResultslo = _mm_cmpgt_epi8(maxValues, lo); + compareResultshi = _mm_cmpgt_epi8(maxValues, hi); + + //compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h + compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1); + + if (!_mm256_testc_si256(compareResults, ones)) + { + _mm256_storeu_si256((__m256i*)¤tValuesBuffer, currentValues); + + for(int i = 0; i < 32; i++) + { + if(currentValuesBuffer[i] > max) + { + index = inputPtr - basePtr + i; + max = currentValuesBuffer[i]; + } + } + maxValues = _mm_set1_epi8(max); + } + + inputPtr += 32; + } + + for(int i = 0; i<(num_points % 32); ++i) + { + if(src0[i] > max) + { + index = i; + max = src0[i]; + } + } + target[0] = index; + } +} + +#endif /*LV_HAVE_AVX*/ + +#ifdef LV_HAVE_SSE4_1 +#include +/*! 
+ \brief Returns the index of the max value in src0 + \param target The index of the max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, const char* src0, unsigned int num_points) { + if(num_points > 0){ + const unsigned int sse_iters = num_points / 16; + + char* basePtr = (char*)src0; + char* inputPtr = (char*)src0; + char max = src0[0]; + unsigned int index = 0; + __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16]; + __m128i maxValues, compareResults, currentValues; + + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) + { + currentValues = _mm_lddqu_si128((__m128i*)inputPtr); + + compareResults = _mm_cmpgt_epi8(maxValues, currentValues); + + if (!_mm_test_all_ones(compareResults)) + { + _mm_storeu_si128((__m128i*)¤tValuesBuffer, currentValues); + + for(int i = 0; i < 16; i++) + { + if(currentValuesBuffer[i] > max) + { + index = inputPtr - basePtr + i; + max = currentValuesBuffer[i]; + } + } + maxValues = _mm_set1_epi8(max); + } + + inputPtr += 16; + } + + for(int i = 0; i<(num_points % 16); ++i) + { + if(src0[i] > max) + { + index = i; + max = src0[i]; + } + } + target[0] = index; + } +} + +#endif /*LV_HAVE_SSE4_1*/ + +#ifdef LV_HAVE_SSE2 +#include +/*! 
+ \brief Returns the index of the max value in src0 + \param target The index of the max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, const char* src0, unsigned int num_points) { + if(num_points > 0){ + const unsigned int sse_iters = num_points / 16; + + char* basePtr = (char*)src0; + char* inputPtr = (char*)src0; + char max = src0[0]; + unsigned int index = 0; + unsigned short mask; + __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16]; + __m128i maxValues, compareResults, currentValues; + + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) + { + currentValues = _mm_loadu_si128((__m128i*)inputPtr); + compareResults = _mm_cmpgt_epi8(maxValues, currentValues); + mask = _mm_movemask_epi8(compareResults); + + if (mask != 0xFFFF) + { + _mm_storeu_si128((__m128i*)¤tValuesBuffer, currentValues); + mask = ~mask; + int i = 0; + while (mask > 0) + { + if ((mask & 1) == 1) + { + if(currentValuesBuffer[i] > max) + { + index = inputPtr - basePtr + i; + max = currentValuesBuffer[i]; + } + } + i++; + mask >>= 1; + } + maxValues = _mm_set1_epi8(max); + } + inputPtr += 16; + } + + for(int i = 0; i<(num_points % 16); ++i) + { + if(src0[i] > max) + { + index = i; + max = src0[i]; + } + } + target[0] = index; + } +} + +#endif /*LV_HAVE_SSE2*/ + +#ifdef LV_HAVE_GENERIC +/*! 
+ \brief Returns the index of the max value in src0 + \param target The index of the max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, const char* src0, unsigned int num_points) { + + if(num_points > 0) + { + char max = src0[0]; + unsigned int index = 0; + + for(unsigned int i = 1; i < num_points; ++i) + { + if(src0[i] > max) + { + index = i; + max = src0[i]; + } + } + target[0] = index; + } +} + +#endif /*LV_HAVE_GENERIC*/ + +#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H*/ + + +#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H +#define INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H + +#include +#include +#include + +#ifdef LV_HAVE_AVX +#include "immintrin.h" +/*! + \brief Returns the index of the max value in src0 + \param target The index of the max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, const char* src0, unsigned int num_points) { + if(num_points > 0){ + const unsigned int sse_iters = num_points / 32; + + char* basePtr = (char*)src0; + char* inputPtr = (char*)src0; + char max = src0[0]; + unsigned int index = 0; + __VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32]; + __m256i ones, compareResults, currentValues; + __m128i compareResultslo, compareResultshi, maxValues, lo, hi; + + ones = _mm256_set1_epi8(0xFF); + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) + { + currentValues = _mm256_load_si256((__m256i*)inputPtr); + + lo = _mm256_castsi256_si128(currentValues); + hi = _mm256_extractf128_si256(currentValues,1); + + compareResultslo = _mm_cmpgt_epi8(maxValues, lo); + compareResultshi = _mm_cmpgt_epi8(maxValues, hi); + + //compareResults = _mm256_set_m128i(compareResultshi , 
compareResultslo); //not defined in some versions of immintrin.h + compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1); + + if (!_mm256_testc_si256(compareResults, ones)) + { + _mm256_store_si256((__m256i*)¤tValuesBuffer, currentValues); + + for(int i = 0; i < 32; i++) + { + if(currentValuesBuffer[i] > max) + { + index = inputPtr - basePtr + i; + max = currentValuesBuffer[i]; + } + } + maxValues = _mm_set1_epi8(max); + } + + inputPtr += 32; + } + + for(int i = 0; i<(num_points % 32); ++i) + { + if(src0[i] > max) + { + index = i; + max = src0[i]; + } + } + target[0] = index; + } +} + +#endif /*LV_HAVE_AVX*/ + +#ifdef LV_HAVE_SSE4_1 +#include "smmintrin.h" +#include "emmintrin.h" +/*! + \brief Returns the index of the max value in src0 + \param target The index of the max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, const char* src0, unsigned int num_points) { + if(num_points > 0){ + const unsigned int sse_iters = num_points / 16; + + char* basePtr = (char*)src0; + char* inputPtr = (char*)src0; + char max = src0[0]; + unsigned int index = 0; + __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16]; + __m128i maxValues, compareResults, currentValues; + + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) + { + currentValues = _mm_load_si128((__m128i*)inputPtr); + + compareResults = _mm_cmpgt_epi8(maxValues, currentValues); + + if (!_mm_test_all_ones(compareResults)) + { + _mm_store_si128((__m128i*)¤tValuesBuffer, currentValues); + + for(int i = 0; i < 16; i++) + { + if(currentValuesBuffer[i] > max) + { + index = inputPtr - basePtr + i; + max = currentValuesBuffer[i]; + } + } + maxValues = _mm_set1_epi8(max); + } + + inputPtr += 16; + } + + for(int i = 0; i<(num_points % 16); ++i) + { + if(src0[i] > max) + { + 
index = i; + max = src0[i]; + } + } + target[0] = index; + } +} + +#endif /*LV_HAVE_SSE4_1*/ + +#ifdef LV_HAVE_SSE2 +#include "emmintrin.h" +/*! + \brief Returns the index of the max value in src0 + \param target The index of the max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, const char* src0, unsigned int num_points) { + if(num_points > 0){ + const unsigned int sse_iters = num_points / 16; + + char* basePtr = (char*)src0; + char* inputPtr = (char*)src0; + char max = src0[0]; + unsigned int index = 0; + unsigned short mask; + __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16]; + __m128i maxValues, compareResults, currentValues; + + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) + { + currentValues = _mm_load_si128((__m128i*)inputPtr); + compareResults = _mm_cmpgt_epi8(maxValues, currentValues); + mask = _mm_movemask_epi8(compareResults); + + if (mask != 0xFFFF) + { + _mm_store_si128((__m128i*)¤tValuesBuffer, currentValues); + mask = ~mask; + int i = 0; + while (mask > 0) + { + if ((mask & 1) == 1) + { + if(currentValuesBuffer[i] > max) + { + index = inputPtr - basePtr + i; + max = currentValuesBuffer[i]; + } + } + i++; + mask >>= 1; + } + maxValues = _mm_set1_epi8(max); + } + inputPtr += 16; + } + + for(int i = 0; i<(num_points % 16); ++i) + { + if(src0[i] > max) + { + index = i; + max = src0[i]; + } + } + target[0] = index; + } +} + +#endif /*LV_HAVE_SSE2*/ + +#ifdef LV_HAVE_GENERIC +/*! 
+ \brief Returns the index of the max value in src0 + \param target The index of the max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_index_max_16u_a_generic(unsigned int* target, const char* src0, unsigned int num_points) { + + if(num_points > 0) + { + char max = src0[0]; + unsigned int index = 0; + + for(unsigned int i = 1; i < num_points; ++i) + { + if(src0[i] > max) + { + index = i; + max = src0[i]; + } + } + target[0] = index; + } +} + +#endif /*LV_HAVE_GENERIC*/ + +#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H*/ diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h new file mode 100644 index 000000000..ef362fd57 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h @@ -0,0 +1,327 @@ +/*! + * \file volk_gnsssdr_8i_max_s8i.h + * \brief Volk protokernel: calculates the maximum value in a group of 8 bits (char) scalars + * \authors
    + *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com + *
+ * + * Volk protokernel that returns the maximum value of a group of 8 bits (char) scalars + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see . + * + * ------------------------------------------------------------------------- + */ + +#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_u_H +#define INCLUDED_volk_gnsssdr_8i_max_s8i_u_H + +#include +#include +#include + +#ifdef LV_HAVE_SSE4_1 +#include +/*! 
+ \brief Returns the max value in src0 + \param target The max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_max_s8i_u_sse4_1(char target, const char* src0, unsigned int num_points) { + if(num_points > 0){ + const unsigned int sse_iters = num_points / 16; + + char* inputPtr = (char*)src0; + char max = src0[0]; + __VOLK_ATTR_ALIGNED(16) char maxValuesBuffer[16]; + __m128i maxValues, compareResults, currentValues; + + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) + { + currentValues = _mm_loadu_si128((__m128i*)inputPtr); + compareResults = _mm_cmpgt_epi8(maxValues, currentValues); + maxValues = _mm_blendv_epi8(currentValues, maxValues, compareResults); + inputPtr += 16; + } + + _mm_storeu_si128((__m128i*)maxValuesBuffer, maxValues); + + for(int i = 0; i<16; ++i) + { + if(maxValuesBuffer[i] > max) + { + max = maxValuesBuffer[i]; + } + } + + for(int i = 0; i<(num_points % 16); ++i) + { + if(src0[i] > max) + { + max = src0[i]; + } + } + target = max; + } +} + +#endif /*LV_HAVE_SSE4_1*/ + +#ifdef LV_HAVE_SSE2 +#include +/*! 
+ \brief Returns the max value in src0 + \param target The max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_max_s8i_u_sse2(char target, const char* src0, unsigned int num_points) { + if(num_points > 0){ + const unsigned int sse_iters = num_points / 16; + + char* inputPtr = (char*)src0; + char max = src0[0]; + unsigned short mask; + __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16]; + __m128i maxValues, compareResults, currentValues; + + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) + { + currentValues = _mm_loadu_si128((__m128i*)inputPtr); + compareResults = _mm_cmpgt_epi8(maxValues, currentValues); + mask = _mm_movemask_epi8(compareResults); + + if (mask != 0xFFFF) + { + _mm_storeu_si128((__m128i*)¤tValuesBuffer, currentValues); + mask = ~mask; + int i = 0; + while (mask > 0) + { + if ((mask & 1) == 1) + { + if(currentValuesBuffer[i] > max) + { + max = currentValuesBuffer[i]; + } + } + i++; + mask >>= 1; + } + maxValues = _mm_set1_epi8(max); + } + inputPtr += 16; + } + + for(int i = 0; i<(num_points % 16); ++i) + { + if(src0[i] > max) + { + max = src0[i]; + } + } + target = max; + } +} + +#endif /*LV_HAVE_SSE2*/ + +#ifdef LV_HAVE_GENERIC +/*! 
+ \brief Returns the max value in src0 + \param target The max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_max_s8i_generic(char target, const char* src0, unsigned int num_points) { + if(num_points > 0) + { + char max = src0[0]; + + for(unsigned int i = 1; i < num_points; ++i) + { + if(src0[i] > max) + { + max = src0[i]; + } + } + target = max; + } +} + +#endif /*LV_HAVE_GENERIC*/ + +#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_u_H*/ + + +#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_a_H +#define INCLUDED_volk_gnsssdr_8i_max_s8i_a_H + +#include +#include +#include + +#ifdef LV_HAVE_SSE4_1 +#include "smmintrin.h" +/*! + \brief Returns the max value in src0 + \param target The max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_max_s8i_a_sse4_1(char target, const char* src0, unsigned int num_points) { + if(num_points > 0){ + const unsigned int sse_iters = num_points / 16; + + char* inputPtr = (char*)src0; + char max = src0[0]; + __VOLK_ATTR_ALIGNED(16) char maxValuesBuffer[16]; + __m128i maxValues, compareResults, currentValues; + + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) + { + currentValues = _mm_load_si128((__m128i*)inputPtr); + compareResults = _mm_cmpgt_epi8(maxValues, currentValues); + maxValues = _mm_blendv_epi8(currentValues, maxValues, compareResults); + inputPtr += 16; + } + + _mm_store_si128((__m128i*)maxValuesBuffer, maxValues); + + for(int i = 0; i<16; ++i) + { + if(maxValuesBuffer[i] > max) + { + max = maxValuesBuffer[i]; + } + } + + for(int i = 0; i<(num_points % 16); ++i) + { + if(src0[i] > max) + { + max = src0[i]; + } + } + target = max; + } +} + +#endif /*LV_HAVE_SSE4_1*/ + +#ifdef LV_HAVE_SSE2 +#include "emmintrin.h" +/*! 
+ \brief Returns the max value in src0 + \param target The max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_max_s8i_a_sse2(char target, const char* src0, unsigned int num_points) { + if(num_points > 0){ + const unsigned int sse_iters = num_points / 16; + + char* inputPtr = (char*)src0; + char max = src0[0]; + unsigned short mask; + __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16]; + __m128i maxValues, compareResults, currentValues; + + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) + { + currentValues = _mm_load_si128((__m128i*)inputPtr); + compareResults = _mm_cmpgt_epi8(maxValues, currentValues); + mask = _mm_movemask_epi8(compareResults); + + if (mask != 0xFFFF) + { + _mm_store_si128((__m128i*)¤tValuesBuffer, currentValues); + mask = ~mask; + int i = 0; + while (mask > 0) + { + if ((mask & 1) == 1) + { + if(currentValuesBuffer[i] > max) + { + max = currentValuesBuffer[i]; + } + } + i++; + mask >>= 1; + } + maxValues = _mm_set1_epi8(max); + } + inputPtr += 16; + } + + for(int i = 0; i<(num_points % 16); ++i) + { + if(src0[i] > max) + { + max = src0[i]; + } + } + target = max; + } +} + +#endif /*LV_HAVE_SSE2*/ + +#ifdef LV_HAVE_GENERIC +/*! 
+ \brief Returns the max value in src0 + \param target The max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_max_s8i_a_generic(char target, const char* src0, unsigned int num_points) { + if(num_points > 0) + { + if(num_points > 0) + { + char max = src0[0]; + + for(unsigned int i = 1; i < num_points; ++i) + { + if(src0[i] > max) + { + max = src0[i]; + } + } + target = max; + } + } +} + +#endif /*LV_HAVE_GENERIC*/ + +#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_a_H*/ \ No newline at end of file diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h new file mode 100644 index 000000000..4a2bd5ab2 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h @@ -0,0 +1,184 @@ +/*! + * \file volk_gnsssdr_8i_x2_add_8i.h + * \brief Volk protokernel: adds pairs of 8 bits (char) scalars + * \authors
    + *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com + *
+ * + * Volk protokernel that adds pairs of 8 bits (char) scalars + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see . + * + * ------------------------------------------------------------------------- + */ + +#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H +#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H + +#include +#include + +#ifdef LV_HAVE_SSE2 +#include "pmmintrin.h" +/*! 
+ \brief Adds the two input vectors and store their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be added + \param bVector One of the vectors to be added + \param num_points The number of values in aVector and bVector to be added together and stored into cVector + */ +static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* aVector, const char* bVector, unsigned int num_points){ + + const unsigned int sse_iters = num_points / 16; + + char* cPtr = cVector; + const char* aPtr = aVector; + const char* bPtr= bVector; + + __m128i aVal, bVal, cVal; + + for(int number = 0; number < sse_iters; number++){ + + aVal = _mm_lddqu_si128((__m128i*)aPtr); + bVal = _mm_lddqu_si128((__m128i*)bPtr); + + cVal = _mm_add_epi8(aVal, bVal); + + _mm_storeu_si128((__m128i*)cPtr,cVal); // Store the results back into the C container + + aPtr += 16; + bPtr += 16; + cPtr += 16; + } + + for(int i = 0; i<(num_points % 16); ++i) + { + *cPtr++ = (*aPtr++) + (*bPtr++); + } +} +#endif /* LV_HAVE_SSE2 */ + +#ifdef LV_HAVE_GENERIC +/*! 
+  \brief Adds the two input vectors element by element and stores the results in cVector
+  \param cVector The vector where the results will be stored
+  \param aVector One of the vectors to be added
+  \param bVector One of the vectors to be added
+  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+ */
+static inline void volk_gnsssdr_8i_x2_add_8i_generic(char* cVector, const char* aVector, const char* bVector, unsigned int num_points){
+    char* cPtr = cVector;
+    const char* aPtr = aVector;
+    const char* bPtr= bVector;
+    unsigned int number = 0;
+
+    // The int sum is narrowed back to char on assignment
+    for(number = 0; number < num_points; number++){
+        *cPtr++ = (*aPtr++) + (*bPtr++);
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H */
+
+
+#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H
+#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H
+
+#include
+#include
+
+#ifdef LV_HAVE_SSE2
+#include "pmmintrin.h"
+/*!
+  \brief Adds the two input vectors element by element and stores the results in cVector
+  \param cVector The vector where the results will be stored (written with aligned 16-byte stores)
+  \param aVector One of the vectors to be added (read with aligned 16-byte loads)
+  \param bVector One of the vectors to be added (read with aligned 16-byte loads)
+  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+ */
+static inline void volk_gnsssdr_8i_x2_add_8i_a_sse2(char* cVector, const char* aVector, const char* bVector, unsigned int num_points){
+
+    const unsigned int sse_iters = num_points / 16;
+
+    char* cPtr = cVector;
+    const char* aPtr = aVector;
+    const char* bPtr= bVector;
+
+    __m128i aVal, bVal, cVal;
+
+    // Main loop: 16 byte-wise additions per iteration
+    for(int number = 0; number < sse_iters; number++){
+
+        aVal = _mm_load_si128((__m128i*)aPtr);
+        bVal = _mm_load_si128((__m128i*)bPtr);
+
+        cVal = _mm_add_epi8(aVal, bVal);
+
+        _mm_store_si128((__m128i*)cPtr,cVal); // Store the results back into the C container
+
+        aPtr += 16;
+        bPtr += 16;
+        cPtr += 16;
+    }
+
+    // Scalar clean-up for the samples that do not fill a whole 16-byte vector
+    for(int i = 0; i<(num_points % 16); ++i)
+    {
+        *cPtr++ = (*aPtr++) + (*bPtr++);
+    }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Adds the two input vectors element by element and stores the results in cVector
+  \param cVector The vector where the results will be stored
+  \param aVector One of the vectors to be added
+  \param bVector One of the vectors to be added
+  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+ */
+static inline void volk_gnsssdr_8i_x2_add_8i_a_generic(char* cVector, const char* aVector, const char* bVector, unsigned int num_points){
+    char* cPtr = cVector;
+    const char* aPtr = aVector;
+    const char* bPtr= bVector;
+    unsigned int number = 0;
+
+    // The int sum is narrowed back to char on assignment
+    for(number = 0; number < num_points; number++){
+        *cPtr++ = (*aPtr++) + (*bPtr++);
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_ORC
+/*!
+  \brief Adds the two input vectors and store their results in the third vector
+  \param cVector The vector where the results will be stored
+  \param aVector One of the vectors to be added
+  \param bVector One of the vectors to be added
+  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
+  \note Thin wrapper: all arguments are forwarded unchanged to the externally defined volk_gnsssdr_8i_x2_add_8i_a_orc_impl.
+ */
+extern void volk_gnsssdr_8i_x2_add_8i_a_orc_impl(char* cVector, const char* aVector, const char* bVector, unsigned int num_points);
+static inline void volk_gnsssdr_8i_x2_add_8i_u_orc(char* cVector, const char* aVector, const char* bVector, unsigned int num_points){
+    volk_gnsssdr_8i_x2_add_8i_a_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
+#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h
new file mode 100644
index 000000000..231796274
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h
@@ -0,0 +1,326 @@
+/*!
+ * \file volk_gnsssdr_8ic_conjugate_8ic.h + * \brief Volk protokernel: calculates the conjugate of a 16 bits vector + * \authors
    + *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com + *
+ * + * Volk protokernel that calculates the conjugate of a + * 16 bits vector (8 bits the real part and 8 bits the imaginary part) + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see . + * + * ------------------------------------------------------------------------- + */ + +#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H +#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H + +#include +#include +#include + +#ifdef LV_HAVE_AVX +#include "immintrin.h" +/*! + \brief Takes the conjugate of an unsigned char vector. 
+ \param cVector The vector where the results will be stored + \param aVector Vector to be conjugated + \param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector + */ +static inline void volk_gnsssdr_8ic_conjugate_8ic_u_avx(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){ + const unsigned int sse_iters = num_points / 16; + + lv_8sc_t* c = cVector; + const lv_8sc_t* a = aVector; + + __m256 tmp; + __m128i tmp128lo, tmp128hi; + __m256 conjugator1 = _mm256_castsi256_ps(_mm256_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255)); + __m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); + + for (int i = 0; i < sse_iters; ++i) + { + tmp = _mm256_loadu_ps((float*)a); + tmp = _mm256_xor_ps(tmp, conjugator1); + tmp128lo = _mm256_castsi256_si128(_mm256_castps_si256(tmp)); + tmp128lo = _mm_add_epi8(tmp128lo, conjugator2); + tmp128hi = _mm256_extractf128_si256(_mm256_castps_si256(tmp),1); + tmp128hi = _mm_add_epi8(tmp128hi, conjugator2); + //tmp = _mm256_set_m128i(tmp128hi , tmp128lo); //not defined in some versions of immintrin.h + tmp = _mm256_insertf128_si256(_mm256_castsi128_si256(tmp128lo),(tmp128hi),1); + _mm256_storeu_ps((float*)c, tmp); + + a += 16; + c += 16; + } + + for (int i = 0; i<(num_points % 16); ++i) + { + *c++ = lv_conj(*a++); + } +} +#endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_SSSE3 +#include "tmmintrin.h" +/*! + \brief Takes the conjugate of an unsigned char vector. 
+  \param cVector The vector where the results will be stored
+  \param aVector Vector to be conjugated
+  \param num_points The number of complex (lv_8sc_t) values in aVector to be conjugated and stored into cVector
+  \note Neither buffer needs to be aligned: unaligned 128-bit loads and stores are used.
+ */
+static inline void volk_gnsssdr_8ic_conjugate_8ic_u_ssse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
+    // Each iteration consumes 8 lv_8sc_t values (a and c advance by 8 elements per 128-bit access)
+    const unsigned int sse_iters = num_points / 8;
+
+    lv_8sc_t* c = cVector;
+    const lv_8sc_t* a = aVector;
+    __m128i tmp;
+
+    // _mm_sign_epi8 keeps a byte where the control byte is positive and negates it where it is negative,
+    // so every second byte of the data is negated
+    __m128i conjugator = _mm_setr_epi8(1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1);
+
+    for (int i = 0; i < sse_iters; ++i)
+    {
+        tmp = _mm_lddqu_si128((__m128i*)a);
+        tmp = _mm_sign_epi8(tmp, conjugator);
+        _mm_storeu_si128((__m128i*)c, tmp);
+        a += 8;
+        c += 8;
+    }
+
+    // Scalar clean-up for the values that do not fill a whole 128-bit vector
+    for (int i = 0; i<(num_points % 8); ++i)
+    {
+        *c++ = lv_conj(*a++);
+    }
+
+}
+#endif /* LV_HAVE_SSSE3 */
+
+#ifdef LV_HAVE_SSE3
+#include
+/*!
+  \brief Takes the conjugate of a complex vector with 8-bit real and imaginary parts (lv_8sc_t).
+  \param cVector The vector where the results will be stored
+  \param aVector Vector to be conjugated
+  \param num_points The number of complex (lv_8sc_t) values in aVector to be conjugated and stored into cVector
+  \note Neither buffer needs to be aligned: unaligned 128-bit loads and stores are used.
+ */
+static inline void volk_gnsssdr_8ic_conjugate_8ic_u_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
+    // Each iteration consumes 8 lv_8sc_t values (a and c advance by 8 elements per 128-bit access)
+    const unsigned int sse_iters = num_points / 8;
+
+    lv_8sc_t* c = cVector;
+    const lv_8sc_t* a = aVector;
+    __m128i tmp;
+
+    // Negation of every second byte in two steps: flip its bits (XOR with 0xFF), then add 1
+    __m128i conjugator1 = _mm_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+    __m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1);
+
+    for (int i = 0; i < sse_iters; ++i)
+    {
+        tmp = _mm_lddqu_si128((__m128i*)a);
+        tmp = _mm_xor_si128(tmp, conjugator1);
+        tmp = _mm_add_epi8(tmp, conjugator2);
+        _mm_storeu_si128((__m128i*)c, tmp);
+        a += 8;
+        c += 8;
+    }
+
+    // Scalar clean-up for the values that do not fill a whole 128-bit vector
+    for (int i = 0; i<(num_points % 8); ++i)
+    {
+        *c++ = lv_conj(*a++);
+    }
+
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Takes the conjugate of an unsigned char vector. + \param cVector The vector where the results will be stored + \param aVector Vector to be conjugated + \param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector + */ +static inline void volk_gnsssdr_8ic_conjugate_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){ + lv_8sc_t* cPtr = cVector; + const lv_8sc_t* aPtr = aVector; + unsigned int number = 0; + + for(number = 0; number < num_points; number++){ + *cPtr++ = lv_conj(*aPtr++); + } +} +#endif /* LV_HAVE_GENERIC */ + +#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H */ + + +#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H +#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H + +#include +#include +#include + +#ifdef LV_HAVE_AVX +#include "immintrin.h" +/*! + \brief Takes the conjugate of an unsigned char vector. + \param cVector The vector where the results will be stored + \param aVector Vector to be conjugated + \param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector + */ +static inline void volk_gnsssdr_8ic_conjugate_8ic_a_avx(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){ + const unsigned int sse_iters = num_points / 16; + + lv_8sc_t* c = cVector; + const lv_8sc_t* a = aVector; + + __m256 tmp; + __m128i tmp128lo, tmp128hi; + __m256 conjugator1 = _mm256_castsi256_ps(_mm256_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255)); + __m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); + + for (int i = 0; i < sse_iters; ++i) + { + tmp = _mm256_load_ps((float*)a); + tmp = _mm256_xor_ps(tmp, conjugator1); + tmp128lo = _mm256_castsi256_si128(_mm256_castps_si256(tmp)); + tmp128lo = _mm_add_epi8(tmp128lo, conjugator2); + tmp128hi = _mm256_extractf128_si256(_mm256_castps_si256(tmp),1); + 
tmp128hi = _mm_add_epi8(tmp128hi, conjugator2); + //tmp = _mm256_set_m128i(tmp128hi , tmp128lo); //not defined in some versions of immintrin.h + tmp = _mm256_insertf128_si256(_mm256_castsi128_si256(tmp128lo),(tmp128hi),1); + _mm256_store_ps((float*)c, tmp); + + a += 16; + c += 16; + } + + for (int i = 0; i<(num_points % 16); ++i) + { + *c++ = lv_conj(*a++); + } +} +#endif /* LV_HAVE_AVX */ + +#ifdef LV_HAVE_SSSE3 +#include "tmmintrin.h" +/*! + \brief Takes the conjugate of an unsigned char vector. + \param cVector The vector where the results will be stored + \param aVector Vector to be conjugated + \param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector + */ +static inline void volk_gnsssdr_8ic_conjugate_8ic_a_ssse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){ + const unsigned int sse_iters = num_points / 8; + + lv_8sc_t* c = cVector; + const lv_8sc_t* a = aVector; + __m128i tmp; + + __m128i conjugator = _mm_setr_epi8(1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1); + + for (int i = 0; i < sse_iters; ++i) + { + tmp = _mm_load_si128((__m128i*)a); + tmp = _mm_sign_epi8(tmp, conjugator); + _mm_store_si128((__m128i*)c, tmp); + a += 8; + c += 8; + } + + for (int i = 0; i<(num_points % 8); ++i) + { + *c++ = lv_conj(*a++); + } + +} +#endif /* LV_HAVE_SSSE3 */ + +#ifdef LV_HAVE_SSE3 +#include +/*! + \brief Takes the conjugate of an unsigned char vector. 
+  \param cVector The vector where the results will be stored (written with aligned 128-bit stores)
+  \param aVector Vector to be conjugated (read with aligned 128-bit loads)
+  \param num_points The number of complex (lv_8sc_t) values in aVector to be conjugated and stored into cVector
+ */
+static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
+    // Each iteration consumes 8 lv_8sc_t values (a and c advance by 8 elements per 128-bit access)
+    const unsigned int sse_iters = num_points / 8;
+
+    lv_8sc_t* c = cVector;
+    const lv_8sc_t* a = aVector;
+    __m128i tmp;
+
+    // Negation of every second byte in two steps: flip its bits (XOR with 0xFF), then add 1
+    __m128i conjugator1 = _mm_setr_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+    __m128i conjugator2 = _mm_setr_epi8(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1);
+
+    for (int i = 0; i < sse_iters; ++i)
+    {
+        tmp = _mm_load_si128((__m128i*)a);
+        tmp = _mm_xor_si128(tmp, conjugator1);
+        tmp = _mm_add_epi8(tmp, conjugator2);
+        _mm_store_si128((__m128i*)c, tmp);
+        a += 8;
+        c += 8;
+    }
+
+    // Scalar clean-up for the values that do not fill a whole 128-bit vector
+    for (int i = 0; i<(num_points % 8); ++i)
+    {
+        *c++ = lv_conj(*a++);
+    }
+
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Takes the conjugate of a complex vector with 8-bit real and imaginary parts (lv_8sc_t).
+  \param cVector The vector where the results will be stored
+  \param aVector Vector to be conjugated
+  \param num_points The number of complex (lv_8sc_t) values in aVector to be conjugated and stored into cVector
+ */
+static inline void volk_gnsssdr_8ic_conjugate_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
+    lv_8sc_t* cPtr = cVector;
+    const lv_8sc_t* aPtr = aVector;
+    unsigned int number = 0;
+
+    // One lv_conj call per element, in order
+    for(number = 0; number < num_points; number++){
+        *cPtr++ = lv_conj(*aPtr++);
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_ORC
+/*!
+  \brief Takes the conjugate of a complex vector with 8-bit real and imaginary parts (lv_8sc_t).
+  \param cVector The vector where the results will be stored
+  \param aVector Vector to be conjugated
+  \param num_points The number of complex (lv_8sc_t) values in aVector to be conjugated and stored into cVector
+  \note Thin wrapper: all arguments are forwarded unchanged to the externally defined volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl.
+ */
+extern void volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points);
+static inline void volk_gnsssdr_8ic_conjugate_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points){
+    volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl(cVector, aVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
+#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h
new file mode 100644
index 000000000..1eab648fe
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h
@@ -0,0 +1,320 @@
+/*!
+ * \file volk_gnsssdr_8ic_magnitude_squared_8i.h
+ * \brief Volk protokernel: calculates the magnitude squared of a 16 bits vector
+ * \authors
    + *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com + *
+ * + * Volk protokernel that calculates the magnitude squared of a + * 16 bits vector (8 bits the real part and 8 bits the imaginary part) + * result = (real*real) + (imag*imag) + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see . + * + * ------------------------------------------------------------------------- + */ + +#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_u_H +#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_u_H + +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include +#include "tmmintrin.h" +/*! 
+  \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
+  \param complexVector The vector containing the complex input values
+  \param magnitudeVector The vector containing the real output values
+  \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  \note Each result is stored in a char, so only the low 8 bits of (real*real) + (imag*imag) are kept.
+  \note NOTE(review): _mm_hadd_epi16 and _mm_shuffle_epi8 are SSSE3 intrinsics although this kernel is guarded by LV_HAVE_SSE3 - confirm the intended guard.
+ */
+static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse3(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
+
+    // Each iteration reads two 16-byte vectors (32 input bytes) and writes 16 output bytes
+    const unsigned int sse_iters = num_points / 16;
+
+    const char* complexVectorPtr = (char*)complexVector;
+    char* magnitudeVectorPtr = magnitudeVector;
+
+    __m128i zero, result8;
+    __m128i avector, avectorhi, avectorlo, avectorlomult, avectorhimult, aadded, maska;
+    __m128i bvector, bvectorhi, bvectorlo, bvectorlomult, bvectorhimult, badded, maskb;
+
+    zero = _mm_setzero_si128();
+    // Shuffle controls: pick the low byte of each 16-bit sum; 0x80 entries produce zero bytes.
+    // maska fills the lower 8 output bytes, maskb the upper 8.
+    maska = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
+    maskb = _mm_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
+
+    for(int number = 0;number < sse_iters; number++)
+    {
+        avector = _mm_lddqu_si128((__m128i*)complexVectorPtr);
+        // Widen every byte to 16 bits (zero extension), square it, then add adjacent pairs
+        avectorlo = _mm_unpacklo_epi8 (avector, zero);
+        avectorhi = _mm_unpackhi_epi8 (avector, zero);
+        avectorlomult = _mm_mullo_epi16 (avectorlo, avectorlo);
+        avectorhimult = _mm_mullo_epi16 (avectorhi, avectorhi);
+        aadded = _mm_hadd_epi16 (avectorlomult, avectorhimult);
+
+        complexVectorPtr += 16;
+
+        // Same steps for the second 16-byte vector
+        bvector = _mm_lddqu_si128((__m128i*)complexVectorPtr);
+        bvectorlo = _mm_unpacklo_epi8 (bvector, zero);
+        bvectorhi = _mm_unpackhi_epi8 (bvector, zero);
+        bvectorlomult = _mm_mullo_epi16 (bvectorlo, bvectorlo);
+        bvectorhimult = _mm_mullo_epi16 (bvectorhi, bvectorhi);
+        badded = _mm_hadd_epi16 (bvectorlomult, bvectorhimult);
+
+        complexVectorPtr += 16;
+
+        // Pack the low bytes of both sets of sums into one 16-byte result
+        result8 = _mm_or_si128(_mm_shuffle_epi8(aadded, maska), _mm_shuffle_epi8(badded, maskb));
+
+        _mm_storeu_si128((__m128i*)magnitudeVectorPtr, result8);
+
+        magnitudeVectorPtr += 16;
+
+
+    }
+
+    // Scalar clean-up for the values that do not fill a whole iteration
+    for (int i = 0; i<(num_points % 16); ++i)
+    {
+        const char valReal = *complexVectorPtr++;
+        const char valImag = *complexVectorPtr++;
+        *magnitudeVectorPtr++ = (valReal * valReal) + (valImag * valImag);
+    }
+}
+#endif /* LV_HAVE_SSE3 */
+
+//#ifdef LV_HAVE_SSE
+//#include
+///*!
+//  \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
+//  \param complexVector The vector containing the complex input values
+//  \param magnitudeVector The vector containing the real output values
+//  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
+// */
+//static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+//    unsigned int number = 0;
+//    const unsigned int quarterPoints = num_points / 4;
+//
+//    const float* complexVectorPtr = (float*)complexVector;
+//    float* magnitudeVectorPtr = magnitudeVector;
+//
+//    __m128 cplxValue1, cplxValue2, iValue, qValue, result;
+//    for(;number < quarterPoints; number++){
+//        cplxValue1 = _mm_loadu_ps(complexVectorPtr);
+//        complexVectorPtr += 4;
+//
+//        cplxValue2 = _mm_loadu_ps(complexVectorPtr);
+//        complexVectorPtr += 4;
+//
+//        // Arrange in i1i2i3i4 format
+//        iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
+//        // Arrange in q1q2q3q4 format
+//        qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
+//
+//        iValue = _mm_mul_ps(iValue, iValue); // Square the I values
+//        qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
+//
+//        result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
+//
+//        _mm_storeu_ps(magnitudeVectorPtr, result);
+//        magnitudeVectorPtr += 4;
+//    }
+//
+//    number = quarterPoints * 4;
+//    for(; number < num_points; number++){
+//        float val1Real = *complexVectorPtr++;
+//        float val1Imag = *complexVectorPtr++;
+//        *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
+//    }
+//}
+//#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
+  \param complexVector The vector containing the complex input values
+  \param magnitudeVector The vector containing the real output values
+  \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  \note Each result is stored in a char, so only the low 8 bits of (real*real) + (imag*imag) are kept.
+ */
+static inline void volk_gnsssdr_8ic_magnitude_squared_8i_generic(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
+    const char* complexVectorPtr = (char*)complexVector;
+    char* magnitudeVectorPtr = magnitudeVector;
+
+    // Two consecutive input bytes (read as real, then imag) produce one output byte
+    for(int number = 0; number < num_points; number++){
+        const char real = *complexVectorPtr++;
+        const char imag = *complexVectorPtr++;
+        *magnitudeVectorPtr++ = (real*real) + (imag*imag);
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_u_H */
+
+
+#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_a_H
+#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_a_H
+
+#include
+#include
+#include
+
+#ifdef LV_HAVE_SSE3
+#include
+/*!
+  \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
+  \param complexVector The vector containing the complex input values (read with aligned 128-bit loads)
+  \param magnitudeVector The vector containing the real output values (written with aligned 128-bit stores)
+  \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  \note Each result is stored in a char, so only the low 8 bits of (real*real) + (imag*imag) are kept.
+  \note NOTE(review): _mm_hadd_epi16 and _mm_shuffle_epi8 are SSSE3 intrinsics although this kernel is guarded by LV_HAVE_SSE3 - confirm the intended guard.
+ */
+static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
+
+    // Each iteration reads two 16-byte vectors (32 input bytes) and writes 16 output bytes
+    const unsigned int sse_iters = num_points / 16;
+
+    const char* complexVectorPtr = (char*)complexVector;
+    char* magnitudeVectorPtr = magnitudeVector;
+
+    __m128i zero, result8;
+    __m128i avector, avectorhi, avectorlo, avectorlomult, avectorhimult, aadded, maska;
+    __m128i bvector, bvectorhi, bvectorlo, bvectorlomult, bvectorhimult, badded, maskb;
+
+    zero = _mm_setzero_si128();
+    // Shuffle controls: pick the low byte of each 16-bit sum; 0x80 entries produce zero bytes.
+    // maska fills the lower 8 output bytes, maskb the upper 8.
+    maska = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
+    maskb = _mm_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
+
+    for(int number = 0;number < sse_iters; number++)
+    {
+        avector = _mm_load_si128((__m128i*)complexVectorPtr);
+        // Widen every byte to 16 bits (zero extension), square it, then add adjacent pairs
+        avectorlo = _mm_unpacklo_epi8 (avector, zero);
+        avectorhi = _mm_unpackhi_epi8 (avector, zero);
+        avectorlomult = _mm_mullo_epi16 (avectorlo, avectorlo);
+        avectorhimult = _mm_mullo_epi16 (avectorhi, avectorhi);
+        aadded = _mm_hadd_epi16 (avectorlomult, avectorhimult);
+
+        complexVectorPtr += 16;
+
+        // Same steps for the second 16-byte vector
+        bvector = _mm_load_si128((__m128i*)complexVectorPtr);
+        bvectorlo = _mm_unpacklo_epi8 (bvector, zero);
+        bvectorhi = _mm_unpackhi_epi8 (bvector, zero);
+        bvectorlomult = _mm_mullo_epi16 (bvectorlo, bvectorlo);
+        bvectorhimult = _mm_mullo_epi16 (bvectorhi, bvectorhi);
+        badded = _mm_hadd_epi16 (bvectorlomult, bvectorhimult);
+
+        complexVectorPtr += 16;
+
+        // Pack the low bytes of both sets of sums into one 16-byte result
+        result8 = _mm_or_si128(_mm_shuffle_epi8(aadded, maska), _mm_shuffle_epi8(badded, maskb));
+
+        _mm_store_si128((__m128i*)magnitudeVectorPtr, result8);
+
+        magnitudeVectorPtr += 16;
+
+
+    }
+
+    // Scalar clean-up for the values that do not fill a whole iteration
+    for (int i = 0; i<(num_points % 16); ++i)
+    {
+        const char valReal = *complexVectorPtr++;
+        const char valImag = *complexVectorPtr++;
+        *magnitudeVectorPtr++ = (valReal * valReal) + (valImag * valImag);
+    }
+}
+#endif /* LV_HAVE_SSE3 */
+
+//#ifdef LV_HAVE_SSE
+//#include
+///*!
+//  \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
+//  \param complexVector The vector containing the complex input values
+//  \param magnitudeVector The vector containing the real output values
+//  \param num_points The number of complex values in complexVector to be calculated and stored into cVector
+// */
+//static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
+//    unsigned int number = 0;
+//    const unsigned int quarterPoints = num_points / 4;
+//
+//    const float* complexVectorPtr = (float*)complexVector;
+//    float* magnitudeVectorPtr = magnitudeVector;
+//
+//    __m128 cplxValue1, cplxValue2, iValue, qValue, result;
+//    for(;number < quarterPoints; number++){
+//        cplxValue1 = _mm_load_ps(complexVectorPtr);
+//        complexVectorPtr += 4;
+//
+//        cplxValue2 = _mm_load_ps(complexVectorPtr);
+//        complexVectorPtr += 4;
+//
+//        // Arrange in i1i2i3i4 format
+//        iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
+//        // Arrange in q1q2q3q4 format
+//        qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
+//
+//        iValue = _mm_mul_ps(iValue, iValue); // Square the I values
+//        qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
+//
+//        result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
+//
+//        _mm_store_ps(magnitudeVectorPtr, result);
+//        magnitudeVectorPtr += 4;
+//    }
+//
+//    number = quarterPoints * 4;
+//    for(; number < num_points; number++){
+//        float val1Real = *complexVectorPtr++;
+//        float val1Imag = *complexVectorPtr++;
+//        *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
+//    }
+//}
+//#endif /* LV_HAVE_SSE */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+  \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
+  \param complexVector The vector containing the complex input values
+  \param magnitudeVector The vector containing the real output values
+  \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  \note Each result is stored in a char, so only the low 8 bits of (real*real) + (imag*imag) are kept.
+ */
+static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_generic(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
+    const char* complexVectorPtr = (char*)complexVector;
+    char* magnitudeVectorPtr = magnitudeVector;
+
+    // Two consecutive input bytes (read as real, then imag) produce one output byte
+    for(int number = 0; number < num_points; number++){
+        const char real = *complexVectorPtr++;
+        const char imag = *complexVectorPtr++;
+        *magnitudeVectorPtr++ = (real*real) + (imag*imag);
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_ORC
+/*!
+  \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
+  \param complexVector The vector containing the complex input values
+  \param magnitudeVector The vector containing the real output values
+  \param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
+  \note Thin wrapper: all arguments are forwarded unchanged to the externally defined volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl.
+ */
+extern void volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points);
+static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_orc(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points){
+    volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl(magnitudeVector, complexVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
+#endif /* INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_a_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h
new file mode 100644 index 000000000..e0578f13a --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h @@ -0,0 +1,271 @@ +/*! + * \file volk_gnsssdr_8ic_s8ic_multiply_8ic.h + * \brief Volk protokernel: multiplies a group of 16 bits vectors by one constant vector + * \authors
    + *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com + *
+ * + * Volk protokernel that multiplies a group of 16 bits vectors + * (8 bits the real part and 8 bits the imaginary part) by one constant vector + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. + * + * ------------------------------------------------------------------------- + */ + +#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_u_H +#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_u_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE3 +#include +/*! 
+ \brief Multiplies every element of aVector by a complex scalar and stores the products in cVector (unaligned loads and stores)
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be multiplied
+ \param scalar The complex scalar to multiply aVector
+ \param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
+ */
+static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
+
+    // eight complex values (16 bytes) are handled per vector pass
+    const unsigned int full_blocks = num_points / 8;
+    const unsigned int leftover = num_points % 8;
+
+    lv_8sc_t* out = cVector;
+    const lv_8sc_t* in = aVector;
+
+    // 0xFF in the low byte of every 16-bit lane, 0x00 in the high byte
+    const __m128i low_byte_mask = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+
+    // replicate the scalar's 16 bits into every lane, then split it into one lane-wide
+    // copy of its first byte (real) and one of its second byte (imag)
+    const __m128i scalar_lanes = _mm_set1_epi16 (*(short*)&scalar);
+    const __m128i scalar_real = _mm_and_si128 (scalar_lanes, low_byte_mask);
+    const __m128i scalar_imag = _mm_and_si128 (_mm_srli_si128 (scalar_lanes, 1), low_byte_mask);
+
+    for(unsigned int block = 0; block < full_blocks; ++block){
+
+        const __m128i samples = _mm_lddqu_si128((__m128i*)in);
+
+        const __m128i in_real = _mm_and_si128 (samples, low_byte_mask);
+        const __m128i in_imag = _mm_and_si128 (_mm_srli_si128 (samples, 1), low_byte_mask);
+
+        // (a + jb)(c + jd) = (ac - bd) + j(ad + bc), evaluated in 16-bit lanes
+        const __m128i re_re = _mm_mullo_epi16 (in_real, scalar_real);
+        const __m128i im_im = _mm_mullo_epi16 (in_imag, scalar_imag);
+        const __m128i re_im = _mm_mullo_epi16 (in_real, scalar_imag);
+        const __m128i im_re = _mm_mullo_epi16 (in_imag, scalar_real);
+
+        // only the low byte of each 16-bit result is kept
+        const __m128i out_real = _mm_and_si128 (_mm_sub_epi16 (re_re, im_im), low_byte_mask);
+        const __m128i out_imag = _mm_and_si128 (_mm_add_epi16 (re_im, im_re), low_byte_mask);
+
+        // move the imaginary bytes back to the high byte of each lane and merge
+        _mm_storeu_si128((__m128i*)out, _mm_or_si128 (out_real, _mm_slli_si128 (out_imag, 1)));
+
+        in += 8;
+        out += 8;
+    }
+
+    // scalar code for the values that do not fill a whole vector
+    for (unsigned int k = 0; k < leftover; ++k)
+    {
+        *out++ = (*in++) * scalar;
+    }
+
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Multiplies every element of aVector by a complex scalar and stores the products in cVector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be multiplied
+ \param scalar The complex scalar to multiply aVector
+ \param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
+ */
+static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
+
+    lv_8sc_t* cPtr = cVector;
+    const lv_8sc_t* aPtr = aVector;
+    unsigned int number = num_points;  // values still to be processed
+
+    // manually unrolled loop: eight products per pass
+    while (number >= 8){
+        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * scalar;
+        number -= 8;
+    }
+
+    // clean up any remaining
+    while (number-- > 0)
+        *cPtr++ = *aPtr++ * scalar;
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_u_H */
+
+
+#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_a_H
+#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_a_H
+
+#include
+#include
+#include
+#include
+
+#ifdef LV_HAVE_SSE3
+#include
+/*!
+ \brief Multiplies every element of aVector by a complex scalar and stores the products in cVector (aligned loads and stores)
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be multiplied
+ \param scalar The complex scalar to multiply aVector
+ \param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
+ */
+static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
+
+    // eight complex values (16 bytes) are handled per vector pass
+    const unsigned int full_blocks = num_points / 8;
+    const unsigned int leftover = num_points % 8;
+
+    lv_8sc_t* out = cVector;
+    const lv_8sc_t* in = aVector;
+
+    // 0xFF in the low byte of every 16-bit lane, 0x00 in the high byte
+    const __m128i low_byte_mask = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+
+    // replicate the scalar's 16 bits into every lane, then split it into one lane-wide
+    // copy of its first byte (real) and one of its second byte (imag)
+    const __m128i scalar_lanes = _mm_set1_epi16 (*(short*)&scalar);
+    const __m128i scalar_real = _mm_and_si128 (scalar_lanes, low_byte_mask);
+    const __m128i scalar_imag = _mm_and_si128 (_mm_srli_si128 (scalar_lanes, 1), low_byte_mask);
+
+    for(unsigned int block = 0; block < full_blocks; ++block){
+
+        // aligned load: _mm_load_si128 requires a 16-byte aligned address
+        const __m128i samples = _mm_load_si128((__m128i*)in);
+
+        const __m128i in_real = _mm_and_si128 (samples, low_byte_mask);
+        const __m128i in_imag = _mm_and_si128 (_mm_srli_si128 (samples, 1), low_byte_mask);
+
+        // (a + jb)(c + jd) = (ac - bd) + j(ad + bc), evaluated in 16-bit lanes
+        const __m128i re_re = _mm_mullo_epi16 (in_real, scalar_real);
+        const __m128i im_im = _mm_mullo_epi16 (in_imag, scalar_imag);
+        const __m128i re_im = _mm_mullo_epi16 (in_real, scalar_imag);
+        const __m128i im_re = _mm_mullo_epi16 (in_imag, scalar_real);
+
+        // only the low byte of each 16-bit result is kept
+        const __m128i out_real = _mm_and_si128 (_mm_sub_epi16 (re_re, im_im), low_byte_mask);
+        const __m128i out_imag = _mm_and_si128 (_mm_add_epi16 (re_im, im_re), low_byte_mask);
+
+        // move the imaginary bytes back to the high byte of each lane and merge
+        _mm_store_si128((__m128i*)out, _mm_or_si128 (out_real, _mm_slli_si128 (out_imag, 1)));
+
+        in += 8;
+        out += 8;
+    }
+
+    // scalar code for the values that do not fill a whole vector
+    for (unsigned int k = 0; k < leftover; ++k)
+    {
+        *out++ = (*in++) * scalar;
+    }
+
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Multiplies every element of aVector by a complex scalar and stores the products in cVector
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be multiplied
+ \param scalar The complex scalar to multiply aVector
+ \param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
+ */
+static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
+
+    lv_8sc_t* cPtr = cVector;
+    const lv_8sc_t* aPtr = aVector;
+    unsigned int number = num_points;  // values still to be processed
+
+    // manually unrolled loop: eight products per pass
+    while (number >= 8){
+        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * scalar;
+        *cPtr++ = (*aPtr++) * scalar;
+        number -= 8;
+    }
+
+    // clean up any remaining
+    while (number-- > 0)
+        *cPtr++ = *aPtr++ * scalar;
+}
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_ORC
+/*!
+ \brief Multiplies the input vector by a scalar and stores the results in the third vector
+
+ Thin wrapper: passes the real and imaginary parts of scalar as two separate char
+ arguments to the externally defined ORC implementation.
+
+ \param cVector The vector where the results will be stored
+ \param aVector The vector to be multiplied
+ \param scalar The complex scalar to multiply aVector
+ \param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
+ */
+extern void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const char scalarreal, const char scalarimag, unsigned int num_points);
+static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points){
+    volk_gnsssdr_8ic_s8ic_multiply_8ic_a_orc_impl(cVector, aVector, lv_creal(scalar), lv_cimag(scalar), num_points);
+}
+#endif /* LV_HAVE_ORC */
+
+#endif /* INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_a_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h
new file mode 100644
index 000000000..696b0a31f
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h
@@ -0,0 +1,499 @@
+/*!
+ * \file volk_gnsssdr_8ic_x2_dot_prod_8ic.h
+ * \brief Volk protokernel: multiplies two 16 bits vectors and accumulates them
+ * \authors
    + *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com + *
+ * + * Volk protokernel that multiplies two 16 bits vectors (8 bits the real part + * and 8 bits the imaginary part) and accumulates them + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. + * + * ------------------------------------------------------------------------- + */ + +#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H +#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H + +#include +#include +#include +#include + +#ifdef LV_HAVE_GENERIC +/*! 
+ \brief Multiplies the two input complex vectors element by element and accumulates the products into one complex value
+
+ The products are accumulated in char variables, two complex values per pass; the value
+ previously held in result is overwritten.
+
+ \param result Where the accumulated dot product is stored
+ \param input One of the vectors to be multiplied and accumulated
+ \param taps The other vector to be multiplied and accumulated
+ \param num_points The number of complex values in input and taps to be multiplied together and accumulated
+ */
+static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
+
+    // byte views of the three buffers: bytes come in (real, imag) pairs
+    char* out_bytes = (char*) result;
+    const char* x = (const char*) input;
+    const char* t = (const char*) taps;
+
+    // separate accumulators for the first and the second complex value of each pair
+    char first_re = 0;
+    char first_im = 0;
+    char second_re = 0;
+    char second_im = 0;
+
+    unsigned int pairs_left = num_points / 2;
+    while (pairs_left-- > 0) {
+        first_re += x[0] * t[0] - x[1] * t[1];
+        first_im += x[0] * t[1] + x[1] * t[0];
+        second_re += x[2] * t[2] - x[3] * t[3];
+        second_im += x[2] * t[3] + x[3] * t[2];
+
+        x += 4;
+        t += 4;
+    }
+
+    out_bytes[0] = first_re + second_re;
+    out_bytes[1] = first_im + second_im;
+
+    // Cleanup if we had an odd number of points: the last value was not part of any pair
+    if (num_points & 1) {
+        *result += input[num_points - 1] * taps[num_points - 1];
+    }
+}
+
+#endif /*LV_HAVE_GENERIC*/
+
+#ifdef LV_HAVE_SSE2
+#include "emmintrin.h"
+/*!
+ \brief Multiplies the two input complex vectors element by element and accumulates the products into one complex value (SSE2, unaligned loads)
+ \param result Where the accumulated dot product is stored
+ \param input One of the vectors to be multiplied and accumulated
+ \param taps The other vector to be multiplied and accumulated
+ \param num_points The number of complex values in input and taps to be multiplied together and accumulated
+ */
+static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
+
+    lv_8sc_t dotProduct;
+    memset(&dotProduct, 0x0, 2*sizeof(char));
+
+    const lv_8sc_t* a = input;
+    const lv_8sc_t* b = taps;
+
+    // eight complex values (16 bytes) are handled per vector pass
+    const unsigned int sse_iters = num_points/8;
+
+    if (sse_iters>0)
+    {
+        __m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc;
+
+        // 0xFF in the low byte of every 16-bit lane, 0x00 in the high byte
+        mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+        realcacc = _mm_setzero_si128();
+        imagcacc = _mm_setzero_si128();
+
+        for(unsigned int number = 0; number < sse_iters; number++){
+
+            // _mm_loadu_si128 is the SSE2 unaligned load declared by emmintrin.h;
+            // _mm_lddqu_si128 is an SSE3 intrinsic and must not be used in an SSE2 kernel
+            x = _mm_loadu_si128((__m128i*)a);
+            y = _mm_loadu_si128((__m128i*)b);
+
+            imagx = _mm_srli_si128 (x, 1);
+            imagx = _mm_and_si128 (imagx, mult1);
+            realx = _mm_and_si128 (x, mult1);
+
+            imagy = _mm_srli_si128 (y, 1);
+            imagy = _mm_and_si128 (imagy, mult1);
+            realy = _mm_and_si128 (y, mult1);
+
+            realx_mult_realy = _mm_mullo_epi16 (realx, realy);
+            imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
+            realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
+            imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
+
+            // (a + jb)(c + jd) = (ac - bd) + j(ad + bc)
+            realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+            imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+
+            realcacc = _mm_add_epi16 (realcacc, realc);
+            imagcacc = _mm_add_epi16 (imagcacc, imagc);
+
+            a += 8;
+            b += 8;
+        }
+
+        // keep the low byte of every accumulated lane and re-interleave real/imag bytes
+        realcacc = _mm_and_si128 (realcacc, mult1);
+        imagcacc = _mm_and_si128 (imagcacc, mult1);
+        imagcacc = _mm_slli_si128 (imagcacc, 1);
+
+        totalc = _mm_or_si128 (realcacc, imagcacc);
+
+        __VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
+
+        _mm_storeu_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
+
+        // fold the eight per-lane partial sums into the scalar accumulator
+        for (int i = 0; i<8; ++i)
+        {
+            dotProduct += dotProductVector[i];
+        }
+    }
+
+    // scalar code for the values that do not fill a whole vector
+    for (unsigned int i = 0; i<(num_points % 8); ++i)
+    {
+        dotProduct += (*a++) * (*b++);
+    }
+
+    *result = dotProduct;
+}
+
+#endif /*LV_HAVE_SSE2*/
+
+#ifdef LV_HAVE_SSE4_1
+#include "smmintrin.h"
+/*!
+ \brief Multiplies the two input complex vectors element by element and accumulates the products into one complex value (SSE4.1, unaligned loads)
+ \param result Where the accumulated dot product is stored
+ \param input One of the vectors to be multiplied and accumulated
+ \param taps The other vector to be multiplied and accumulated
+ \param num_points The number of complex values in input and taps to be multiplied together and accumulated
+ */
+static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
+
+    lv_8sc_t dotProduct;
+    memset(&dotProduct, 0x0, 2*sizeof(char));
+
+    const lv_8sc_t* a = input;
+    const lv_8sc_t* b = taps;
+
+    // eight complex values (16 bytes) are handled per vector pass
+    const unsigned int sse_iters = num_points/8;
+
+    if (sse_iters>0)
+    {
+        __m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc;
+
+        // 0xFF in the low byte of every 16-bit lane, 0x00 in the high byte
+        mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+        realcacc = _mm_setzero_si128();
+        imagcacc = _mm_setzero_si128();
+
+        for(unsigned int number = 0; number < sse_iters; number++){
+
+            x = _mm_lddqu_si128((__m128i*)a);
+            y = _mm_lddqu_si128((__m128i*)b);
+
+            imagx = _mm_srli_si128 (x, 1);
+            imagx = _mm_and_si128 (imagx, mult1);
+            realx = _mm_and_si128 (x, mult1);
+
+            imagy = _mm_srli_si128 (y, 1);
+            imagy = _mm_and_si128 (imagy, mult1);
+            realy = _mm_and_si128 (y, mult1);
+
+            realx_mult_realy = _mm_mullo_epi16 (realx, realy);
+            imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
+            realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
+            imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
+
+            // (a + jb)(c + jd) = (ac - bd) + j(ad + bc)
+            realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+            imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+
+            realcacc = _mm_add_epi16 (realcacc, realc);
+            imagcacc = _mm_add_epi16 (imagcacc, imagc);
+
+            a += 8;
+            b += 8;
+        }
+
+        imagcacc = _mm_slli_si128 (imagcacc, 1);
+
+        // byte-wise select: real bytes where the mask is 0xFF, shifted imaginary bytes elsewhere
+        totalc = _mm_blendv_epi8 (imagcacc, realcacc, mult1);
+
+        __VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
+
+        _mm_storeu_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
+
+        // fold the eight per-lane partial sums into the scalar accumulator
+        for (int i = 0; i<8; ++i)
+        {
+            dotProduct += dotProductVector[i];
+        }
+    }
+
+    // scalar code for the values that do not fill a whole vector
+    for (unsigned int i = 0; i<(num_points % 8); ++i)
+    {
+        dotProduct += (*a++) * (*b++);
+    }
+
+    *result = dotProduct;
+}
+
+#endif /*LV_HAVE_SSE4_1*/
+
+#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H*/
+
+
+#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H
+#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H
+
+#include
+#include
+#include
+#include
+
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Multiplies the two input complex vectors element by element and accumulates the products into one complex value
+
+ The products are accumulated in char variables, two complex values per pass; the value
+ previously held in result is overwritten.
+
+ \param result Where the accumulated dot product is stored
+ \param input One of the vectors to be multiplied and accumulated
+ \param taps The other vector to be multiplied and accumulated
+ \param num_points The number of complex values in input and taps to be multiplied together and accumulated
+ */
+static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
+
+    // byte views of the three buffers: bytes come in (real, imag) pairs
+    char* out_bytes = (char*) result;
+    const char* x = (const char*) input;
+    const char* t = (const char*) taps;
+
+    // separate accumulators for the first and the second complex value of each pair
+    char first_re = 0;
+    char first_im = 0;
+    char second_re = 0;
+    char second_im = 0;
+
+    unsigned int pairs_left = num_points / 2;
+    while (pairs_left-- > 0) {
+        first_re += x[0] * t[0] - x[1] * t[1];
+        first_im += x[0] * t[1] + x[1] * t[0];
+        second_re += x[2] * t[2] - x[3] * t[3];
+        second_im += x[2] * t[3] + x[3] * t[2];
+
+        x += 4;
+        t += 4;
+    }
+
+    out_bytes[0] = first_re + second_re;
+    out_bytes[1] = first_im + second_im;
+
+    // Cleanup if we had an odd number of points: the last value was not part of any pair
+    if (num_points & 1) {
+        *result += input[num_points - 1] * taps[num_points - 1];
+    }
+}
+
+#endif /*LV_HAVE_GENERIC*/
+
+#ifdef LV_HAVE_SSE2
+#include "emmintrin.h"
+/*!
+ \brief Multiplies the two input complex vectors element by element and accumulates the products into one complex value (SSE2, aligned loads)
+ \param result Where the accumulated dot product is stored
+ \param input One of the vectors to be multiplied and accumulated
+ \param taps The other vector to be multiplied and accumulated
+ \param num_points The number of complex values in input and taps to be multiplied together and accumulated
+ */
+static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
+
+    // eight complex values (16 bytes) are handled per vector pass
+    const unsigned int full_blocks = num_points / 8;
+    const unsigned int leftover = num_points % 8;
+
+    const lv_8sc_t* in_ptr = input;
+    const lv_8sc_t* tap_ptr = taps;
+
+    lv_8sc_t acc;
+    memset(&acc, 0x0, 2*sizeof(char));
+
+    if (full_blocks > 0)
+    {
+        // 0xFF in the low byte of every 16-bit lane, 0x00 in the high byte
+        const __m128i low_byte_mask = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+        __m128i real_sum = _mm_setzero_si128();
+        __m128i imag_sum = _mm_setzero_si128();
+
+        for(unsigned int block = 0; block < full_blocks; ++block){
+
+            // aligned loads: _mm_load_si128 requires 16-byte aligned addresses
+            const __m128i in_vec = _mm_load_si128((__m128i*)in_ptr);
+            const __m128i tap_vec = _mm_load_si128((__m128i*)tap_ptr);
+
+            const __m128i in_real = _mm_and_si128 (in_vec, low_byte_mask);
+            const __m128i in_imag = _mm_and_si128 (_mm_srli_si128 (in_vec, 1), low_byte_mask);
+            const __m128i tap_real = _mm_and_si128 (tap_vec, low_byte_mask);
+            const __m128i tap_imag = _mm_and_si128 (_mm_srli_si128 (tap_vec, 1), low_byte_mask);
+
+            // (a + jb)(c + jd) = (ac - bd) + j(ad + bc), evaluated in 16-bit lanes
+            const __m128i prod_real = _mm_sub_epi16 (_mm_mullo_epi16 (in_real, tap_real), _mm_mullo_epi16 (in_imag, tap_imag));
+            const __m128i prod_imag = _mm_add_epi16 (_mm_mullo_epi16 (in_real, tap_imag), _mm_mullo_epi16 (in_imag, tap_real));
+
+            real_sum = _mm_add_epi16 (real_sum, prod_real);
+            imag_sum = _mm_add_epi16 (imag_sum, prod_imag);
+
+            in_ptr += 8;
+            tap_ptr += 8;
+        }
+
+        // keep the low byte of every accumulated lane and re-interleave real/imag bytes
+        const __m128i real_bytes = _mm_and_si128 (real_sum, low_byte_mask);
+        const __m128i imag_bytes = _mm_slli_si128 (_mm_and_si128 (imag_sum, low_byte_mask), 1);
+
+        __VOLK_ATTR_ALIGNED(16) lv_8sc_t lane_sums[8];
+
+        _mm_store_si128((__m128i*)lane_sums, _mm_or_si128 (real_bytes, imag_bytes));
+
+        // fold the eight per-lane partial sums into the scalar accumulator
+        for (unsigned int lane = 0; lane < 8; ++lane)
+        {
+            acc += lane_sums[lane];
+        }
+    }
+
+    // scalar code for the values that do not fill a whole vector
+    for (unsigned int k = 0; k < leftover; ++k)
+    {
+        acc += (*in_ptr++) * (*tap_ptr++);
+    }
+
+    *result = acc;
+}
+
+#endif /*LV_HAVE_SSE2*/
+
+#ifdef LV_HAVE_SSE4_1
+#include "smmintrin.h"
+/*!
+ \brief Multiplies the two input complex vectors element by element and accumulates the products into one complex value (SSE4.1, aligned loads)
+ \param result Where the accumulated dot product is stored
+ \param input One of the vectors to be multiplied and accumulated
+ \param taps The other vector to be multiplied and accumulated
+ \param num_points The number of complex values in input and taps to be multiplied together and accumulated
+ */
+static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) {
+
+    // eight complex values (16 bytes) are handled per vector pass
+    const unsigned int full_blocks = num_points / 8;
+    const unsigned int leftover = num_points % 8;
+
+    const lv_8sc_t* in_ptr = input;
+    const lv_8sc_t* tap_ptr = taps;
+
+    lv_8sc_t acc;
+    memset(&acc, 0x0, 2*sizeof(char));
+
+    if (full_blocks > 0)
+    {
+        // 0xFF in the low byte of every 16-bit lane, 0x00 in the high byte
+        const __m128i low_byte_mask = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+        __m128i real_sum = _mm_setzero_si128();
+        __m128i imag_sum = _mm_setzero_si128();
+
+        for(unsigned int block = 0; block < full_blocks; ++block){
+
+            // aligned loads: _mm_load_si128 requires 16-byte aligned addresses
+            const __m128i in_vec = _mm_load_si128((__m128i*)in_ptr);
+            const __m128i tap_vec = _mm_load_si128((__m128i*)tap_ptr);
+
+            const __m128i in_real = _mm_and_si128 (in_vec, low_byte_mask);
+            const __m128i in_imag = _mm_and_si128 (_mm_srli_si128 (in_vec, 1), low_byte_mask);
+            const __m128i tap_real = _mm_and_si128 (tap_vec, low_byte_mask);
+            const __m128i tap_imag = _mm_and_si128 (_mm_srli_si128 (tap_vec, 1), low_byte_mask);
+
+            // (a + jb)(c + jd) = (ac - bd) + j(ad + bc), evaluated in 16-bit lanes
+            const __m128i prod_real = _mm_sub_epi16 (_mm_mullo_epi16 (in_real, tap_real), _mm_mullo_epi16 (in_imag, tap_imag));
+            const __m128i prod_imag = _mm_add_epi16 (_mm_mullo_epi16 (in_real, tap_imag), _mm_mullo_epi16 (in_imag, tap_real));
+
+            real_sum = _mm_add_epi16 (real_sum, prod_real);
+            imag_sum = _mm_add_epi16 (imag_sum, prod_imag);
+
+            in_ptr += 8;
+            tap_ptr += 8;
+        }
+
+        // byte-wise select: real bytes where the mask is 0xFF, shifted imaginary bytes elsewhere
+        const __m128i packed = _mm_blendv_epi8 (_mm_slli_si128 (imag_sum, 1), real_sum, low_byte_mask);
+
+        __VOLK_ATTR_ALIGNED(16) lv_8sc_t lane_sums[8];
+
+        _mm_store_si128((__m128i*)lane_sums, packed);
+
+        // fold the eight per-lane partial sums into the scalar accumulator
+        for (unsigned int lane = 0; lane < 8; ++lane)
+        {
+            acc += lane_sums[lane];
+        }
+    }
+
+    // scalar code for the values that do not fill a whole vector
+    for (unsigned int k = 0; k < leftover; ++k)
+    {
+        acc += (*in_ptr++) * (*tap_ptr++);
+    }
+
+    *result = acc;
+}
+
+#endif /*LV_HAVE_SSE4_1*/
+
+#ifdef LV_HAVE_ORC
+/*!
+ \brief Multiplies the two input complex vectors and accumulates them, storing the result in result
+
+ Calls the externally defined ORC implementation, which writes a real and an imaginary
+ accumulator as two shorts, then builds the 8-bit complex result from one byte of each.
+
+ \param result Where the accumulated dot product is stored
+ \param input One of the vectors to be multiplied and accumulated
+ \param taps The other vector to be multiplied and accumulated
+ \param num_points The number of complex values in input and taps to be multiplied together and accumulated
+ */
+extern void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(short* resRealShort, short* resImagShort, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points);
+static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points){
+
+    // resRealChar ends up pointing at the second byte (offset 1) of resReal
+    // NOTE(review): which half of the short that is depends on endianness (the
+    // high-order byte on little-endian); confirm this matches what the ORC impl stores
+    short resReal = 0;
+    char* resRealChar = (char*)&resReal;
+    resRealChar++;
+
+    // same byte selection for the imaginary accumulator
+    short resImag = 0;
+    char* resImagChar = (char*)&resImag;
+    resImagChar++;
+
+    volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(&resReal, &resImag, input, taps, num_points);
+
+    *result = lv_cmake(*resRealChar, *resImagChar);
+}
+#endif /* LV_HAVE_ORC */
+
+#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H*/
diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h
new file mode 100644
index 000000000..f8af2eb82
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h
@@ -0,0 +1,346 @@
+/*!
+ * \file volk_gnsssdr_8ic_x2_multiply_8ic.h
+ * \brief Volk protokernel: multiplies two 16 bits vectors
+ * \authors
    + *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com + *
+ * + * Volk protokernel that multiplies two 16 bits vectors (8 bits the real part + * and 8 bits the imaginary part) + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. + * + * ------------------------------------------------------------------------- + */ + +#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H +#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H + +#include +#include +#include + +#ifdef LV_HAVE_SSE2 +#include "emmintrin.h" +/*! 
+ \brief Multiplies the two input complex vectors element by element and stores the products in cVector (SSE2, unaligned loads and stores)
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
+
+    // eight complex values (16 bytes) are handled per vector pass
+    const unsigned int sse_iters = num_points / 8;
+
+    __m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
+    lv_8sc_t* c = cVector;
+    const lv_8sc_t* a = aVector;
+    const lv_8sc_t* b = bVector;
+
+    // 0xFF in the low byte of every 16-bit lane, 0x00 in the high byte
+    mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+
+    for(unsigned int number = 0; number < sse_iters; number++){
+
+        // _mm_loadu_si128 is the SSE2 unaligned load declared by emmintrin.h;
+        // _mm_lddqu_si128 is an SSE3 intrinsic and must not be used in an SSE2 kernel
+        x = _mm_loadu_si128((__m128i*)a);
+        y = _mm_loadu_si128((__m128i*)b);
+
+        imagx = _mm_srli_si128 (x, 1);
+        imagx = _mm_and_si128 (imagx, mult1);
+        realx = _mm_and_si128 (x, mult1);
+
+        imagy = _mm_srli_si128 (y, 1);
+        imagy = _mm_and_si128 (imagy, mult1);
+        realy = _mm_and_si128 (y, mult1);
+
+        realx_mult_realy = _mm_mullo_epi16 (realx, realy);
+        imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
+        realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
+        imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
+
+        // (a + jb)(c + jd) = (ac - bd) + j(ad + bc); only the low byte of each lane is kept
+        realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+        realc = _mm_and_si128 (realc, mult1);
+        imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+        imagc = _mm_and_si128 (imagc, mult1);
+        imagc = _mm_slli_si128 (imagc, 1);
+
+        totalc = _mm_or_si128 (realc, imagc);
+
+        _mm_storeu_si128((__m128i*)c, totalc);
+
+        a += 8;
+        b += 8;
+        c += 8;
+    }
+
+    // scalar code for the values that do not fill a whole vector
+    for (unsigned int i = 0; i<(num_points % 8); ++i)
+    {
+        *c++ = (*a++) * (*b++);
+    }
+}
+#endif /* LV_HAVE_SSE2 */
+
+#ifdef LV_HAVE_SSE4_1
+#include "smmintrin.h"
+/*!
+ \brief Multiplies the two input complex vectors element by element and stores the products in cVector (SSE4.1, unaligned loads and stores)
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
+
+    // eight complex values (16 bytes) are handled per vector pass
+    const unsigned int sse_iters = num_points / 8;
+
+    __m128i x, y;
+    __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc;
+    lv_8sc_t* c = cVector;
+    const lv_8sc_t* a = aVector;
+    const lv_8sc_t* b = bVector;
+
+    // 0xFF in the low byte of every 16-bit lane, 0x00 in the high byte
+    mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
+
+    for(unsigned int number = 0; number < sse_iters; number++){
+
+        x = _mm_lddqu_si128((__m128i*)a);
+        y = _mm_lddqu_si128((__m128i*)b);
+
+        imagx = _mm_srli_si128 (x, 1);
+        imagx = _mm_and_si128 (imagx, mult1);
+        realx = _mm_and_si128 (x, mult1);
+
+        imagy = _mm_srli_si128 (y, 1);
+        imagy = _mm_and_si128 (imagy, mult1);
+        realy = _mm_and_si128 (y, mult1);
+
+        realx_mult_realy = _mm_mullo_epi16 (realx, realy);
+        imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
+        realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
+        imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
+
+        // (a + jb)(c + jd) = (ac - bd) + j(ad + bc)
+        realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
+        imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
+        imagc = _mm_slli_si128 (imagc, 1);
+
+        // byte-wise select: real bytes where the mask is 0xFF, shifted imaginary bytes elsewhere
+        totalc = _mm_blendv_epi8 (imagc, realc, mult1);
+
+        _mm_storeu_si128((__m128i*)c, totalc);
+
+        a += 8;
+        b += 8;
+        c += 8;
+    }
+
+    // scalar code for the values that do not fill a whole vector
+    for (unsigned int i = 0; i<(num_points % 8); ++i)
+    {
+        *c++ = (*a++) * (*b++);
+    }
+}
+#endif /* LV_HAVE_SSE4_1 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Multiplies the two input complex vectors element by element and stores the products in cVector
+ \param cVector The vector where the results will be stored
+ \param aVector One of the vectors to be multiplied
+ \param bVector One of the vectors to be multiplied
+ \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
+ */
+static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){
+    lv_8sc_t* cPtr = cVector;
+    const lv_8sc_t* aPtr = aVector;
+    const lv_8sc_t* bPtr = bVector;
+
+    // unsigned counter: matches the type of num_points, so there is no signed/unsigned
+    // comparison and no signed overflow for very large num_points
+    for(unsigned int number = 0; number < num_points; number++){
+        *cPtr++ = (*aPtr++) * (*bPtr++);
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H */
+
+
+#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H
+#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H
+
+#include
+#include
+#include
+
+#ifdef LV_HAVE_SSE2
+#include "emmintrin.h"
+/*!
+ \brief Multiplies the two input complex vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ + + const unsigned int sse_iters = num_points / 8; + + __m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc; + lv_8sc_t* c = cVector; + const lv_8sc_t* a = aVector; + const lv_8sc_t* b = bVector; + + mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + + for(int number = 0;number < sse_iters; number++){ + + x = _mm_load_si128((__m128i*)a); + y = _mm_load_si128((__m128i*)b); + + imagx = _mm_srli_si128 (x, 1); + imagx = _mm_and_si128 (imagx, mult1); + realx = _mm_and_si128 (x, mult1); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (realx, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); + realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); + + realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + realc = _mm_and_si128 (realc, mult1); + imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + imagc = _mm_and_si128 (imagc, mult1); + imagc = _mm_slli_si128 (imagc, 1); + + totalc = _mm_or_si128 (realc, imagc); + + _mm_store_si128((__m128i*)c, totalc); + + a += 8; + b += 8; + c += 8; + } + + for (int i = 0; i<(num_points % 8); ++i) + { + *c++ = (*a++) * (*b++); + } +} +#endif /* LV_HAVE_SSE2 */ + +#ifdef LV_HAVE_SSE4_1 +#include 
"smmintrin.h" +/*! + \brief Multiplies the two input complex vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ + + const unsigned int sse_iters = num_points / 8; + + __m128i x, y, zero; + __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc; + lv_8sc_t* c = cVector; + const lv_8sc_t* a = aVector; + const lv_8sc_t* b = bVector; + + zero = _mm_setzero_si128(); + mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + + for(int number = 0;number < sse_iters; number++){ + + x = _mm_load_si128((__m128i*)a); + y = _mm_load_si128((__m128i*)b); + + imagx = _mm_srli_si128 (x, 1); + imagx = _mm_and_si128 (imagx, mult1); + realx = _mm_and_si128 (x, mult1); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (realx, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); + realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); + + realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + imagc = _mm_slli_si128 (imagc, 1); + + totalc = _mm_blendv_epi8 (imagc, realc, mult1); + + _mm_store_si128((__m128i*)c, totalc); + + a += 8; + b += 8; + c += 8; + } + + for (int i = 0; i<(num_points % 8); ++i) + { + *c++ = (*a++) * (*b++); + } +} +#endif /* LV_HAVE_SSE4_1 */ + +#ifdef LV_HAVE_GENERIC +/*! 
+ \brief Multiplies the two input complex vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ + lv_8sc_t* cPtr = cVector; + const lv_8sc_t* aPtr = aVector; + const lv_8sc_t* bPtr = bVector; + + for(int number = 0; number < num_points; number++){ + *cPtr++ = (*aPtr++) * (*bPtr++); + } + +} +#endif /* LV_HAVE_GENERIC */ + +#ifdef LV_HAVE_ORC +/*! + \brief Multiplies the two input complex vectors and stores their results in the third vector + \param cVector The vector where the results will be stored + \param aVector One of the vectors to be multiplied + \param bVector One of the vectors to be multiplied + \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector + */ +extern void volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points); +static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points){ + volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(cVector, aVector, bVector, num_points); +} +#endif /* LV_HAVE_ORC */ + +#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3.h new file mode 100644 index 000000000..fcbebdde3 --- /dev/null +++ 
b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3.h @@ -0,0 +1,882 @@ +/*! + * \file volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3.h + * \brief Volk protokernel: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors + * \authors
    + *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com + *
+ * + * Volk protokernel that performs the carrier wipe-off mixing and the + * Early, Prompt, and Late correlation with 16 bits vectors (8 bits the + * real part and 8 bits the imaginary part): + * - The carrier wipe-off is done by multiplying the input signal by the + * carrier (multiplication of 16 bits vectors). It returns the input + * signal in base band (BB) + * - Early values are calculated by multiplying the input signal in BB by the + * early code (multiplication of 16 bits vectors), accumulating the results + * - Prompt values are calculated by multiplying the input signal in BB by the + * prompt code (multiplication of 16 bits vectors), accumulating the results + * - Late values are calculated by multiplying the input signal in BB by the + * late code (multiplication of 16 bits vectors), accumulating the results + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. 
+ * + * ------------------------------------------------------------------------- + */ + +#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_H +#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_H + +#include +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE4_1 +#include "smmintrin.h" + /*! + \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation + \param input The input signal input + \param carrier The carrier signal input + \param E_code Early PRN code replica input + \param P_code Early PRN code replica input + \param L_code Early PRN code replica input + \param E_out Early correlation output + \param P_out Early correlation output + \param L_out Early correlation output + \param num_points The number of complex values in vectors + */ +static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse4_1(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points) +{ + const unsigned int sse_iters = num_points / 8; + + __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc; + __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output; + + const lv_8sc_t* input_ptr = input; + const lv_8sc_t* carrier_ptr = carrier; + + const lv_8sc_t* E_code_ptr = E_code; + lv_8sc_t* E_out_ptr = E_out; + const lv_8sc_t* L_code_ptr = L_code; + lv_8sc_t* L_out_ptr = L_out; + const lv_8sc_t* P_code_ptr = P_code; + lv_8sc_t* P_out_ptr = P_out; + + *E_out_ptr = 0; + *P_out_ptr = 0; + *L_out_ptr = 0; + + mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + + real_E_code_acc = _mm_setzero_si128(); + imag_E_code_acc = _mm_setzero_si128(); + 
real_L_code_acc = _mm_setzero_si128(); + imag_L_code_acc = _mm_setzero_si128(); + real_P_code_acc = _mm_setzero_si128(); + imag_P_code_acc = _mm_setzero_si128(); + + if (sse_iters>0) + { + for(int number = 0;number < sse_iters; number++){ + + //Perform the carrier wipe-off + x = _mm_lddqu_si128((__m128i*)input_ptr); + y = _mm_lddqu_si128((__m128i*)carrier_ptr); + + imagx = _mm_srli_si128 (x, 1); + imagx = _mm_and_si128 (imagx, mult1); + realx = _mm_and_si128 (x, mult1); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (realx, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); + realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); + + real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + //Get early values + y = _mm_lddqu_si128((__m128i*)E_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); + realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output); + imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output); + + //Get late values + y = _mm_lddqu_si128((__m128i*)L_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, 
imagy); + realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output); + imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output); + + //Get prompt values + y = _mm_lddqu_si128((__m128i*)P_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); + realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output); + imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output); + + input_ptr += 8; + carrier_ptr += 8; + E_code_ptr += 8; + L_code_ptr += 8; + P_code_ptr += 8; + } + + __VOLK_ATTR_ALIGNED(16) lv_16sc_t E_dotProductVector[8]; + __VOLK_ATTR_ALIGNED(16) lv_16sc_t L_dotProductVector[8]; + __VOLK_ATTR_ALIGNED(16) lv_16sc_t P_dotProductVector[8]; + + imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1); + output = _mm_blendv_epi8 (imag_E_code_acc, real_E_code_acc, mult1); + _mm_storeu_si128((__m128i*)E_dotProductVector, output); + + imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1); + output = _mm_blendv_epi8 (imag_L_code_acc, real_L_code_acc, mult1); + _mm_storeu_si128((__m128i*)L_dotProductVector, output); + + imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1); + output = _mm_blendv_epi8 (imag_P_code_acc, real_P_code_acc, mult1); + _mm_storeu_si128((__m128i*)P_dotProductVector, output); + + for 
(int i = 0; i<8; ++i) + { + *E_out_ptr += E_dotProductVector[i]; + *L_out_ptr += L_dotProductVector[i]; + *P_out_ptr += P_dotProductVector[i]; + } + } + + lv_8sc_t bb_signal_sample; + for(int i=0; i < num_points%8; ++i) + { + //Perform the carrier wipe-off + bb_signal_sample = (*input_ptr++) * (*carrier_ptr++); + // Now get early, late, and prompt values for each + *E_out_ptr += bb_signal_sample * (*E_code_ptr++); + *P_out_ptr += bb_signal_sample * (*P_code_ptr++); + *L_out_ptr += bb_signal_sample * (*L_code_ptr++); + } +} + +#endif /* LV_HAVE_SSE4_1 */ + +//#ifdef LV_HAVE_SSE2 +//#include "emmintrin.h" +///*! +// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation +// \param input The input signal input +// \param carrier The carrier signal input +// \param E_code Early PRN code replica input +// \param P_code Early PRN code replica input +// \param L_code Early PRN code replica input +// \param E_out Early correlation output +// \param P_out Early correlation output +// \param L_out Early correlation output +// \param num_points The number of complex values in vectors +// */ +//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_sse2(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points) +//{ +// const unsigned int sse_iters = num_points / 8; +// +// __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc; +// __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output; +// +// const lv_8sc_t* input_ptr = input; +// const lv_8sc_t* carrier_ptr = carrier; +// +// const lv_8sc_t* E_code_ptr = E_code; +// lv_8sc_t* E_out_ptr = E_out; +// const lv_8sc_t* L_code_ptr = L_code; +// lv_8sc_t* 
L_out_ptr = L_out; +// const lv_8sc_t* P_code_ptr = P_code; +// lv_8sc_t* P_out_ptr = P_out; +// +// *E_out_ptr = 0; +// *P_out_ptr = 0; +// *L_out_ptr = 0; +// +// mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); +// +// real_E_code_acc = _mm_setzero_si128(); +// imag_E_code_acc = _mm_setzero_si128(); +// real_L_code_acc = _mm_setzero_si128(); +// imag_L_code_acc = _mm_setzero_si128(); +// real_P_code_acc = _mm_setzero_si128(); +// imag_P_code_acc = _mm_setzero_si128(); +// +// if (sse_iters>0) +// { +// for(int number = 0;number < sse_iters; number++){ +// +// //Perform the carrier wipe-off +// x = _mm_lddqu_si128((__m128i*)input_ptr); +// y = _mm_lddqu_si128((__m128i*)carrier_ptr); +// +// imagx = _mm_srli_si128 (x, 1); +// imagx = _mm_and_si128 (imagx, mult1); +// realx = _mm_and_si128 (x, mult1); +// +// imagy = _mm_srli_si128 (y, 1); +// imagy = _mm_and_si128 (imagy, mult1); +// realy = _mm_and_si128 (y, mult1); +// +// realx_mult_realy = _mm_mullo_epi16 (realx, realy); +// imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); +// realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); +// imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); +// +// real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); +// imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); +// +// //Get early values +// y = _mm_lddqu_si128((__m128i*)E_code_ptr); +// +// imagy = _mm_srli_si128 (y, 1); +// imagy = _mm_and_si128 (imagy, mult1); +// realy = _mm_and_si128 (y, mult1); +// +// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); +// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); +// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); +// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); +// +// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); +// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); +// +// 
real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output); +// imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output); +// +// //Get late values +// y = _mm_lddqu_si128((__m128i*)L_code_ptr); +// +// imagy = _mm_srli_si128 (y, 1); +// imagy = _mm_and_si128 (imagy, mult1); +// realy = _mm_and_si128 (y, mult1); +// +// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); +// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); +// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); +// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); +// +// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); +// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); +// +// real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output); +// imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output); +// +// //Get prompt values +// y = _mm_lddqu_si128((__m128i*)P_code_ptr); +// +// imagy = _mm_srli_si128 (y, 1); +// imagy = _mm_and_si128 (imagy, mult1); +// realy = _mm_and_si128 (y, mult1); +// +// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); +// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); +// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); +// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); +// +// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); +// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); +// +// real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output); +// imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output); +// +// input_ptr += 8; +// carrier_ptr += 8; +// E_code_ptr += 8; +// L_code_ptr += 8; +// P_code_ptr += 8; +// } +// +// __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8]; +// __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8]; +// __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8]; +// +// real_E_code_acc = 
_mm_and_si128 (real_E_code_acc, mult1); +// imag_E_code_acc = _mm_and_si128 (imag_E_code_acc, mult1); +// imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1); +// output = _mm_or_si128 (real_E_code_acc, imag_E_code_acc); +// _mm_storeu_si128((__m128i*)E_dotProductVector, output); +// +// real_L_code_acc = _mm_and_si128 (real_L_code_acc, mult1); +// imag_L_code_acc = _mm_and_si128 (imag_L_code_acc, mult1); +// imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1); +// output = _mm_or_si128 (real_L_code_acc, imag_L_code_acc); +// _mm_storeu_si128((__m128i*)L_dotProductVector, output); +// +// real_P_code_acc = _mm_and_si128 (real_P_code_acc, mult1); +// imag_P_code_acc = _mm_and_si128 (imag_P_code_acc, mult1); +// imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1); +// output = _mm_or_si128 (real_P_code_acc, imag_P_code_acc); +// _mm_storeu_si128((__m128i*)P_dotProductVector, output); +// +// for (int i = 0; i<8; ++i) +// { +// *E_out_ptr += E_dotProductVector[i]; +// *L_out_ptr += L_dotProductVector[i]; +// *P_out_ptr += P_dotProductVector[i]; +// } +// } +// +// lv_8sc_t bb_signal_sample; +// for(int i=0; i < num_points%8; ++i) +// { +// //Perform the carrier wipe-off +// bb_signal_sample = (*input_ptr++) * (*carrier_ptr++); +// // Now get early, late, and prompt values for each +// *E_out_ptr += bb_signal_sample * (*E_code_ptr++); +// *P_out_ptr += bb_signal_sample * (*P_code_ptr++); +// *L_out_ptr += bb_signal_sample * (*L_code_ptr++); +// } +//} +// +//#endif /* LV_HAVE_SSE2 */ + +#ifdef LV_HAVE_GENERIC +/*! 
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation + \param input The input signal input + \param carrier The carrier signal input + \param E_code Early PRN code replica input + \param P_code Early PRN code replica input + \param L_code Early PRN code replica input + \param E_out Early correlation output + \param P_out Early correlation output + \param L_out Early correlation output + \param num_points The number of complex values in vectors + */ +static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_generic(lv_32fc_t* E_out, lv_32fc_t* P_out, lv_32fc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points) +{ + lv_8sc_t bb_signal_sample; + lv_16sc_t tmp1; + lv_16sc_t tmp2; + lv_16sc_t tmp3; + + bb_signal_sample = lv_cmake(0, 0); + + *E_out = 0; + *P_out = 0; + *L_out = 0; + // perform Early, Prompt and Late correlation + for(int i=0; i < num_points; ++i) + { + //Perform the carrier wipe-off + bb_signal_sample = input[i] * carrier[i]; + + tmp1 = bb_signal_sample * E_code[i]; + tmp2 = bb_signal_sample * P_code[i]; + tmp3 = bb_signal_sample * L_code[i]; + + // Now get early, late, and prompt values for each + *E_out += tmp1; + *P_out += tmp2; + *L_out += tmp3; + } +} + +#endif /* LV_HAVE_GENERIC */ + +#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_H */ + + +//#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_H +//#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_H +// +//#include +//#include +//#include +//#include +//#include +// +//#ifdef LV_HAVE_SSE4_1 +//#include "smmintrin.h" +///*! 
+// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation +// \param input The input signal input +// \param carrier The carrier signal input +// \param E_code Early PRN code replica input +// \param P_code Early PRN code replica input +// \param L_code Early PRN code replica input +// \param E_out Early correlation output +// \param P_out Early correlation output +// \param L_out Early correlation output +// \param num_points The number of complex values in vectors +// */ +//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_sse4_1(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points) +//{ +// const unsigned int sse_iters = num_points / 8; +// +// __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc; +// __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output; +// +// const lv_8sc_t* input_ptr = input; +// const lv_8sc_t* carrier_ptr = carrier; +// +// const lv_8sc_t* E_code_ptr = E_code; +// lv_8sc_t* E_out_ptr = E_out; +// const lv_8sc_t* L_code_ptr = L_code; +// lv_8sc_t* L_out_ptr = L_out; +// const lv_8sc_t* P_code_ptr = P_code; +// lv_8sc_t* P_out_ptr = P_out; +// +// *E_out_ptr = 0; +// *P_out_ptr = 0; +// *L_out_ptr = 0; +// +// mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); +// +// real_E_code_acc = _mm_setzero_si128(); +// imag_E_code_acc = _mm_setzero_si128(); +// real_L_code_acc = _mm_setzero_si128(); +// imag_L_code_acc = _mm_setzero_si128(); +// real_P_code_acc = _mm_setzero_si128(); +// imag_P_code_acc = _mm_setzero_si128(); +// +// if (sse_iters>0) +// { +// for(int number = 0;number < sse_iters; number++){ +// +// //Perform the 
carrier wipe-off +// x = _mm_load_si128((__m128i*)input_ptr); +// y = _mm_load_si128((__m128i*)carrier_ptr); +// +// imagx = _mm_srli_si128 (x, 1); +// imagx = _mm_and_si128 (imagx, mult1); +// realx = _mm_and_si128 (x, mult1); +// +// imagy = _mm_srli_si128 (y, 1); +// imagy = _mm_and_si128 (imagy, mult1); +// realy = _mm_and_si128 (y, mult1); +// +// realx_mult_realy = _mm_mullo_epi16 (realx, realy); +// imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); +// realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); +// imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); +// +// real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); +// imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); +// +// //Get early values +// y = _mm_load_si128((__m128i*)E_code_ptr); +// +// imagy = _mm_srli_si128 (y, 1); +// imagy = _mm_and_si128 (imagy, mult1); +// realy = _mm_and_si128 (y, mult1); +// +// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); +// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); +// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); +// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); +// +// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); +// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); +// +// real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output); +// imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output); +// +// //Get late values +// y = _mm_load_si128((__m128i*)L_code_ptr); +// +// imagy = _mm_srli_si128 (y, 1); +// imagy = _mm_and_si128 (imagy, mult1); +// realy = _mm_and_si128 (y, mult1); +// +// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); +// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); +// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); +// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); +// +// real_output = 
_mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); +// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); +// +// real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output); +// imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output); +// +// //Get prompt values +// y = _mm_load_si128((__m128i*)P_code_ptr); +// +// imagy = _mm_srli_si128 (y, 1); +// imagy = _mm_and_si128 (imagy, mult1); +// realy = _mm_and_si128 (y, mult1); +// +// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); +// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); +// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); +// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); +// +// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); +// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); +// +// real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output); +// imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output); +// +// input_ptr += 8; +// carrier_ptr += 8; +// E_code_ptr += 8; +// L_code_ptr += 8; +// P_code_ptr += 8; +// } +// +// __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8]; +// __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8]; +// __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8]; +// +// imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1); +// output = _mm_blendv_epi8 (imag_E_code_acc, real_E_code_acc, mult1); +// _mm_store_si128((__m128i*)E_dotProductVector, output); +// +// imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1); +// output = _mm_blendv_epi8 (imag_L_code_acc, real_L_code_acc, mult1); +// _mm_store_si128((__m128i*)L_dotProductVector, output); +// +// imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1); +// output = _mm_blendv_epi8 (imag_P_code_acc, real_P_code_acc, mult1); +// _mm_store_si128((__m128i*)P_dotProductVector, output); +// +// for (int i = 0; i<8; ++i) +// { +// *E_out_ptr += E_dotProductVector[i]; +// 
*L_out_ptr += L_dotProductVector[i]; +// *P_out_ptr += P_dotProductVector[i]; +// } +// } +// +// lv_8sc_t bb_signal_sample; +// for(int i=0; i < num_points%8; ++i) +// { +// //Perform the carrier wipe-off +// bb_signal_sample = (*input_ptr++) * (*carrier_ptr++); +// // Now get early, late, and prompt values for each +// *E_out_ptr += bb_signal_sample * (*E_code_ptr++); +// *P_out_ptr += bb_signal_sample * (*P_code_ptr++); +// *L_out_ptr += bb_signal_sample * (*L_code_ptr++); +// } +//} +// +//#endif /* LV_HAVE_SSE4_1 */ +// +//#ifdef LV_HAVE_SSE2 +//#include "emmintrin.h" +///*! +// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation +// \param input The input signal input +// \param carrier The carrier signal input +// \param E_code Early PRN code replica input +// \param P_code Early PRN code replica input +// \param L_code Early PRN code replica input +// \param E_out Early correlation output +// \param P_out Early correlation output +// \param L_out Early correlation output +// \param num_points The number of complex values in vectors +// */ +//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_sse2(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points) +//{ +// const unsigned int sse_iters = num_points / 8; +// +// __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc; +// __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output; +// +// const lv_8sc_t* input_ptr = input; +// const lv_8sc_t* carrier_ptr = carrier; +// +// const lv_8sc_t* E_code_ptr = E_code; +// lv_8sc_t* E_out_ptr = E_out; +// const lv_8sc_t* L_code_ptr = L_code; +// lv_8sc_t* L_out_ptr = L_out; +// const 
lv_8sc_t* P_code_ptr = P_code; +// lv_8sc_t* P_out_ptr = P_out; +// +// *E_out_ptr = 0; +// *P_out_ptr = 0; +// *L_out_ptr = 0; +// +// mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); +// +// real_E_code_acc = _mm_setzero_si128(); +// imag_E_code_acc = _mm_setzero_si128(); +// real_L_code_acc = _mm_setzero_si128(); +// imag_L_code_acc = _mm_setzero_si128(); +// real_P_code_acc = _mm_setzero_si128(); +// imag_P_code_acc = _mm_setzero_si128(); +// +// if (sse_iters>0) +// { +// for(int number = 0;number < sse_iters; number++){ +// +// //Perform the carrier wipe-off +// x = _mm_load_si128((__m128i*)input_ptr); +// y = _mm_load_si128((__m128i*)carrier_ptr); +// +// imagx = _mm_srli_si128 (x, 1); +// imagx = _mm_and_si128 (imagx, mult1); +// realx = _mm_and_si128 (x, mult1); +// +// imagy = _mm_srli_si128 (y, 1); +// imagy = _mm_and_si128 (imagy, mult1); +// realy = _mm_and_si128 (y, mult1); +// +// realx_mult_realy = _mm_mullo_epi16 (realx, realy); +// imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); +// realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); +// imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); +// +// real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); +// imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); +// +// //Get early values +// y = _mm_load_si128((__m128i*)E_code_ptr); +// +// imagy = _mm_srli_si128 (y, 1); +// imagy = _mm_and_si128 (imagy, mult1); +// realy = _mm_and_si128 (y, mult1); +// +// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); +// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); +// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); +// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); +// +// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); +// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); +// +// real_E_code_acc = _mm_add_epi16 
(real_E_code_acc, real_output); +// imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output); +// +// //Get late values +// y = _mm_load_si128((__m128i*)L_code_ptr); +// +// imagy = _mm_srli_si128 (y, 1); +// imagy = _mm_and_si128 (imagy, mult1); +// realy = _mm_and_si128 (y, mult1); +// +// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); +// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); +// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); +// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); +// +// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); +// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); +// +// real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output); +// imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output); +// +// //Get prompt values +// y = _mm_load_si128((__m128i*)P_code_ptr); +// +// imagy = _mm_srli_si128 (y, 1); +// imagy = _mm_and_si128 (imagy, mult1); +// realy = _mm_and_si128 (y, mult1); +// +// realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); +// imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); +// realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); +// imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); +// +// real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); +// imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); +// +// real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output); +// imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output); +// +// input_ptr += 8; +// carrier_ptr += 8; +// E_code_ptr += 8; +// L_code_ptr += 8; +// P_code_ptr += 8; +// } +// +// __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8]; +// __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8]; +// __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8]; +// +// real_E_code_acc = _mm_and_si128 (real_E_code_acc, mult1); +// 
imag_E_code_acc = _mm_and_si128 (imag_E_code_acc, mult1); +// imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1); +// output = _mm_or_si128 (real_E_code_acc, imag_E_code_acc); +// _mm_store_si128((__m128i*)E_dotProductVector, output); +// +// real_L_code_acc = _mm_and_si128 (real_L_code_acc, mult1); +// imag_L_code_acc = _mm_and_si128 (imag_L_code_acc, mult1); +// imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1); +// output = _mm_or_si128 (real_L_code_acc, imag_L_code_acc); +// _mm_store_si128((__m128i*)L_dotProductVector, output); +// +// real_P_code_acc = _mm_and_si128 (real_P_code_acc, mult1); +// imag_P_code_acc = _mm_and_si128 (imag_P_code_acc, mult1); +// imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1); +// output = _mm_or_si128 (real_P_code_acc, imag_P_code_acc); +// _mm_store_si128((__m128i*)P_dotProductVector, output); +// +// for (int i = 0; i<8; ++i) +// { +// *E_out_ptr += E_dotProductVector[i]; +// *L_out_ptr += L_dotProductVector[i]; +// *P_out_ptr += P_dotProductVector[i]; +// } +// } +// +// lv_8sc_t bb_signal_sample; +// for(int i=0; i < num_points%8; ++i) +// { +// //Perform the carrier wipe-off +// bb_signal_sample = (*input_ptr++) * (*carrier_ptr++); +// // Now get early, late, and prompt values for each +// *E_out_ptr += bb_signal_sample * (*E_code_ptr++); +// *P_out_ptr += bb_signal_sample * (*P_code_ptr++); +// *L_out_ptr += bb_signal_sample * (*L_code_ptr++); +// } +//} +// +//#endif /* LV_HAVE_SSE2 */ +// +//#ifdef LV_HAVE_GENERIC +///*! 
+// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation +// \param input The input signal input +// \param carrier The carrier signal input +// \param E_code Early PRN code replica input +// \param P_code Early PRN code replica input +// \param L_code Early PRN code replica input +// \param E_out Early correlation output +// \param P_out Early correlation output +// \param L_out Early correlation output +// \param num_points The number of complex values in vectors +// */ +//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_generic(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points) +//{ +// lv_8sc_t bb_signal_sample; +// +// bb_signal_sample = lv_cmake(0, 0); +// +// *E_out = 0; +// *P_out = 0; +// *L_out = 0; +// // perform Early, Prompt and Late correlation +// for(int i=0; i < num_points; ++i) +// { +// //Perform the carrier wipe-off +// bb_signal_sample = input[i] * carrier[i]; +// // Now get early, late, and prompt values for each +// *E_out += bb_signal_sample * E_code[i]; +// *P_out += bb_signal_sample * P_code[i]; +// *L_out += bb_signal_sample * L_code[i]; +// } +//} +// +//#endif /* LV_HAVE_GENERIC */ +// +//#ifdef LV_HAVE_ORC +///*! 
+// \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation +// \param input The input signal input +// \param carrier The carrier signal input +// \param E_code Early PRN code replica input +// \param P_code Early PRN code replica input +// \param L_code Early PRN code replica input +// \param E_out Early correlation output +// \param P_out Early correlation output +// \param L_out Early correlation output +// \param num_points The number of complex values in vectors +// */ +// +//extern void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_first_a_orc_impl(short* E_out_real, short* E_out_imag, short* P_out_real, short* P_out_imag, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, unsigned int num_points); +//extern void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_second_a_orc_impl(short* L_out_real, short* L_out_imag, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* L_code, unsigned int num_points); +//static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_u_orc(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points){ +// +// short E_out_real = 0; +// short E_out_imag = 0; +// char* E_out_real_c = (char*)&E_out_real; +// E_out_real_c++; +// char* E_out_imag_c = (char*)&E_out_imag; +// E_out_imag_c++; +// +// short P_out_real = 0; +// short P_out_imag = 0; +// char* P_out_real_c = (char*)&P_out_real; +// P_out_real_c++; +// char* P_out_imag_c = (char*)&P_out_imag; +// P_out_imag_c++; +// +// short L_out_real = 0; +// short L_out_imag = 0; +// char* L_out_real_c = (char*)&L_out_real; +// L_out_real_c++; +// char* L_out_imag_c = (char*)&L_out_imag; +// L_out_imag_c++; +// +// volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_first_a_orc_impl( &E_out_real, &E_out_imag, &P_out_real, &P_out_imag, input, carrier, E_code, P_code, num_points); +// 
volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_second_a_orc_impl( &L_out_real, &L_out_imag, input, carrier, L_code, num_points); +// +// //ORC implementation of 8ic_x5_cw_epl_corr_32fc_x3 is done in two different functions because it seems that +// //in one function the length of the code gives memory problems (bad access, segmentation fault). +// //Also, the maximum number of accumulators that can be used is 4 (and we need 6). +// //The "carrier wipe-off" step is done two times: one in the first function and another one in the second. +// //Joining all the ORC code in one function would be quicker because the "carrier wipe-off" step would be done just +// //one time. +// +// *E_out = lv_cmake(*E_out_real_c, *E_out_imag_c); +// *P_out = lv_cmake(*P_out_real_c, *P_out_imag_c); +// *L_out = lv_cmake(*L_out_real_c, *L_out_imag_c); +//} +//#endif /* LV_HAVE_ORC */ +// +//#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3_a_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.h new file mode 100644 index 000000000..b58931d8a --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.h @@ -0,0 +1,874 @@ +/*! + * \file volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.h + * \brief Volk protokernel: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors + * \authors
    + *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com + *
+ * + * Volk protokernel that performs the carrier wipe-off mixing and the + * Early, Prompt, and Late correlation with 16-bit vectors (8 bits for the + * real part and 8 bits for the imaginary part): + * - The carrier wipe-off is done by multiplying the input signal by the + * carrier (multiplication of 16-bit vectors). It returns the input + * signal in base band (BB) + * - Early values are calculated by multiplying the input signal in BB by the + * early code (multiplication of 16-bit vectors), accumulating the results + * - Prompt values are calculated by multiplying the input signal in BB by the + * prompt code (multiplication of 16-bit vectors), accumulating the results + * - Late values are calculated by multiplying the input signal in BB by the + * late code (multiplication of 16-bit vectors), accumulating the results + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. 
+ * + * ------------------------------------------------------------------------- + */ + +#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_H +#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_H + +#include +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE4_1 +#include "smmintrin.h" + /*! + \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (SSE4.1 version using unaligned loads) + + The vector loop consumes 8 complex samples per iteration (num_points / 8 iterations); the remaining num_points % 8 samples are handled by a scalar loop. + Products are accumulated in 16-bit lanes and only the low byte of each lane is kept when the real and imaginary accumulators are repacked into lv_8sc_t values. + \param E_out Early correlation output (a single value, reset to 0 before accumulating) + \param P_out Prompt correlation output (a single value, reset to 0 before accumulating) + \param L_out Late correlation output (a single value, reset to 0 before accumulating) + \param input The input signal input + \param carrier The carrier signal input + \param E_code Early PRN code replica input + \param P_code Prompt PRN code replica input + \param L_code Late PRN code replica input + \param num_points The number of complex values in vectors + */ +static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse4_1(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points) +{ + const unsigned int sse_iters = num_points / 8; + + __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc; + __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output; + + const lv_8sc_t* input_ptr = input; + const lv_8sc_t* carrier_ptr = carrier; + + const lv_8sc_t* E_code_ptr = E_code; + lv_8sc_t* E_out_ptr = E_out; + const lv_8sc_t* L_code_ptr = L_code; + lv_8sc_t* L_out_ptr = L_out; + const lv_8sc_t* P_code_ptr = P_code; + lv_8sc_t* P_out_ptr = P_out; + + *E_out_ptr = 0; + *P_out_ptr = 0; + *L_out_ptr = 0; + + mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); /* mask: 0xFF in the low byte of every 16-bit lane */ + + real_E_code_acc = _mm_setzero_si128(); + imag_E_code_acc = _mm_setzero_si128(); + real_L_code_acc = 
_mm_setzero_si128(); + imag_L_code_acc = _mm_setzero_si128(); + real_P_code_acc = _mm_setzero_si128(); + imag_P_code_acc = _mm_setzero_si128(); + + if (sse_iters>0) + { + for(int number = 0;number < sse_iters; number++){ + + //Perform the carrier wipe-off + x = _mm_lddqu_si128((__m128i*)input_ptr); + y = _mm_lddqu_si128((__m128i*)carrier_ptr); + + imagx = _mm_srli_si128 (x, 1); /* one-byte shift moves each odd-indexed byte into the low byte of its 16-bit lane */ + imagx = _mm_and_si128 (imagx, mult1); + realx = _mm_and_si128 (x, mult1); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (realx, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); + realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); + + real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + //Get early values + y = _mm_lddqu_si128((__m128i*)E_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); + realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output); + imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output); + + //Get late values + y = _mm_lddqu_si128((__m128i*)L_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); + 
realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output); + imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output); + + //Get prompt values + y = _mm_lddqu_si128((__m128i*)P_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); + realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output); + imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output); + + input_ptr += 8; + carrier_ptr += 8; + E_code_ptr += 8; + L_code_ptr += 8; + P_code_ptr += 8; + } + + __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8]; + __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8]; + __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8]; + + imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1); + output = _mm_blendv_epi8 (imag_E_code_acc, real_E_code_acc, mult1); /* low byte of each lane from the real accumulator, high byte from the shifted imaginary accumulator */ + _mm_storeu_si128((__m128i*)E_dotProductVector, output); + + imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1); + output = _mm_blendv_epi8 (imag_L_code_acc, real_L_code_acc, mult1); + _mm_storeu_si128((__m128i*)L_dotProductVector, output); + + imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1); + output = _mm_blendv_epi8 (imag_P_code_acc, real_P_code_acc, mult1); + _mm_storeu_si128((__m128i*)P_dotProductVector, output); + + for (int i = 0; 
i<8; ++i) + { + *E_out_ptr += E_dotProductVector[i]; + *L_out_ptr += L_dotProductVector[i]; + *P_out_ptr += P_dotProductVector[i]; + } + } + + lv_8sc_t bb_signal_sample; + for(int i=0; i < num_points%8; ++i) + { + //Perform the carrier wipe-off + bb_signal_sample = (*input_ptr++) * (*carrier_ptr++); + // Now get early, late, and prompt values for each + *E_out_ptr += bb_signal_sample * (*E_code_ptr++); + *P_out_ptr += bb_signal_sample * (*P_code_ptr++); + *L_out_ptr += bb_signal_sample * (*L_code_ptr++); + } +} + +#endif /* LV_HAVE_SSE4_1 */ + +#ifdef LV_HAVE_SSE2 +#include "emmintrin.h" +/*! + \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (SSE2 version using unaligned loads) + + The vector loop consumes 8 complex samples per iteration (num_points / 8 iterations); the remaining num_points % 8 samples are handled by a scalar loop. + Products are accumulated in 16-bit lanes and only the low byte of each lane is kept when the real and imaginary accumulators are repacked into lv_8sc_t values. + Only SSE2 intrinsics (emmintrin.h) are used, so the kernel builds on targets without SSE3. + \param E_out Early correlation output (a single value, reset to 0 before accumulating) + \param P_out Prompt correlation output (a single value, reset to 0 before accumulating) + \param L_out Late correlation output (a single value, reset to 0 before accumulating) + \param input The input signal input + \param carrier The carrier signal input + \param E_code Early PRN code replica input + \param P_code Prompt PRN code replica input + \param L_code Late PRN code replica input + \param num_points The number of complex values in vectors + */ +static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_sse2(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points) +{ + const unsigned int sse_iters = num_points / 8; + + __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc; + __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output; + + const lv_8sc_t* input_ptr = input; + const lv_8sc_t* carrier_ptr = carrier; + + const lv_8sc_t* E_code_ptr = E_code; + lv_8sc_t* E_out_ptr = E_out; + const lv_8sc_t* L_code_ptr = L_code; + lv_8sc_t* L_out_ptr = L_out; + const lv_8sc_t* P_code_ptr = P_code; + lv_8sc_t* 
P_out_ptr = P_out; + + *E_out_ptr = 0; + *P_out_ptr = 0; + *L_out_ptr = 0; + + //Mask with 0xFF in the low byte of every 16-bit lane + mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + + real_E_code_acc = _mm_setzero_si128(); + imag_E_code_acc = _mm_setzero_si128(); + real_L_code_acc = _mm_setzero_si128(); + imag_L_code_acc = _mm_setzero_si128(); + real_P_code_acc = _mm_setzero_si128(); + imag_P_code_acc = _mm_setzero_si128(); + + if (sse_iters>0) + { + for(unsigned int number = 0; number < sse_iters; number++){ + + //Perform the carrier wipe-off + //_mm_loadu_si128 is the SSE2 unaligned load; _mm_lddqu_si128 is an SSE3 intrinsic that emmintrin.h does not declare + x = _mm_loadu_si128((const __m128i*)input_ptr); + y = _mm_loadu_si128((const __m128i*)carrier_ptr); + + //Split the interleaved bytes: the one-byte shift brings each odd-indexed byte into the low byte of its lane + imagx = _mm_srli_si128 (x, 1); + imagx = _mm_and_si128 (imagx, mult1); + realx = _mm_and_si128 (x, mult1); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (realx, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); + realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); + + real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + //Get early values + y = _mm_loadu_si128((const __m128i*)E_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); + realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output); + imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output); + + //Get late values + y = 
_mm_loadu_si128((const __m128i*)L_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); + realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output); + imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output); + + //Get prompt values + y = _mm_loadu_si128((const __m128i*)P_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); + realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output); + imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output); + + input_ptr += 8; + carrier_ptr += 8; + E_code_ptr += 8; + L_code_ptr += 8; + P_code_ptr += 8; + } + + __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8]; + __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8]; + __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8]; + + //Repack: keep the low byte of every accumulator lane, real in the low byte and imaginary in the high byte + real_E_code_acc = _mm_and_si128 (real_E_code_acc, mult1); + imag_E_code_acc = _mm_and_si128 (imag_E_code_acc, mult1); + imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1); + output = _mm_or_si128 (real_E_code_acc, imag_E_code_acc); + _mm_storeu_si128((__m128i*)E_dotProductVector, 
output); + + real_L_code_acc = _mm_and_si128 (real_L_code_acc, mult1); + imag_L_code_acc = _mm_and_si128 (imag_L_code_acc, mult1); + imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1); + output = _mm_or_si128 (real_L_code_acc, imag_L_code_acc); + _mm_storeu_si128((__m128i*)L_dotProductVector, output); + + real_P_code_acc = _mm_and_si128 (real_P_code_acc, mult1); + imag_P_code_acc = _mm_and_si128 (imag_P_code_acc, mult1); + imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1); + output = _mm_or_si128 (real_P_code_acc, imag_P_code_acc); + _mm_storeu_si128((__m128i*)P_dotProductVector, output); + + //Horizontal sum of the 8 partial results + for (int i = 0; i<8; ++i) + { + *E_out_ptr += E_dotProductVector[i]; + *L_out_ptr += L_dotProductVector[i]; + *P_out_ptr += P_dotProductVector[i]; + } + } + + //Scalar tail for the samples that do not fill a whole vector + lv_8sc_t bb_signal_sample; + for(unsigned int i = 0; i < num_points % 8; ++i) + { + //Perform the carrier wipe-off + bb_signal_sample = (*input_ptr++) * (*carrier_ptr++); + // Now get early, late, and prompt values for each + *E_out_ptr += bb_signal_sample * (*E_code_ptr++); + *P_out_ptr += bb_signal_sample * (*P_code_ptr++); + *L_out_ptr += bb_signal_sample * (*L_code_ptr++); + } +} + +#endif /* LV_HAVE_SSE2 */ + +#ifdef LV_HAVE_GENERIC +/*! 
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (portable scalar version) + + For every sample the input is multiplied by the carrier, and that product is multiplied by each code replica and added to the matching output. + \param E_out Early correlation output (a single value, reset to 0 before accumulating) + \param P_out Prompt correlation output (a single value, reset to 0 before accumulating) + \param L_out Late correlation output (a single value, reset to 0 before accumulating) + \param input The input signal input + \param carrier The carrier signal input + \param E_code Early PRN code replica input + \param P_code Prompt PRN code replica input + \param L_code Late PRN code replica input + \param num_points The number of complex values in vectors + */ +static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_generic(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points) +{ + lv_8sc_t bb_signal_sample; + + *E_out = 0; + *P_out = 0; + *L_out = 0; + // perform Early, Prompt and Late correlation + // the counter has the same unsigned type as num_points, so it cannot overflow before reaching the bound + for(unsigned int i = 0; i < num_points; ++i) + { + //Perform the carrier wipe-off + bb_signal_sample = input[i] * carrier[i]; + // Now get early, late, and prompt values for each + *E_out += bb_signal_sample * E_code[i]; + *P_out += bb_signal_sample * P_code[i]; + *L_out += bb_signal_sample * L_code[i]; + } +} + +#endif /* LV_HAVE_GENERIC */ + +#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_H */ + + +#ifndef INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_H +#define INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_H + +#include +#include +#include +#include +#include + +#ifdef LV_HAVE_SSE4_1 +#include "smmintrin.h" +/*! 
+ \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation (SSE4.1 version using aligned loads) + + The vector loop consumes 8 complex samples per iteration (num_points / 8 iterations); the remaining num_points % 8 samples are handled by a scalar loop. + Products are accumulated in 16-bit lanes and only the low byte of each lane is kept when the real and imaginary accumulators are repacked into lv_8sc_t values. + input, carrier and the three code vectors are read with _mm_load_si128, so they are expected to be 16-byte aligned. + \param E_out Early correlation output (a single value, reset to 0 before accumulating) + \param P_out Prompt correlation output (a single value, reset to 0 before accumulating) + \param L_out Late correlation output (a single value, reset to 0 before accumulating) + \param input The input signal input + \param carrier The carrier signal input + \param E_code Early PRN code replica input + \param P_code Prompt PRN code replica input + \param L_code Late PRN code replica input + \param num_points The number of complex values in vectors + */ +static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_sse4_1(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points) +{ + const unsigned int sse_iters = num_points / 8; + + __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc; + __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output; + + const lv_8sc_t* input_ptr = input; + const lv_8sc_t* carrier_ptr = carrier; + + const lv_8sc_t* E_code_ptr = E_code; + lv_8sc_t* E_out_ptr = E_out; + const lv_8sc_t* L_code_ptr = L_code; + lv_8sc_t* L_out_ptr = L_out; + const lv_8sc_t* P_code_ptr = P_code; + lv_8sc_t* P_out_ptr = P_out; + + *E_out_ptr = 0; + *P_out_ptr = 0; + *L_out_ptr = 0; + + mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); /* mask: 0xFF in the low byte of every 16-bit lane */ + + real_E_code_acc = _mm_setzero_si128(); + imag_E_code_acc = _mm_setzero_si128(); + real_L_code_acc = _mm_setzero_si128(); + imag_L_code_acc = _mm_setzero_si128(); + real_P_code_acc = _mm_setzero_si128(); + imag_P_code_acc = _mm_setzero_si128(); + + if (sse_iters>0) + { + for(int number = 0;number < sse_iters; number++){ + + //Perform the carrier wipe-off + x = _mm_load_si128((__m128i*)input_ptr); + y = 
_mm_load_si128((__m128i*)carrier_ptr); + + imagx = _mm_srli_si128 (x, 1); /* one-byte shift moves each odd-indexed byte into the low byte of its 16-bit lane */ + imagx = _mm_and_si128 (imagx, mult1); + realx = _mm_and_si128 (x, mult1); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (realx, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); + realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); + + real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + //Get early values + y = _mm_load_si128((__m128i*)E_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); + realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output); + imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output); + + //Get late values + y = _mm_load_si128((__m128i*)L_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); + realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_L_code_acc = _mm_add_epi16 
(real_L_code_acc, real_output); + imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output); + + //Get prompt values + y = _mm_load_si128((__m128i*)P_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); + realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output); + imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output); + + input_ptr += 8; + carrier_ptr += 8; + E_code_ptr += 8; + L_code_ptr += 8; + P_code_ptr += 8; + } + + __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8]; + __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8]; + __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8]; + + imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1); + output = _mm_blendv_epi8 (imag_E_code_acc, real_E_code_acc, mult1); /* low byte of each lane from the real accumulator, high byte from the shifted imaginary accumulator */ + _mm_store_si128((__m128i*)E_dotProductVector, output); + + imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1); + output = _mm_blendv_epi8 (imag_L_code_acc, real_L_code_acc, mult1); + _mm_store_si128((__m128i*)L_dotProductVector, output); + + imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1); + output = _mm_blendv_epi8 (imag_P_code_acc, real_P_code_acc, mult1); + _mm_store_si128((__m128i*)P_dotProductVector, output); + + for (int i = 0; i<8; ++i) + { + *E_out_ptr += E_dotProductVector[i]; + *L_out_ptr += L_dotProductVector[i]; + *P_out_ptr += P_dotProductVector[i]; + } + } + + lv_8sc_t bb_signal_sample; + for(int i=0; i < num_points%8; ++i) + { + //Perform the carrier wipe-off + bb_signal_sample = (*input_ptr++) * (*carrier_ptr++); + // Now get 
early, late, and prompt values for each + *E_out_ptr += bb_signal_sample * (*E_code_ptr++); + *P_out_ptr += bb_signal_sample * (*P_code_ptr++); + *L_out_ptr += bb_signal_sample * (*L_code_ptr++); + } +} + +#endif /* LV_HAVE_SSE4_1 */ + +#ifdef LV_HAVE_SSE2 +#include "emmintrin.h" +/*! + \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation + \param input The input signal input + \param carrier The carrier signal input + \param E_code Early PRN code replica input + \param P_code Early PRN code replica input + \param L_code Early PRN code replica input + \param E_out Early correlation output + \param P_out Early correlation output + \param L_out Early correlation output + \param num_points The number of complex values in vectors + */ +static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_sse2(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points) +{ + const unsigned int sse_iters = num_points / 8; + + __m128i x, y, real_bb_signal_sample, imag_bb_signal_sample, real_E_code_acc, imag_E_code_acc, real_L_code_acc, imag_L_code_acc, real_P_code_acc, imag_P_code_acc; + __m128i mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, output, real_output, imag_output; + + const lv_8sc_t* input_ptr = input; + const lv_8sc_t* carrier_ptr = carrier; + + const lv_8sc_t* E_code_ptr = E_code; + lv_8sc_t* E_out_ptr = E_out; + const lv_8sc_t* L_code_ptr = L_code; + lv_8sc_t* L_out_ptr = L_out; + const lv_8sc_t* P_code_ptr = P_code; + lv_8sc_t* P_out_ptr = P_out; + + *E_out_ptr = 0; + *P_out_ptr = 0; + *L_out_ptr = 0; + + mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255); + + real_E_code_acc = _mm_setzero_si128(); + imag_E_code_acc = _mm_setzero_si128(); + real_L_code_acc = _mm_setzero_si128(); + imag_L_code_acc = 
_mm_setzero_si128(); + real_P_code_acc = _mm_setzero_si128(); + imag_P_code_acc = _mm_setzero_si128(); + + if (sse_iters>0) + { + for(int number = 0;number < sse_iters; number++){ + + //Perform the carrier wipe-off + x = _mm_load_si128((__m128i*)input_ptr); + y = _mm_load_si128((__m128i*)carrier_ptr); + + imagx = _mm_srli_si128 (x, 1); + imagx = _mm_and_si128 (imagx, mult1); + realx = _mm_and_si128 (x, mult1); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (realx, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy); + realx_mult_imagy = _mm_mullo_epi16 (realx, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imagx, realy); + + real_bb_signal_sample = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_bb_signal_sample = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + //Get early values + y = _mm_load_si128((__m128i*)E_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); + realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_E_code_acc = _mm_add_epi16 (real_E_code_acc, real_output); + imag_E_code_acc = _mm_add_epi16 (imag_E_code_acc, imag_output); + + //Get late values + y = _mm_load_si128((__m128i*)L_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); + realx_mult_imagy = _mm_mullo_epi16 
(real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_L_code_acc = _mm_add_epi16 (real_L_code_acc, real_output); + imag_L_code_acc = _mm_add_epi16 (imag_L_code_acc, imag_output); + + //Get prompt values + y = _mm_load_si128((__m128i*)P_code_ptr); + + imagy = _mm_srli_si128 (y, 1); + imagy = _mm_and_si128 (imagy, mult1); + realy = _mm_and_si128 (y, mult1); + + realx_mult_realy = _mm_mullo_epi16 (real_bb_signal_sample, realy); + imagx_mult_imagy = _mm_mullo_epi16 (imag_bb_signal_sample, imagy); + realx_mult_imagy = _mm_mullo_epi16 (real_bb_signal_sample, imagy); + imagx_mult_realy = _mm_mullo_epi16 (imag_bb_signal_sample, realy); + + real_output = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy); + imag_output = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy); + + real_P_code_acc = _mm_add_epi16 (real_P_code_acc, real_output); + imag_P_code_acc = _mm_add_epi16 (imag_P_code_acc, imag_output); + + input_ptr += 8; + carrier_ptr += 8; + E_code_ptr += 8; + L_code_ptr += 8; + P_code_ptr += 8; + } + + __VOLK_ATTR_ALIGNED(16) lv_8sc_t E_dotProductVector[8]; + __VOLK_ATTR_ALIGNED(16) lv_8sc_t L_dotProductVector[8]; + __VOLK_ATTR_ALIGNED(16) lv_8sc_t P_dotProductVector[8]; + + real_E_code_acc = _mm_and_si128 (real_E_code_acc, mult1); + imag_E_code_acc = _mm_and_si128 (imag_E_code_acc, mult1); + imag_E_code_acc = _mm_slli_si128 (imag_E_code_acc, 1); + output = _mm_or_si128 (real_E_code_acc, imag_E_code_acc); + _mm_store_si128((__m128i*)E_dotProductVector, output); + + real_L_code_acc = _mm_and_si128 (real_L_code_acc, mult1); + imag_L_code_acc = _mm_and_si128 (imag_L_code_acc, mult1); + imag_L_code_acc = _mm_slli_si128 (imag_L_code_acc, 1); + output = _mm_or_si128 (real_L_code_acc, imag_L_code_acc); + _mm_store_si128((__m128i*)L_dotProductVector, output); + + real_P_code_acc = 
_mm_and_si128 (real_P_code_acc, mult1); + imag_P_code_acc = _mm_and_si128 (imag_P_code_acc, mult1); + imag_P_code_acc = _mm_slli_si128 (imag_P_code_acc, 1); + output = _mm_or_si128 (real_P_code_acc, imag_P_code_acc); + _mm_store_si128((__m128i*)P_dotProductVector, output); + + for (int i = 0; i<8; ++i) + { + *E_out_ptr += E_dotProductVector[i]; + *L_out_ptr += L_dotProductVector[i]; + *P_out_ptr += P_dotProductVector[i]; + } + } + + lv_8sc_t bb_signal_sample; + for(int i=0; i < num_points%8; ++i) + { + //Perform the carrier wipe-off + bb_signal_sample = (*input_ptr++) * (*carrier_ptr++); + // Now get early, late, and prompt values for each + *E_out_ptr += bb_signal_sample * (*E_code_ptr++); + *P_out_ptr += bb_signal_sample * (*P_code_ptr++); + *L_out_ptr += bb_signal_sample * (*L_code_ptr++); + } +} + +#endif /* LV_HAVE_SSE2 */ + +#ifdef LV_HAVE_GENERIC +/*! + \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation + \param input The input signal input + \param carrier The carrier signal input + \param E_code Early PRN code replica input + \param P_code Early PRN code replica input + \param L_code Early PRN code replica input + \param E_out Early correlation output + \param P_out Early correlation output + \param L_out Early correlation output + \param num_points The number of complex values in vectors + */ +static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_generic(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points) +{ + lv_8sc_t bb_signal_sample; + + bb_signal_sample = lv_cmake(0, 0); + + *E_out = 0; + *P_out = 0; + *L_out = 0; + // perform Early, Prompt and Late correlation + for(int i=0; i < num_points; ++i) + { + //Perform the carrier wipe-off + bb_signal_sample = input[i] * carrier[i]; + // Now get early, late, and prompt values for each + *E_out += bb_signal_sample * 
E_code[i]; + *P_out += bb_signal_sample * P_code[i]; + *L_out += bb_signal_sample * L_code[i]; + } +} + +#endif /* LV_HAVE_GENERIC */ + +#ifdef LV_HAVE_ORC +/*! + \brief Performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation + \param input The input signal input + \param carrier The carrier signal input + \param E_code Early PRN code replica input + \param P_code Early PRN code replica input + \param L_code Early PRN code replica input + \param E_out Early correlation output + \param P_out Early correlation output + \param L_out Early correlation output + \param num_points The number of complex values in vectors + */ + +extern void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_first_a_orc_impl(short* E_out_real, short* E_out_imag, short* P_out_real, short* P_out_imag, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, unsigned int num_points); +extern void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_second_a_orc_impl(short* L_out_real, short* L_out_imag, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* L_code, unsigned int num_points); +static inline void volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_u_orc(lv_8sc_t* E_out, lv_8sc_t* P_out, lv_8sc_t* L_out, const lv_8sc_t* input, const lv_8sc_t* carrier, const lv_8sc_t* E_code, const lv_8sc_t* P_code, const lv_8sc_t* L_code, unsigned int num_points){ + + short E_out_real = 0; + short E_out_imag = 0; + char* E_out_real_c = (char*)&E_out_real; + E_out_real_c++; + char* E_out_imag_c = (char*)&E_out_imag; + E_out_imag_c++; + + short P_out_real = 0; + short P_out_imag = 0; + char* P_out_real_c = (char*)&P_out_real; + P_out_real_c++; + char* P_out_imag_c = (char*)&P_out_imag; + P_out_imag_c++; + + short L_out_real = 0; + short L_out_imag = 0; + char* L_out_real_c = (char*)&L_out_real; + L_out_real_c++; + char* L_out_imag_c = (char*)&L_out_imag; + L_out_imag_c++; + + volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_first_a_orc_impl( &E_out_real, &E_out_imag, 
&P_out_real, &P_out_imag, input, carrier, E_code, P_code, num_points); + volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_second_a_orc_impl( &L_out_real, &L_out_imag, input, carrier, L_code, num_points); + + //ORC implementation of 8ic_x5_cw_epl_corr_8ic_x3 is done in two different functions because it seems that + //in one function the length of the code gives memory problems (bad access, segmentation fault). + //Also, the maximum number of accumulators that can be used is 4 (and we need 6). + //The "carrier wipe-off" step is done two times: one in the first function and another one in the second. + //Joining all the ORC code in one function would be quicker because the "carrier wipe-off" step would be done just + //one time. + + *E_out = lv_cmake(*E_out_real_c, *E_out_imag_c); + *P_out = lv_cmake(*P_out_real_c, *P_out_imag_c); + *L_out = lv_cmake(*L_out_real_c, *L_out_imag_c); +} +#endif /* LV_HAVE_ORC */ + +#endif /* INCLUDED_gnsssdr_volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_a_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h new file mode 100644 index 000000000..9bb7c94e3 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h @@ -0,0 +1,210 @@ +/*! + * \file volk_gnsssdr_8u_x2_multiply_8u.h + * \brief Volk protokernel: multiplies unsigned char values + * \authors
    + *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com + *
+ * + * Volk protokernel that multiplies unsigned char values (8 bits data) + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. + * + * ------------------------------------------------------------------------- + */ + +#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H +#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H + +#include +#include + +#ifdef LV_HAVE_SSE3 +#include +#include +/*!
+ \brief Multiplies the two input unsigned char vectors element by element and stores the results in a third unsigned char vector
+ \param cChar The unsigned char vector where the results will be stored
+ \param aChar One of the unsigned char vectors to be multiplied
+ \param bChar One of the unsigned char vectors to be multiplied
+ \param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
+
+ Only the low 8 bits of each product are kept. Buffers are accessed with unaligned loads/stores.
+ */
+static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points){
+
+    const unsigned int sse_iters = num_points / 16;
+
+    __m128i x, y, x1, x2, y1, y2, x1_mult_y1, x2_mult_y2, tmp, tmp1, tmp2, totalc;
+    unsigned char* c = cChar;
+    const unsigned char* a = aChar;
+    const unsigned char* b = bChar;
+
+    // 0x00FF in every 16-bit lane (same bit pattern as the former _mm_set_epi8(0, 255, ...)).
+    // Loop-invariant, so it is built once instead of on every iteration.
+    const __m128i mult1 = _mm_set1_epi16(0x00FF);
+
+    for(unsigned int number = 0; number < sse_iters; number++){
+        x = _mm_lddqu_si128((__m128i*)a);
+        y = _mm_lddqu_si128((__m128i*)b);
+
+        // x1/y1: odd-indexed bytes, shifted down into the low byte of each 16-bit lane
+        // x2/y2: even-indexed bytes, already in the low byte of each 16-bit lane
+        x1 = _mm_srli_si128 (x, 1);
+        x1 = _mm_and_si128 (x1, mult1);
+        x2 = _mm_and_si128 (x, mult1);
+
+        y1 = _mm_srli_si128 (y, 1);
+        y1 = _mm_and_si128 (y1, mult1);
+        y2 = _mm_and_si128 (y, mult1);
+
+        x1_mult_y1 = _mm_mullo_epi16 (x1, y1);
+        x2_mult_y2 = _mm_mullo_epi16 (x2, y2);
+
+        // keep the low byte of every product and interleave odd/even results back together
+        tmp = _mm_and_si128 (x1_mult_y1, mult1);
+        tmp1 = _mm_slli_si128 (tmp, 1);
+        tmp2 = _mm_and_si128 (x2_mult_y2, mult1);
+        totalc = _mm_or_si128 (tmp1, tmp2);
+
+        _mm_storeu_si128((__m128i*)c, totalc);
+
+        a += 16;
+        b += 16;
+        c += 16;
+    }
+
+    // scalar tail for the remaining (num_points % 16) values
+    for (unsigned int i = 0; i<(num_points % 16); ++i)
+    {
+        *c++ = (*a++) * (*b++);
+    }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Multiplies the two input unsigned char vectors element by element and stores the results in a third unsigned char vector
+ \param cChar The unsigned char vector where the results will be stored
+ \param aChar One of the unsigned char vectors to be multiplied
+ \param bChar One of the unsigned char vectors to be multiplied
+ \param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
+ */
+static inline void volk_gnsssdr_8u_x2_multiply_8u_generic(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points){
+    unsigned char* cPtr = cChar;
+    const unsigned char* aPtr = aChar;
+    const unsigned char* bPtr = bChar;
+
+    // counter has the same unsigned type as num_points: no signed/unsigned comparison, no int overflow
+    for(unsigned int number = 0; number < num_points; number++){
+        *cPtr++ = (*aPtr++) * (*bPtr++);
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H */
+
+
+#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H
+#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H
+
+#include
+#include
+
+#ifdef LV_HAVE_SSE3
+#include
+#include
+/*!
+ \brief Multiplies the two input unsigned char vectors element by element and stores the results in a third unsigned char vector
+ \param cChar The unsigned char vector where the results will be stored
+ \param aChar One of the unsigned char vectors to be multiplied
+ \param bChar One of the unsigned char vectors to be multiplied
+ \param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
+
+ Only the low 8 bits of each product are kept. Buffers are accessed with aligned loads/stores
+ (_mm_load_si128 / _mm_store_si128).
+ */
+static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points){
+
+    const unsigned int sse_iters = num_points / 16;
+
+    __m128i x, y, x1, x2, y1, y2, x1_mult_y1, x2_mult_y2, tmp, tmp1, tmp2, totalc;
+    unsigned char* c = cChar;
+    const unsigned char* a = aChar;
+    const unsigned char* b = bChar;
+
+    // 0x00FF in every 16-bit lane (same bit pattern as the former _mm_set_epi8(0, 255, ...)).
+    // Loop-invariant, so it is built once instead of on every iteration.
+    const __m128i mult1 = _mm_set1_epi16(0x00FF);
+
+    for(unsigned int number = 0; number < sse_iters; number++){
+        x = _mm_load_si128((__m128i*)a);
+        y = _mm_load_si128((__m128i*)b);
+
+        // x1/y1: odd-indexed bytes, shifted down into the low byte of each 16-bit lane
+        // x2/y2: even-indexed bytes, already in the low byte of each 16-bit lane
+        x1 = _mm_srli_si128 (x, 1);
+        x1 = _mm_and_si128 (x1, mult1);
+        x2 = _mm_and_si128 (x, mult1);
+
+        y1 = _mm_srli_si128 (y, 1);
+        y1 = _mm_and_si128 (y1, mult1);
+        y2 = _mm_and_si128 (y, mult1);
+
+        x1_mult_y1 = _mm_mullo_epi16 (x1, y1);
+        x2_mult_y2 = _mm_mullo_epi16 (x2, y2);
+
+        // keep the low byte of every product and interleave odd/even results back together
+        tmp = _mm_and_si128 (x1_mult_y1, mult1);
+        tmp1 = _mm_slli_si128 (tmp, 1);
+        tmp2 = _mm_and_si128 (x2_mult_y2, mult1);
+        totalc = _mm_or_si128 (tmp1, tmp2);
+
+        _mm_store_si128((__m128i*)c, totalc);
+
+        a += 16;
+        b += 16;
+        c += 16;
+    }
+
+    // scalar tail for the remaining (num_points % 16) values
+    for (unsigned int i = 0; i<(num_points % 16); ++i)
+    {
+        *c++ = (*a++) * (*b++);
+    }
+}
+#endif /* LV_HAVE_SSE3 */
+
+#ifdef LV_HAVE_GENERIC
+/*!
+ \brief Multiplies the two input unsigned char vectors element by element and stores the results in a third unsigned char vector
+ \param cChar The unsigned char vector where the results will be stored
+ \param aChar One of the unsigned char vectors to be multiplied
+ \param bChar One of the unsigned char vectors to be multiplied
+ \param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
+ */
+static inline void volk_gnsssdr_8u_x2_multiply_8u_a_generic(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points){
+    unsigned char* cPtr = cChar;
+    const unsigned char* aPtr = aChar;
+    const unsigned char* bPtr = bChar;
+
+    // counter has the same unsigned type as num_points: no signed/unsigned comparison, no int overflow
+    for(unsigned int number = 0; number < num_points; number++){
+        *cPtr++ = (*aPtr++) * (*bPtr++);
+    }
+}
+#endif /* LV_HAVE_GENERIC */
+
+#ifdef LV_HAVE_ORC
+/*!
+ \brief Multiplies the two input unsigned char vectors element by element and stores the results in a third unsigned char vector
+ \param cVector The unsigned char vector where the results will be stored
+ \param aVector One of the unsigned char vectors to be multiplied
+ \param bVector One of the unsigned char vectors to be multiplied
+ \param num_points The number of unsigned char values in aVector and bVector to be multiplied together and stored into cVector
+
+ Thin wrapper that forwards all arguments to the ORC-generated implementation.
+ NOTE(review): the wrapper is named *_u_orc although it sits in the aligned (_a_H) section and calls
+ *_a_orc_impl -- confirm this is the name the kernel dispatcher expects.
+ */
+extern void volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl(unsigned char* cVector, const unsigned char* aVector, const unsigned char* bVector, unsigned int num_points);
+static inline void volk_gnsssdr_8u_x2_multiply_8u_u_orc(unsigned char* cVector, const unsigned char* aVector, const unsigned char* bVector, unsigned int num_points){
+    volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl(cVector, aVector, bVector, num_points);
+}
+#endif /* LV_HAVE_ORC */
+
+#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H */
diff --git a/src/algorithms/libs/volk_gnsssdr/lib/CMakeLists.txt b/src/algorithms/libs/volk_gnsssdr/lib/CMakeLists.txt
new file mode 100644
index 000000000..197b5ed45
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/lib/CMakeLists.txt
@@
-0,0 +1,572 @@ +# +# Copyright 2011-2012,2014 Free Software Foundation, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +######################################################################## +# header file detection +######################################################################## +include(CheckIncludeFile) +CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H) +if(HAVE_CPUID_H) + add_definitions(-DHAVE_CPUID_H) +endif() + +CHECK_INCLUDE_FILE(intrin.h HAVE_INTRIN_H) +if(HAVE_INTRIN_H) + add_definitions(-DHAVE_INTRIN_H) +endif() + +CHECK_INCLUDE_FILE(fenv.h HAVE_FENV_H) +if(HAVE_FENV_H) + add_definitions(-DHAVE_FENV_H) +endif() + +CHECK_INCLUDE_FILE(dlfcn.h HAVE_DLFCN_H) +if(HAVE_DLFCN_H) + add_definitions(-DHAVE_DLFCN_H) + list(APPEND volk_gnsssdr_libraries ${CMAKE_DL_LIBS}) +endif() + +######################################################################## +# Setup the compiler name +######################################################################## +set(COMPILER_NAME ${CMAKE_C_COMPILER_ID}) +if(MSVC) #its not set otherwise + set(COMPILER_NAME MSVC) +endif() + +message(STATUS "Compiler name: ${COMPILER_NAME}") + +if(NOT DEFINED COMPILER_NAME) + message(FATAL_ERROR "COMPILER_NAME undefined. 
Volk build may not support this compiler.") +endif() + +######################################################################## +# Special clang flag so flag checks can fail +######################################################################## +if(COMPILER_NAME MATCHES "GNU") + include(CheckCXXCompilerFlag) + CHECK_CXX_COMPILER_FLAG("-Werror=unused-command-line-argument" HAVE_WERROR_UNUSED_CMD_LINE_ARG) + if(HAVE_WERROR_UNUSED_CMD_LINE_ARG) + set(VOLK_FLAG_CHECK_FLAGS "-Werror=unused-command-line-argument") + endif() +endif() + +######################################################################## +# check for posix_memalign, since some OSs do not internally define +# _XOPEN_SOURCE or _POSIX_C_SOURCE; they leave this to the user. +######################################################################## + +include(CheckFunctionExists) +CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN) + +if(HAVE_POSIX_MEMALIGN) + add_definitions(-DHAVE_POSIX_MEMALIGN) +endif(HAVE_POSIX_MEMALIGN) + +######################################################################## +# detect x86 flavor of CPU +######################################################################## +if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(i.86|x86|x86_64|amd64)$") + message(STATUS "x86* CPU detected") + set(CPU_IS_x86 TRUE) +endif() + +######################################################################## +# determine passing architectures based on compile flag tests +######################################################################## +execute_process( + COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B} + ${CMAKE_SOURCE_DIR}/gen/volk_gnsssdr_compile_utils.py + --mode "arch_flags" --compiler "${COMPILER_NAME}" + OUTPUT_VARIABLE arch_flag_lines OUTPUT_STRIP_TRAILING_WHITESPACE +) + +macro(check_arch arch_name) + set(flags ${ARGN}) + set(have_${arch_name} TRUE) + foreach(flag ${flags}) + include(CheckCXXCompilerFlag) + set(have_flag have${flag}) + execute_process( #make the have_flag have nice 
alphanum chars (just for looks/not necessary) + COMMAND ${PYTHON_EXECUTABLE} -c "import re; print(re.sub('\\W', '_', '${have_flag}'))" + OUTPUT_VARIABLE have_flag OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(VOLK_FLAG_CHECK_FLAGS) + set(CMAKE_REQUIRED_FLAGS ${VOLK_FLAG_CHECK_FLAGS}) + endif() + CHECK_CXX_COMPILER_FLAG(${flag} ${have_flag}) + unset(CMAKE_REQUIRED_FLAGS) + if (NOT ${have_flag}) + set(have_${arch_name} FALSE) + endif() + endforeach() + if (have_${arch_name}) + list(APPEND available_archs ${arch_name}) + endif() +endmacro(check_arch) + +foreach(line ${arch_flag_lines}) + string(REGEX REPLACE "," ";" arch_flags ${line}) + check_arch(${arch_flags}) +endforeach(line) + +macro(OVERRULE_ARCH arch reason) + message(STATUS "${reason}, Overruled arch ${arch}") + list(REMOVE_ITEM available_archs ${arch}) +endmacro(OVERRULE_ARCH) + +######################################################################## +# eliminate AVX on if not on x86, or if the compiler does not accept +# the xgetbv instruction, or {if not cross-compiling and the xgetbv +# executable does not function correctly}. 
+######################################################################## +set(HAVE_XGETBV 0) +set(HAVE_AVX_CVTPI32_PS 0) +if(CPU_IS_x86) + # check to see if the compiler/linker works with xgetb instruction + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv.c "unsigned long long _xgetbv(unsigned int index) { unsigned int eax, edx; __asm__ __volatile__(\"xgetbv\" : \"=a\"(eax), \"=d\"(edx) : \"c\"(index)); return ((unsigned long long)edx << 32) | eax; } int main (void) { (void) _xgetbv(0); return (0); }") + execute_process(COMMAND ${CMAKE_C_COMPILER} -o + ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv + ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv.c + OUTPUT_QUIET ERROR_QUIET + RESULT_VARIABLE avx_compile_result) + if(NOT ${avx_compile_result} EQUAL 0) + OVERRULE_ARCH(avx "Compiler or linker missing xgetbv instruction") + elseif(NOT CROSSCOMPILE_MULTILIB) + execute_process(COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv + OUTPUT_QUIET ERROR_QUIET + RESULT_VARIABLE avx_exe_result) + if(NOT ${avx_exe_result} EQUAL 0) + OVERRULE_ARCH(avx "CPU missing xgetbv") + else() + set(HAVE_XGETBV 1) + endif() + else() + # cross compiling and compiler/linker seems to work; assume working + set(HAVE_XGETBV 1) + endif() + file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv + ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv.c) + + ######################################################################### + # eliminate AVX if cvtpi32_ps intrinsic fails like some versions of clang + ######################################################################### + + # check to see if the compiler/linker works with cvtpi32_ps instrinsic when using AVX + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps.c "#include \nint main (void) {__m128 __a; __m64 __b; __m128 foo = _mm_cvtpi32_ps(__a, __b); return (0); }") + execute_process(COMMAND ${CMAKE_C_COMPILER} -mavx -o + ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps + ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps.c + OUTPUT_QUIET ERROR_QUIET + RESULT_VARIABLE 
avx_compile_result) + if(NOT ${avx_compile_result} EQUAL 0) + OVERRULE_ARCH(avx "Compiler missing cvtpi32_ps instrinsic") + elseif(NOT CROSSCOMPILE_MULTILIB) + execute_process(COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps + OUTPUT_QUIET ERROR_QUIET + RESULT_VARIABLE avx_exe_result) + if(NOT ${avx_exe_result} EQUAL 0) + OVERRULE_ARCH(avx "CPU missing cvtpi32_ps") + else() + set(HAVE_AVX_CVTPI32_PS 1) + endif() + else() + set(HAVE_AVX_CVTPI32_PS 1) + endif() + file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps + ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps.c) + + # Disable SSE4a if Clang is less than version 3.2 + if("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") + # Figure out the version of Clang + if(CMAKE_VERSION VERSION_LESS "2.8.10") + # Exctract the Clang version from the --version string. + # In cmake 2.8.10, we can just use CMAKE_C_COMPILER_VERSION + # without having to go through these string manipulations + execute_process(COMMAND ${CMAKE_C_COMPILER} --version + OUTPUT_VARIABLE clang_version) + string(REGEX MATCH "[0-9].[0-9]" CMAKE_C_COMPILER_VERSION ${clang_version}) + endif(CMAKE_VERSION VERSION_LESS "2.8.10") + + if(CMAKE_C_COMPILER_VERSION VERSION_LESS "3.2") + OVERRULE_ARCH(sse4_a "Clang >= 3.2 required for SSE4a") + endif(CMAKE_C_COMPILER_VERSION VERSION_LESS "3.2") + endif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") + +endif(CPU_IS_x86) + +if(${HAVE_XGETBV}) + add_definitions(-DHAVE_XGETBV) +endif() + +if(${HAVE_AVX_CVTPI32_PS}) + add_definitions(-DHAVE_AVX_CVTPI32_PS) +endif() + +######################################################################## +# if the CPU is not x86, eliminate all Intel SIMD +######################################################################## + +if(NOT CPU_IS_x86) + OVERRULE_ARCH(3dnow "Architecture is not x86 or x86_64") + OVERRULE_ARCH(mmx "Architecture is not x86 or x86_64") + OVERRULE_ARCH(sse "Architecture is not x86 or x86_64") + OVERRULE_ARCH(sse2 "Architecture is not x86 or x86_64") + OVERRULE_ARCH(sse3 
"Architecture is not x86 or x86_64") + OVERRULE_ARCH(ssse3 "Architecture is not x86 or x86_64") + OVERRULE_ARCH(sse4_a "Architecture is not x86 or x86_64") + OVERRULE_ARCH(sse4_1 "Architecture is not x86 or x86_64") + OVERRULE_ARCH(sse4_2 "Architecture is not x86 or x86_64") + OVERRULE_ARCH(avx "Architecture is not x86 or x86_64") +endif(NOT CPU_IS_x86) + +######################################################################## +# implement overruling in the ORC case, +# since ORC always passes flag detection +######################################################################## +if(NOT ORC_FOUND) + OVERRULE_ARCH(orc "ORC support not found") +endif() + +######################################################################## +# implement overruling in the non-multilib case +# this makes things work when both -m32 and -m64 pass +######################################################################## +if(NOT CROSSCOMPILE_MULTILIB AND CPU_IS_x86) + include(CheckTypeSize) + check_type_size("void*[8]" SIZEOF_CPU BUILTIN_TYPES_ONLY) + if (${SIZEOF_CPU} EQUAL 64) + OVERRULE_ARCH(32 "CPU width is 64 bits") + endif() + if (${SIZEOF_CPU} EQUAL 32) + OVERRULE_ARCH(64 "CPU width is 32 bits") + endif() + + #MSVC 64 bit does not have MMX, overrule it + if (${SIZEOF_CPU} EQUAL 64 AND MSVC) + OVERRULE_ARCH(mmx "No MMX for Win64") + endif() + +endif() + +######################################################################## +# done overrules! 
print the result +######################################################################## +message(STATUS "Available architectures: ${available_archs}") + +######################################################################## +# determine available machines given the available architectures +######################################################################## +execute_process( + COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B} + ${CMAKE_SOURCE_DIR}/gen/volk_gnsssdr_compile_utils.py + --mode "machines" --archs "${available_archs}" + OUTPUT_VARIABLE available_machines OUTPUT_STRIP_TRAILING_WHITESPACE +) + +######################################################################## +# Implement machine overruling for redundant machines: +# A machine is redundant when expansion rules occur, +# and the arch superset passes configuration checks. +# When this occurs, eliminate the redundant machines +# to avoid unnecessary compilation of subset machines. +######################################################################## +foreach(arch mmx orc 64 32) + foreach(machine_name ${available_machines}) + string(REPLACE "_${arch}" "" machine_name_no_arch ${machine_name}) + if (${machine_name} STREQUAL ${machine_name_no_arch}) + else() + list(REMOVE_ITEM available_machines ${machine_name_no_arch}) + endif() + endforeach(machine_name) +endforeach(arch) + +######################################################################## +# done overrules! 
print the result +######################################################################## +message(STATUS "Available machines: ${available_machines}") + +######################################################################## +# Create rules to run the volk_gnsssdr generator +######################################################################## + +#dependencies are all python, xml, and header implementation files +file(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml) +file(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py) +file(GLOB h_files ${CMAKE_SOURCE_DIR}/kernels/volk_gnsssdr/*.h) + +macro(gen_template tmpl output) + list(APPEND volk_gnsssdr_gen_sources ${output}) + add_custom_command( + OUTPUT ${output} + DEPENDS ${xml_files} ${py_files} ${h_files} ${tmpl} + COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B} + ${CMAKE_SOURCE_DIR}/gen/volk_gnsssdr_tmpl_utils.py + --input ${tmpl} --output ${output} ${ARGN} + ) +endmacro(gen_template) + +make_directory(${CMAKE_BINARY_DIR}/include/volk_gnsssdr) + +gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr.tmpl.h ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr.h) +gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr.tmpl.c ${CMAKE_BINARY_DIR}/lib/volk_gnsssdr.c) +gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_typedefs.tmpl.h ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_typedefs.h) +gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_cpu.tmpl.h ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_cpu.h) +gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_cpu.tmpl.c ${CMAKE_BINARY_DIR}/lib/volk_gnsssdr_cpu.c) +gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_config_fixed.tmpl.h ${CMAKE_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_config_fixed.h) +gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_machines.tmpl.h ${CMAKE_BINARY_DIR}/lib/volk_gnsssdr_machines.h) +gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_machines.tmpl.c ${CMAKE_BINARY_DIR}/lib/volk_gnsssdr_machines.c) + +set(BASE_CFLAGS NONE) 
+STRING(TOUPPER ${CMAKE_BUILD_TYPE} CBTU) +MESSAGE(STATUS BUILT TYPE ${CBTU}) +MESSAGE(STATUS "Base cflags = ${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS}") +set(COMPILER_INFO "") +IF(MSVC) + IF(MSVC90) #Visual Studio 9 + SET(cmake_c_compiler_version "Microsoft Visual Studio 9.0") + ELSE(MSVC10) #Visual Studio 10 + SET(cmake_c_compiler_version "Microsoft Visual Studio 10.0") + ELSE(MSVC11) #Visual Studio 11 + SET(cmake_c_compiler_version "Microsoft Visual Studio 11.0") + ELSE(MSVC12) #Visual Studio 12 + SET(cmake_c_compiler_version "Microsoft Visual Studio 12.0") + ENDIF() +ELSE() + execute_process(COMMAND ${CMAKE_C_COMPILER} --version + OUTPUT_VARIABLE cmake_c_compiler_version) +ENDIF(MSVC) +set(COMPILER_INFO "${CMAKE_C_COMPILER}:::${CMAKE_C_FLAGS_${GRCBTU}} ${CMAKE_C_FLAGS}\n${CMAKE_CXX_COMPILER}:::${CMAKE_CXX_FLAGS_${GRCBTU}} ${CMAKE_CXX_FLAGS}\n" ) + +foreach(machine_name ${available_machines}) + #generate machine source + set(machine_source ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr_machine_${machine_name}.c) + gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_gnsssdr_machine_xxx.tmpl.c ${machine_source} ${machine_name}) + + #determine machine flags + execute_process( + COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B} + ${CMAKE_SOURCE_DIR}/gen/volk_gnsssdr_compile_utils.py + --mode "machine_flags" --machine "${machine_name}" --compiler "${COMPILER_NAME}" + OUTPUT_VARIABLE ${machine_name}_flags OUTPUT_STRIP_TRAILING_WHITESPACE + ) + MESSAGE(STATUS "BUILD INFO ::: ${machine_name} ::: ${COMPILER_NAME} ::: ${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS} ${${machine_name}_flags}") + set(COMPILER_INFO "${COMPILER_INFO}${machine_name}:::${COMPILER_NAME}:::${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS} ${${machine_name}_flags}\n" ) + if(${machine_name}_flags) + set_source_files_properties(${machine_source} PROPERTIES COMPILE_FLAGS "${${machine_name}_flags}") + endif() + + #add to available machine defs + string(TOUPPER LV_MACHINE_${machine_name} machine_def) + list(APPEND machine_defs 
${machine_def})
+endforeach(machine_name)
+
+# Convert to a C string to compile and display properly.
+# Every expansion is quoted: an unquoted empty value would drop the input
+# argument (string() then fails on the argument count) and an unquoted
+# value containing ';' would be split and re-joined without the ';'.
+string(STRIP "${cmake_c_compiler_version}" cmake_c_compiler_version)
+string(STRIP "${COMPILER_INFO}" COMPILER_INFO)
+message(STATUS "Compiler Version: ${cmake_c_compiler_version}")
+# Real newlines become the two-character escape \n so that the text can be
+# substituted inside a C string literal by configure_file().
+string(REPLACE "\n" " \\n" cmake_c_compiler_version "${cmake_c_compiler_version}")
+string(REPLACE "\n" " \\n" COMPILER_INFO "${COMPILER_INFO}")
+
+########################################################################
+# Set local include directories first
+########################################################################
+include_directories(
+    ${CMAKE_BINARY_DIR}/include
+    ${CMAKE_SOURCE_DIR}/include
+    ${CMAKE_SOURCE_DIR}/kernels
+    ${CMAKE_CURRENT_BINARY_DIR}
+    ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
+########################################################################
+# Handle ASM support
+# on by default, but let users turn it off
+########################################################################
+if(${CMAKE_VERSION} VERSION_GREATER "2.8.9")
+  set(ASM_ARCHS_AVAILABLE "armv7")
+
+  # The target architecture is detected from the compiler flags only.
+  set(FULL_C_FLAGS "${CMAKE_C_FLAGS}" "${CMAKE_CXX_COMPILER_ARG1}")
+
+  # sort through a list of all architectures we have ASM for
+  # if we find one that matches our current system architecture
+  # set up the assembler flags and include the source files
+  foreach(ARCH ${ASM_ARCHS_AVAILABLE})
+    message(STATUS "--==>> -CFLAGS1: ${FULL_C_FLAGS}")
+    # ASM_ARCH receives the matched text, or is empty when ARCH is absent
+    string(REGEX MATCH "${ARCH}" ASM_ARCH "${FULL_C_FLAGS}")
+    if( ASM_ARCH STREQUAL "armv7" )
+      set(ASM-ATT $ENV{ASM})
+      message(STATUS "---- Adding ASM files") # we always use ATT syntax
+      message(STATUS "-- Detected armv7 architecture; enabling ASM")
+      # setup architecture specific assembler flags
+      set(ARCH_ASM_FLAGS "-mfpu=neon -g")
+      # then add the files
+      include_directories(${CMAKE_SOURCE_DIR}/kernels/volk_gnsssdr/asm/neon)
+      file(GLOB asm_files ${CMAKE_SOURCE_DIR}/kernels/volk_gnsssdr/asm/neon/*.s)
+      foreach(asm_file ${asm_files})
+        list(APPEND
volk_gnsssdr_sources ${asm_file})
+        message(STATUS "Adding source file: ${asm_file}")
+      endforeach(asm_file)
+    endif()
+    set(CMAKE_ASM-ATT_FLAGS_INIT ${ARCH_ASM_FLAGS})
+    enable_language(ASM-ATT) # this must be after flags_init
+    message(STATUS "asm flags: ${CMAKE_ASM-ATT_FLAGS}")
+  endforeach(ARCH)
+
+else(${CMAKE_VERSION} VERSION_GREATER "2.8.9")
+  message(STATUS "Not enabling ASM support. CMake >= 2.8.10 required.")
+endif(${CMAKE_VERSION} VERSION_GREATER "2.8.9")
+
+########################################################################
+# Handle orc support
+########################################################################
+if(ORC_FOUND)
+    #setup orc library usage
+    include_directories(${ORC_INCLUDE_DIRS})
+    link_directories(${ORC_LIBRARY_DIRS})
+    list(APPEND volk_gnsssdr_libraries ${ORC_LIBRARIES})
+
+    #setup orc functions: one generated C source per .orc file
+    file(GLOB orc_files ${CMAKE_SOURCE_DIR}/orc/*.orc)
+    foreach(orc_file ${orc_files})
+
+        #extract the name for the generated c source from the orc file
+        get_filename_component(orc_file_name_we ${orc_file} NAME_WE)
+        set(orcc_gen ${CMAKE_CURRENT_BINARY_DIR}/${orc_file_name_we}.c)
+
+        #create a rule to generate the source and add to the list of sources
+        add_custom_command(
+            COMMAND ${ORCC_EXECUTABLE} --include math.h --implementation -o ${orcc_gen} ${orc_file}
+            DEPENDS ${orc_file} OUTPUT ${orcc_gen}
+        )
+        list(APPEND volk_gnsssdr_sources ${orcc_gen})
+
+    endforeach(orc_file)
+else()
+    message(STATUS "Did not find liborc and orcc, disabling orc support...")
+endif()
+
+
+########################################################################
+# Handle the generated constants
+########################################################################
+
+# Configure-time UTC timestamp. print() is written as a function call so the
+# one-liner runs under both Python 2 and Python 3; the former print statement
+# is a syntax error in Python 3 and left BUILD_DATE empty.
+execute_process(COMMAND ${PYTHON_EXECUTABLE} -c
+    "import time;print(time.strftime('%a, %d %b %Y %H:%M:%S', time.gmtime()))"
+    OUTPUT_VARIABLE BUILD_DATE OUTPUT_STRIP_TRAILING_WHITESPACE
+)
+message(STATUS "Loading build date ${BUILD_DATE} into constants...")
+message(STATUS "Loading 
version ${VERSION} into constants...")
+
+#double escape for windows backslash path separators
+#(quoted: with an unset or empty prefix the unquoted form passes no input
+# argument and string(REPLACE) aborts the configure step)
+string(REPLACE "\\" "\\\\" prefix "${prefix}")
+
+# Substitute the @VAR@ placeholders of constants.c.in; @ONLY leaves any
+# ${...} text in the template untouched.
+configure_file(
+    ${CMAKE_CURRENT_SOURCE_DIR}/constants.c.in
+    ${CMAKE_CURRENT_BINARY_DIR}/constants.c
+@ONLY)
+
+list(APPEND volk_gnsssdr_sources ${CMAKE_CURRENT_BINARY_DIR}/constants.c)
+
+########################################################################
+# Setup the volk_gnsssdr sources list and library
+########################################################################
+# Hide symbols by default on non-Windows toolchains.
+if(NOT WIN32)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
+endif()
+
+# Hand-written sources plus everything registered through gen_template().
+list(APPEND volk_gnsssdr_sources
+    ${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr_prefs.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr_rank_archs.c
+    ${CMAKE_CURRENT_SOURCE_DIR}/volk_gnsssdr_malloc.c
+    ${volk_gnsssdr_gen_sources}
+)
+
+#set the machine definitions where applicable
+#NOTE(review): these paths use CMAKE_CURRENT_BINARY_DIR while the files are
+#generated under ${CMAKE_BINARY_DIR}/lib - presumably the same directory;
+#confirm against the project layout.
+set_source_files_properties(
+    ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr.c
+    ${CMAKE_CURRENT_BINARY_DIR}/volk_gnsssdr_machines.c
+PROPERTIES COMPILE_DEFINITIONS "${machine_defs}")
+
+if(MSVC)
+    #add compatibility includes for stdint types
+    include_directories(${CMAKE_SOURCE_DIR}/cmake/msvc)
+    add_definitions(-DHAVE_CONFIG_H)
+    #compile the sources as C++ due to the lack of complex.h under MSVC
+    set_source_files_properties(${volk_gnsssdr_sources} PROPERTIES LANGUAGE CXX)
+endif()
+
+#create the volk_gnsssdr runtime library
+
+#MODIFICATIONS BY GNSS-SDR
+#The kernel headers and .orc files are listed as sources only so that IDE
+#generators show them (see the source_group() calls below).
+file(GLOB orc ${CMAKE_SOURCE_DIR}/orc/*.orc)
+
+add_library(volk_gnsssdr SHARED ${volk_gnsssdr_sources} ${h_files} ${orc})
+
+source_group("Kernels" FILES ${h_files})
+source_group("ORC Files" FILES ${orc})
+#END OF MODIFICATIONS
+
+target_link_libraries(volk_gnsssdr ${volk_gnsssdr_libraries})
+set_target_properties(volk_gnsssdr PROPERTIES SOVERSION ${LIBVER})
+set_target_properties(volk_gnsssdr PROPERTIES 
DEFINE_SYMBOL "volk_gnsssdr_EXPORTS")
+
+
+# Install the shared library; the runtime and development pieces go into
+# separate install components.
+install(TARGETS volk_gnsssdr
+    LIBRARY DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_runtime" # .so file
+    ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_devel" # .lib file
+    RUNTIME DESTINATION bin COMPONENT "volk_gnsssdr_runtime" # .dll file
+)
+
+# Optional static flavour built from the same source list.
+if(ENABLE_STATIC_LIBS)
+    add_library(volk_gnsssdr_static STATIC ${volk_gnsssdr_sources})
+
+    # The static archive gets the plain library name everywhere except on
+    # Windows, where the rename is skipped.
+    if(NOT WIN32)
+        set_target_properties(volk_gnsssdr_static
+            PROPERTIES OUTPUT_NAME volk_gnsssdr)
+    endif()
+
+    install(TARGETS volk_gnsssdr_static
+        ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_gnsssdr_devel" # .lib file
+    )
+endif()
+
+########################################################################
+# Build the QA test application
+########################################################################
+
+
+if(Boost_FOUND)
+
+    # testqa.cc provides the Boost.Test entry point and links dynamically.
+    set_source_files_properties(
+        ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc PROPERTIES
+        COMPILE_DEFINITIONS "BOOST_TEST_DYN_LINK;BOOST_TEST_MAIN"
+    )
+
+    include_directories(${Boost_INCLUDE_DIRS})
+    link_directories(${Boost_LIBRARY_DIRS})
+
+    add_executable(test_all
+        ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc
+        ${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc
+    )
+    target_link_libraries(test_all volk_gnsssdr ${Boost_LIBRARIES})
+    # NAME/COMMAND signature: CMake replaces the target name with the path
+    # of the built executable; the legacy two-argument form does not.
+    add_test(NAME qa_volk_gnsssdr_test_all COMMAND test_all)
+
+endif()
diff --git a/src/algorithms/libs/volk_gnsssdr/lib/constants.c.in b/src/algorithms/libs/volk_gnsssdr/lib/constants.c.in
new file mode 100644
index 000000000..2f5fdcc3d
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/lib/constants.c.in
@@ -0,0 +1,63 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2013 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version. 
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h> /* NOTE(review): header name lost in extraction; restored by convention - confirm */
+#endif
+
+#include <volk_gnsssdr/constants.h> /* NOTE(review): header name lost in extraction; presumably the declarations of the functions below - confirm */
+
+char* /* each function returns a literal that CMake's configure_file() fills in from the @VAR@ placeholder */
+volk_gnsssdr_prefix()
+{
+  return "@prefix@";
+}
+
+char*
+volk_gnsssdr_build_date()
+{
+  return "@BUILD_DATE@";
+}
+
+char*
+volk_gnsssdr_version()
+{
+  return "@VERSION@";
+}
+
+char*
+volk_gnsssdr_c_compiler()
+{
+  return "@cmake_c_compiler_version@";
+}
+
+char*
+volk_gnsssdr_compiler_flags()
+{
+  return "@COMPILER_INFO@";
+}
+
+char*
+volk_gnsssdr_available_machines()
+{
+  return "@available_machines@";
+}
diff --git a/src/algorithms/libs/volk_gnsssdr/lib/gcc_x86_cpuid.h b/src/algorithms/libs/volk_gnsssdr/lib/gcc_x86_cpuid.h
new file mode 100644
index 000000000..e0254f192
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/lib/gcc_x86_cpuid.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation. 
+ * + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * . + */ + +/* %ecx */ +#define bit_SSE3 (1 << 0) +#define bit_PCLMUL (1 << 1) +#define bit_SSSE3 (1 << 9) +#define bit_FMA (1 << 12) +#define bit_CMPXCHG16B (1 << 13) +#define bit_SSE4_1 (1 << 19) +#define bit_SSE4_2 (1 << 20) +#define bit_MOVBE (1 << 22) +#define bit_POPCNT (1 << 23) +#define bit_AES (1 << 25) +#define bit_XSAVE (1 << 26) +#define bit_OSXSAVE (1 << 27) +#define bit_AVX (1 << 28) +#define bit_F16C (1 << 29) +#define bit_RDRND (1 << 30) + +/* %edx */ +#define bit_CMPXCHG8B (1 << 8) +#define bit_CMOV (1 << 15) +#define bit_MMX (1 << 23) +#define bit_FXSAVE (1 << 24) +#define bit_SSE (1 << 25) +#define bit_SSE2 (1 << 26) + +/* Extended Features */ +/* %ecx */ +#define bit_LAHF_LM (1 << 0) +#define bit_ABM (1 << 5) +#define bit_SSE4a (1 << 6) +#define bit_XOP (1 << 11) +#define bit_LWP (1 << 15) +#define bit_FMA4 (1 << 16) +#define bit_TBM (1 << 21) + +/* %edx */ +#define bit_MMXEXT (1 << 22) +#define bit_LM (1 << 29) +#define bit_3DNOWP (1 << 30) +#define bit_3DNOW (1 << 31) + +/* Extended Features (%eax == 7) */ +#define bit_FSGSBASE (1 << 0) +#define bit_BMI (1 << 3) + +#if defined(__i386__) && defined(__PIC__) +/* %ebx may be the PIC register. */ +#if __GNUC__ >= 3 +#define __cpuid(level, a, b, c, d) \ + __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \ + "cpuid\n\t" \ + "xchg{l}\t{%%}ebx, %1\n\t" \ + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ + : "0" (level)) + +#define __cpuid_count(level, count, a, b, c, d) \ + __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \ + "cpuid\n\t" \ + "xchg{l}\t{%%}ebx, %1\n\t" \ + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ + : "0" (level), "2" (count)) +#else +/* Host GCCs older than 3.0 weren't supporting Intel asm syntax + nor alternatives in i386 code. 
*/ +#define __cpuid(level, a, b, c, d) \ + __asm__ ("xchgl\t%%ebx, %1\n\t" \ + "cpuid\n\t" \ + "xchgl\t%%ebx, %1\n\t" \ + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ + : "0" (level)) + +#define __cpuid_count(level, count, a, b, c, d) \ + __asm__ ("xchgl\t%%ebx, %1\n\t" \ + "cpuid\n\t" \ + "xchgl\t%%ebx, %1\n\t" \ + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ + : "0" (level), "2" (count)) +#endif +#else +#define __cpuid(level, a, b, c, d) \ + __asm__ ("cpuid\n\t" \ + : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ + : "0" (level)) + +#define __cpuid_count(level, count, a, b, c, d) \ + __asm__ ("cpuid\n\t" \ + : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ + : "0" (level), "2" (count)) +#endif + +/* Return highest supported input value for cpuid instruction. ext can + be either 0x0 or 0x8000000 to return highest supported value for + basic or extended cpuid information. Function returns 0 if cpuid + is not supported or whatever cpuid returns in eax register. If sig + pointer is non-null, then first four bytes of the signature + (as found in ebx register) are returned in location pointed by sig. */ + +static __inline unsigned int +__get_cpuid_max (unsigned int __ext, unsigned int *__sig) +{ + unsigned int __eax, __ebx, __ecx, __edx; + +#ifndef __x86_64__ + /* See if we can use cpuid. On AMD64 we always can. */ +#if __GNUC__ >= 3 + __asm__ ("pushf{l|d}\n\t" + "pushf{l|d}\n\t" + "pop{l}\t%0\n\t" + "mov{l}\t{%0, %1|%1, %0}\n\t" + "xor{l}\t{%2, %0|%0, %2}\n\t" + "push{l}\t%0\n\t" + "popf{l|d}\n\t" + "pushf{l|d}\n\t" + "pop{l}\t%0\n\t" + "popf{l|d}\n\t" + : "=&r" (__eax), "=&r" (__ebx) + : "i" (0x00200000)); +#else +/* Host GCCs older than 3.0 weren't supporting Intel asm syntax + nor alternatives in i386 code. 
*/ + __asm__ ("pushfl\n\t" + "pushfl\n\t" + "popl\t%0\n\t" + "movl\t%0, %1\n\t" + "xorl\t%2, %0\n\t" + "pushl\t%0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl\t%0\n\t" + "popfl\n\t" + : "=&r" (__eax), "=&r" (__ebx) + : "i" (0x00200000)); +#endif + + if (!((__eax ^ __ebx) & 0x00200000)) + return 0; +#endif + + /* Host supports cpuid. Return highest supported cpuid input value. */ + __cpuid (__ext, __eax, __ebx, __ecx, __edx); + + if (__sig) + *__sig = __ebx; + + return __eax; +} + +/* Return cpuid data for requested cpuid level, as found in returned + eax, ebx, ecx and edx registers. The function checks if cpuid is + supported and returns 1 for valid cpuid information or 0 for + unsupported cpuid level. All pointers are required to be non-null. */ + +static __inline int +__get_cpuid (unsigned int __level, + unsigned int *__eax, unsigned int *__ebx, + unsigned int *__ecx, unsigned int *__edx) +{ + unsigned int __ext = __level & 0x80000000; + + if (__get_cpuid_max (__ext, 0) < __level) + return 0; + + __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx); + return 1; +} diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_add_quad_aligned16.cc b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_add_quad_aligned16.cc new file mode 100644 index 000000000..771c4a24a --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_add_quad_aligned16.cc @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_16s_add_quad_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + + + +void qa_16s_add_quad_aligned16::t1() { + + volk_gnsssdr_environment_init(); + clock_t start, end; + double total; + const int vlen = 3200; + const int ITERS = 100000; + __VOLK_ATTR_ALIGNED(16) short input0[vlen]; + __VOLK_ATTR_ALIGNED(16) short input1[vlen]; + __VOLK_ATTR_ALIGNED(16) short input2[vlen]; + __VOLK_ATTR_ALIGNED(16) short input3[vlen]; + __VOLK_ATTR_ALIGNED(16) short input4[vlen]; + + __VOLK_ATTR_ALIGNED(16) short output0[vlen]; + __VOLK_ATTR_ALIGNED(16) short output1[vlen]; + __VOLK_ATTR_ALIGNED(16) short output2[vlen]; + __VOLK_ATTR_ALIGNED(16) short output3[vlen]; + __VOLK_ATTR_ALIGNED(16) short output01[vlen]; + __VOLK_ATTR_ALIGNED(16) short output11[vlen]; + __VOLK_ATTR_ALIGNED(16) short output21[vlen]; + __VOLK_ATTR_ALIGNED(16) short output31[vlen]; + + for(int i = 0; i < vlen; ++i) { + short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short plus1 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short minus1 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short plus2 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short minus2 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short plus3 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short minus3 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short plus4 = ((short) (rand() - (RAND_MAX/2))) >> 2; + short minus4 = ((short) (rand() - (RAND_MAX/2))) >> 2; + + input0[i] = plus0 - minus0; + input1[i] = plus1 - minus1; + input2[i] = plus2 - minus2; + input3[i] = plus3 - minus3; + input4[i] = plus4 - minus4; + + } + printf("16s_add_quad_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_gnsssdr_16s_add_quad_aligned16_manual(output0, output1, output2, output3, input0, input1, input2, input3, input4, vlen << 1 , "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; 
++count) { + volk_gnsssdr_16s_add_quad_aligned16_manual(output01, output11, output21, output31, input0, input1, input2, input3, input4, vlen << 1 , "sse2"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse2_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]); + CPPUNIT_ASSERT_EQUAL(output1[i], output11[i]); + CPPUNIT_ASSERT_EQUAL(output2[i], output21[i]); + CPPUNIT_ASSERT_EQUAL(output3[i], output31[i]); + } +} + +#endif diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_add_quad_aligned16.h b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_add_quad_aligned16.h new file mode 100644 index 000000000..3c1ae978b --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_add_quad_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H +#define INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H + +#include +#include + +class qa_16s_add_quad_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16s_add_quad_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_branch_4_state_8_aligned16.cc b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_branch_4_state_8_aligned16.cc new file mode 100644 index 000000000..c11a3a203 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_branch_4_state_8_aligned16.cc @@ -0,0 +1,106 @@ +#include +#include +#include +#include + +//test for ssse3 + +#ifndef LV_HAVE_SSSE3 + +void qa_16s_branch_4_state_8_aligned16::t1() { + printf("ssse3 not available... 
no test performed\n"); +} + +#else + +void qa_16s_branch_4_state_8_aligned16::t1() { + const int num_iters = 1000000; + const int vlen = 32; + + static char permute0[16]__attribute__((aligned(16))) = {0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01, 0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03}; + static char permute1[16]__attribute__((aligned(16))) = {0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03, 0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01}; + static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f}; + static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d}; + static char* permuters[4] = {permute0, permute1, permute2, permute3}; + + unsigned int num_bytes = vlen << 1; + + volk_gnsssdr_environment_init(); + clock_t start, end; + double total; + + __VOLK_ATTR_ALIGNED(16) short target[vlen]; + __VOLK_ATTR_ALIGNED(16) short target2[vlen]; + __VOLK_ATTR_ALIGNED(16) short target3[vlen]; + + __VOLK_ATTR_ALIGNED(16) short src0[vlen]; + __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = { +7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 }; + __VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = { + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; + __VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = { + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; + __VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = { + 0x0000, 0xffff, 0xffff, 0x0000, 
0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 }; + __VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = { + 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff }; + __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; + + + + for(int i = 0; i < vlen; ++i) { + src0[i] = i; + + } + + + printf("16s_branch_4_state_8_aligned\n"); + + + start = clock(); + for(int i = 0; i < num_iters; ++i) { + volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2"); + } + end = clock(); + + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + + printf("permute_and_scalar_add_time: %f\n", total); + + + + start = clock(); + for(int i = 0; i < num_iters; ++i) { + volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target2, src0, permuters, cntl2, cntl3, scalars, "ssse3"); + } + end = clock(); + + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + + printf("branch_4_state_8_time, ssse3: %f\n", total); + + start = clock(); + for(int i = 0; i < num_iters; ++i) { + volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic"); + } + end = clock(); + + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + + printf("permute_and_scalar_add_time, generic: %f\n", total); + + + + for(int i = 0; i < vlen; ++i) { + printf("psa... %d, b4s8... 
%d\n", target[i], target3[i]); + } + + for(int i = 0; i < vlen; ++i) { + + CPPUNIT_ASSERT(target[i] == target2[i]); + CPPUNIT_ASSERT(target[i] == target3[i]); + } +} + + +#endif diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_branch_4_state_8_aligned16.h b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_branch_4_state_8_aligned16.h new file mode 100644 index 000000000..41ab073e0 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_branch_4_state_8_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H +#define INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H + +#include +#include + +class qa_16s_branch_4_state_8_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16s_branch_4_state_8_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_permute_and_scalar_add_aligned16.cc b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_permute_and_scalar_add_aligned16.cc new file mode 100644 index 000000000..74482c490 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_permute_and_scalar_add_aligned16.cc @@ -0,0 +1,78 @@ +#include +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_16s_permute_and_scalar_add_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_16s_permute_and_scalar_add_aligned16::t1() { + const int vlen = 64; + + unsigned int num_bytes = vlen << 1; + + volk_gnsssdr_environment_init(); + clock_t start, end; + double total; + + __VOLK_ATTR_ALIGNED(16) short target[vlen]; + __VOLK_ATTR_ALIGNED(16) short target2[vlen]; + __VOLK_ATTR_ALIGNED(16) short src0[vlen]; + __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl0[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl1[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl2[vlen]; + __VOLK_ATTR_ALIGNED(16) short cntl3[vlen]; + __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; + + for(int i = 0; i < vlen; ++i) { + src0[i] = i; + permute_indexes[i] = (3 * i)%vlen; + cntl0[i] = 0xff; + cntl1[i] = 0xff * (i%2); + cntl2[i] = 0xff * ((i>>1)%2); + cntl3[i] = 0xff * ((i%4) == 3); + } + + printf("16s_permute_and_scalar_add_aligned\n"); + + start = clock(); + for(int i = 0; i < 100000; ++i) { + volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "generic"); + } + end = clock(); + + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + + printf("generic_time: %f\n", total); + + start = clock(); + for(int i = 0; i < 100000; ++i) { + volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target2, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2"); + } + end = clock(); + + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + + printf("sse2_time: %f\n", total); + + + for(int i = 0; i < vlen; ++i) { + //printf("generic... %d, sse2... 
%d\n", target[i], target2[i]); + } + + for(int i = 0; i < vlen; ++i) { + + CPPUNIT_ASSERT(target[i] == target2[i]); + } +} + +#endif diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_permute_and_scalar_add_aligned16.h b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_permute_and_scalar_add_aligned16.h new file mode 100644 index 000000000..3643aeef6 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_permute_and_scalar_add_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H +#define INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H + +#include +#include + +class qa_16s_permute_and_scalar_add_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16s_permute_and_scalar_add_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_quad_max_star_aligned16.cc b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_quad_max_star_aligned16.cc new file mode 100644 index 000000000..d3cd803e6 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_quad_max_star_aligned16.cc @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include + +//test for sse2 + +#ifndef LV_HAVE_SSE2 + +void qa_16s_quad_max_star_aligned16::t1() { + printf("sse2 not available... 
no test performed\n"); +} + +#else + +void qa_16s_quad_max_star_aligned16::t1() { + const int vlen = 34; + + __VOLK_ATTR_ALIGNED(16) short input0[vlen]; + __VOLK_ATTR_ALIGNED(16) short input1[vlen]; + __VOLK_ATTR_ALIGNED(16) short input2[vlen]; + __VOLK_ATTR_ALIGNED(16) short input3[vlen]; + + __VOLK_ATTR_ALIGNED(16) short output0[vlen]; + __VOLK_ATTR_ALIGNED(16) short output1[vlen]; + + for(int i = 0; i < vlen; ++i) { + short plus0 = (short) (rand() - (RAND_MAX/2)); + short plus1 = (short) (rand() - (RAND_MAX/2)); + short plus2 = (short) (rand() - (RAND_MAX/2)); + short plus3 = (short) (rand() - (RAND_MAX/2)); + + short minus0 = (short) (rand() - (RAND_MAX/2)); + short minus1 = (short) (rand() - (RAND_MAX/2)); + short minus2 = (short) (rand() - (RAND_MAX/2)); + short minus3 = (short) (rand() - (RAND_MAX/2)); + + input0[i] = plus0 - minus0; + input1[i] = plus1 - minus1; + input2[i] = plus2 - minus2; + input3[i] = plus3 - minus3; + } + + volk_gnsssdr_16s_quad_max_star_aligned16_manual(output0, input0, input1, input2, input3, 2*vlen, "generic"); + + volk_gnsssdr_16s_quad_max_star_aligned16_manual(output1, input0, input1, input2, input3, 2*vlen, "sse2"); + + printf("16s_quad_max_star_aligned\n"); + for(int i = 0; i < vlen; ++i) { + printf("generic... %d, sse2... 
%d, inputs: %d, %d, %d, %d\n", output0[i], output1[i], input0[i], input1[i], input2[i], input3[i]); + } + + for(int i = 0; i < vlen; ++i) { + + CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]); + } +} + +#endif diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_quad_max_star_aligned16.h b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_quad_max_star_aligned16.h new file mode 100644 index 000000000..51e77081a --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_16s_quad_max_star_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H +#define INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H + +#include +#include + +class qa_16s_quad_max_star_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_16s_quad_max_star_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_32f_fm_detect_aligned16.cc b/src/algorithms/libs/volk_gnsssdr/lib/qa_32f_fm_detect_aligned16.cc new file mode 100644 index 000000000..6c30de171 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_32f_fm_detect_aligned16.cc @@ -0,0 +1,61 @@ +#include +#include +#include +#include +#include + +//test for sse + +#ifndef LV_HAVE_SSE + +void qa_32f_fm_detect_aligned16::t1() { + printf("sse not available... 
no test performed\n"); +} + +#else + +void qa_32f_fm_detect_aligned16::t1() { + + volk_gnsssdr_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 10000; + __VOLK_ATTR_ALIGNED(16) float input0[vlen]; + + __VOLK_ATTR_ALIGNED(16) float output0[vlen]; + __VOLK_ATTR_ALIGNED(16) float output01[vlen]; + + for(int i = 0; i < vlen; ++i) { + input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2)); + } + printf("32f_fm_detect_aligned\n"); + + start = clock(); + float save = 0.1; + for(int count = 0; count < ITERS; ++count) { + volk_gnsssdr_32f_fm_detect_aligned16_manual(output0, input0, 1.0, &save, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + save = 0.1; + for(int count = 0; count < ITERS; ++count) { + volk_gnsssdr_32f_fm_detect_aligned16_manual(output01, input0, 1.0, &save, vlen, "sse"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse_time: %f\n", total); + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i]) * 1e-4); + } +} + +#endif diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_32f_fm_detect_aligned16.h b/src/algorithms/libs/volk_gnsssdr/lib/qa_32f_fm_detect_aligned16.h new file mode 100644 index 000000000..a2680c524 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_32f_fm_detect_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H +#define INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H + +#include +#include + +class qa_32f_fm_detect_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_fm_detect_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_32f_index_max_aligned16.cc b/src/algorithms/libs/volk_gnsssdr/lib/qa_32f_index_max_aligned16.cc new file mode 100644 index 000000000..99ea2bc5d --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_32f_index_max_aligned16.cc @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include +#include + +#define ERR_DELTA (1e-4) +#define NUM_ITERS 1000000 +#define VEC_LEN 3097 +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + unsigned int i = 0; + for (; i < n; i++) { + + buf[i] = uniform () * 32767; + + } +} + + +#ifndef LV_HAVE_SSE + +void qa_32f_index_max_aligned16::t1(){ + printf("sse not available... 
no test performed\n"); +} + +#else +// Runs the 32f index-max kernel through the "generic" and "sse2" manual entry points and through the runtime dispatcher, +// prints the CPU time spent in each, and asserts that all three report the same index. +void qa_32f_index_max_aligned16::t1(){ + + const int vlen = VEC_LEN; + + + volk_gnsssdr_runtime_init(); + + volk_gnsssdr_environment_init(); + int ret; + + unsigned int* target_sse4_1; + unsigned int* target_sse; + unsigned int* target_generic; + float* src0 ; + + + unsigned int i_target_sse4_1; + target_sse4_1 = &i_target_sse4_1; + unsigned int i_target_sse; + target_sse = &i_target_sse; + unsigned int i_target_generic; + target_generic = &i_target_generic; + + ret = posix_memalign((void**)&src0, 16, vlen *sizeof(float)); + CPPUNIT_ASSERT_EQUAL(0, ret); // src0 is only valid when posix_memalign returned 0 + random_floats((float*)src0, vlen); + + printf("32f_index_max_aligned16\n"); + + clock_t start, end; + double total; + + // Each implementation is timed over NUM_ITERS calls on the same input vector. + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + volk_gnsssdr_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic time: %f\n", total); + + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + volk_gnsssdr_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2"); + } + + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse time: %f\n", total); + + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + get_volk_gnsssdr_runtime()->volk_gnsssdr_32f_index_max_aligned16(target_sse4_1, src0, vlen); + } + + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse4.1 time: %f\n", total); + + + printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], target_sse[0], target_sse4_1[0]); + CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse[0]); + CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse4_1[0]); + + free(src0); +} + +#endif /*LV_HAVE_SSE*/ diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_32f_index_max_aligned16.h b/src/algorithms/libs/volk_gnsssdr/lib/qa_32f_index_max_aligned16.h new file mode 100644 index 000000000..8cadffa47 --- /dev/null +++ 
b/src/algorithms/libs/volk_gnsssdr/lib/qa_32f_index_max_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H +#define INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H + +#include +#include + +class qa_32f_index_max_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32f_index_max_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_32fc_index_max_aligned16.cc b/src/algorithms/libs/volk_gnsssdr/lib/qa_32fc_index_max_aligned16.cc new file mode 100644 index 000000000..aa5f7165d --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_32fc_index_max_aligned16.cc @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include + +#define ERR_DELTA (1e-4) +#define NUM_ITERS 1000000 +#define VEC_LEN 3096 +static float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +static void +random_floats (float *buf, unsigned n) +{ + unsigned int i = 0; + for (; i < n; i++) { + + buf[i] = uniform () * 32767; + + } +} + + +#ifndef LV_HAVE_SSE3 + +void qa_32fc_index_max_aligned16::t1(){ + printf("sse3 not available... 
no test performed\n"); +} + +#else +// Runs the 32fc index-max kernel through the "generic" and "sse3" manual entry points on the same random buffer, +// prints the CPU time spent in each, and asserts that the two reported indices agree to within one position. +void qa_32fc_index_max_aligned16::t1(){ + + const int vlen = VEC_LEN; + + volk_gnsssdr_environment_init(); + int ret; + + unsigned int* target; + unsigned int* target_generic; + std::complex* src0 ; + + + unsigned int i_target; + target = &i_target; + unsigned int i_target_generic; + target_generic = &i_target_generic; + ret = posix_memalign((void**)&src0, 16, vlen << 3); + CPPUNIT_ASSERT_EQUAL(0, ret); // src0 is only valid when posix_memalign returned 0 + random_floats((float*)src0, vlen * 2); + + printf("32fc_index_max_aligned16\n"); + + clock_t start, end; + double total; + + // NOTE(review): the kernel receives vlen << 3, the same value used above as the allocation size in bytes; confirm its last argument is a byte count. + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + volk_gnsssdr_32fc_index_max_aligned16_manual(target_generic, src0, vlen << 3, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic time: %f\n", total); + + start = clock(); + for(int k = 0; k < NUM_ITERS; ++k) { + volk_gnsssdr_32fc_index_max_aligned16_manual(target, src0, vlen << 3, "sse3"); + } + + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3 time: %f\n", total); + + + + + printf("generic: %u, sse3: %u\n", target_generic[0], target[0]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], 1.1); // a delta of 1.1 accepts indices that differ by at most one + + + + free(src0); +} + +#endif /*LV_HAVE_SSE3*/ diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_32fc_index_max_aligned16.h b/src/algorithms/libs/volk_gnsssdr/lib/qa_32fc_index_max_aligned16.h new file mode 100644 index 000000000..0990bcb1f --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_32fc_index_max_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H +#define INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H + +#include +#include + +class qa_32fc_index_max_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_index_max_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H */ diff --git 
a/src/algorithms/libs/volk_gnsssdr/lib/qa_32fc_power_spectral_density_32f_aligned16.cc b/src/algorithms/libs/volk_gnsssdr/lib/qa_32fc_power_spectral_density_32f_aligned16.cc new file mode 100644 index 000000000..9467ff973 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_32fc_power_spectral_density_32f_aligned16.cc @@ -0,0 +1,64 @@ +#include +#include +#include +#include +#include + +//test for sse3 + +#ifndef LV_HAVE_SSE3 + +void qa_32fc_power_spectral_density_32f_aligned16::t1() { + printf("sse3 not available... no test performed\n"); +} + +#else + +void qa_32fc_power_spectral_density_32f_aligned16::t1() { + + volk_gnsssdr_environment_init(); + clock_t start, end; + double total; + const int vlen = 3201; + const int ITERS = 10000; + __VOLK_ATTR_ALIGNED(16) std::complex input0[vlen]; + + __VOLK_ATTR_ALIGNED(16) float output_generic[vlen]; + __VOLK_ATTR_ALIGNED(16) float output_sse3[vlen]; + + const float scalar = vlen; + const float rbw = 1.7; + + float* inputLoad = (float*)input0; + for(int i = 0; i < 2*vlen; ++i) { + inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))); + } + printf("32fc_power_spectral_density_32f_aligned\n"); + + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_gnsssdr_32fc_power_spectral_density_32f_aligned16_manual(output_generic, input0, scalar, rbw, vlen, "generic"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("generic_time: %f\n", total); + start = clock(); + for(int count = 0; count < ITERS; ++count) { + volk_gnsssdr_32fc_power_spectral_density_32f_aligned16_manual(output_sse3, input0, scalar, rbw, vlen, "sse3"); + } + end = clock(); + total = (double)(end-start)/(double)CLOCKS_PER_SEC; + printf("sse3_time: %f\n", total); + + for(int i = 0; i < 1; ++i) { + //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); + //printf("generic... %d, ssse3... 
%d\n", output0[i], output1[i]); + } + + for(int i = 0; i < vlen; ++i) { + //printf("%d...%d\n", output0[i], output01[i]); + CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4)); + } +} + +#endif diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_32fc_power_spectral_density_32f_aligned16.h b/src/algorithms/libs/volk_gnsssdr/lib/qa_32fc_power_spectral_density_32f_aligned16.h new file mode 100644 index 000000000..26f430bec --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_32fc_power_spectral_density_32f_aligned16.h @@ -0,0 +1,18 @@ +#ifndef INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H +#define INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H + +#include +#include + +class qa_32fc_power_spectral_density_32f_aligned16 : public CppUnit::TestCase { + + CPPUNIT_TEST_SUITE (qa_32fc_power_spectral_density_32f_aligned16); + CPPUNIT_TEST (t1); + CPPUNIT_TEST_SUITE_END (); + + private: + void t1 (); +}; + + +#endif /* INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H */ diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_utils.cc b/src/algorithms/libs/volk_gnsssdr/lib/qa_utils.cc new file mode 100644 index 000000000..65b3af1ca --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_utils.cc @@ -0,0 +1,704 @@ +#include "qa_utils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +float uniform() { + return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) +} + +template +void random_floats (t *buf, unsigned n) +{ + for (unsigned i = 0; i < n; i++) + buf[i] = uniform (); +} + +void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n) { + if(type.is_complex) n *= 2; + if(type.is_float) { + if(type.size == 8) random_floats((double *)data, n); + else random_floats((float *)data, n); + } else { + float int_max = float(uint64_t(2) << 
(type.size*8)); + if(type.is_signed) int_max /= 2.0; + for(unsigned int i=0; i((RAND_MAX/2))) * int_max; + //man i really don't know how to do this in a more clever way, you have to cast down at some point + switch(type.size) { + case 8: + if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand; + else ((uint64_t *)data)[i] = (uint64_t) scaled_rand; + break; + case 4: + if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand; + else ((uint32_t *)data)[i] = (uint32_t) scaled_rand; + break; + case 2: + if(type.is_signed) ((int16_t *)data)[i] = (int16_t) scaled_rand; + else ((uint16_t *)data)[i] = (uint16_t) scaled_rand; + break; + case 1: + if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand; + else ((uint8_t *)data)[i] = (uint8_t) scaled_rand; + break; + default: + throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here + } + } + } +} + +static std::vector get_arch_list(volk_gnsssdr_func_desc_t desc) { + std::vector archlist; + + for(size_t i = 0; i < desc.n_impls; i++) { + //if(!(archs[i+1] & volk_gnsssdr_get_lvarch())) continue; //this arch isn't available on this pc + archlist.push_back(std::string(desc.impl_names[i])); + } + + return archlist; +} + +volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string name) { + volk_gnsssdr_type_t type; + type.is_float = false; + type.is_scalar = false; + type.is_complex = false; + type.is_signed = false; + type.size = 0; + type.str = name; + + if(name.size() < 2) throw std::string("name too short to be a datatype"); + + //is it a scalar? 
+ if(name[0] == 's') { + type.is_scalar = true; + name = name.substr(1, name.size()-1); + } + + //get the data size + size_t last_size_pos = name.find_last_of("0123456789"); + if(last_size_pos == std::string::npos) + throw std::string("no size spec in type ").append(name); + //will throw if malformed + int size = boost::lexical_cast(name.substr(0, last_size_pos+1)); + + assert(((size % 8) == 0) && (size <= 64) && (size != 0)); + type.size = size/8; //in bytes + //every character after the size digits is a one-letter type flag + for(size_t i=last_size_pos+1; i < name.size(); i++) { + switch (name[i]) { + case 'f': + type.is_float = true; + break; + case 'i': + type.is_signed = true; + break; + case 'c': + type.is_complex = true; + break; + case 'u': + type.is_signed = false; + break; + default: + throw std::string("unknown type specifier in ").append(name); //a bare throw here has no exception to rethrow and would call std::terminate + } + } + + return type; +} + +static void get_signatures_from_name(std::vector &inputsig, + std::vector &outputsig, + std::string name) { + boost::char_separator sep("_"); + boost::tokenizer > tok(name, sep); + std::vector toked; + tok.assign(name); + toked.assign(tok.begin(), tok.end()); + assert(toked[0] == "volk"); + toked.erase(toked.begin()); //drops "volk" + toked.erase(toked.begin()); //drops the next token as well (presumably the "gnsssdr" prefix; confirm) + + //ok. we're assuming a string in the form + //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment) + + enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT; + std::string fn_name; + volk_gnsssdr_type_t type; + BOOST_FOREACH(std::string token, toked) { + try { + type = volk_gnsssdr_type_from_string(token); + if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name... 
+ + if(side == SIDE_INPUT) inputsig.push_back(type); + else outputsig.push_back(type); + } catch (...){ + if(token[0] == 'x') { //it's a multiplier + if(side == SIDE_INPUT) assert(inputsig.size() > 0); + else assert(outputsig.size() > 0); + int multiplier = boost::lexical_cast(token.substr(1, token.size()-1)); //will throw if invalid + for(int i=1; i &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], vlen, arch.c_str()); +} + +inline void run_cast_test2(volk_gnsssdr_fn_2arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], vlen, arch.c_str()); +} + +inline void run_cast_test3(volk_gnsssdr_fn_3arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str()); +} + +inline void run_cast_test4(volk_gnsssdr_fn_4arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str()); +} + +inline void run_cast_test1_s32f(volk_gnsssdr_fn_1arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test2_s32f(volk_gnsssdr_fn_2arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test3_s32f(volk_gnsssdr_fn_3arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test1_s32fc(volk_gnsssdr_fn_1arg_s32fc func, std::vector &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], 
scalar, vlen, arch.c_str()); +} + +inline void run_cast_test2_s32fc(volk_gnsssdr_fn_2arg_s32fc func, std::vector &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test3_s32fc(volk_gnsssdr_fn_3arg_s32fc func, std::vector &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); +} + +//ADDED BY GNSS-SDR. START +inline void run_cast_test1_s8i(volk_gnsssdr_fn_1arg_s8i func, std::vector &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test2_s8i(volk_gnsssdr_fn_2arg_s8i func, std::vector &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test3_s8i(volk_gnsssdr_fn_3arg_s8i func, std::vector &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test1_s8ic(volk_gnsssdr_fn_1arg_s8ic func, std::vector &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test2_s8ic(volk_gnsssdr_fn_2arg_s8ic func, std::vector &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test3_s8ic(volk_gnsssdr_fn_3arg_s8ic func, std::vector &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test8(volk_gnsssdr_fn_8arg func, 
std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], vlen, arch.c_str()); +} + +inline void run_cast_test8_s8i(volk_gnsssdr_fn_8arg_s8i func, std::vector &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test8_s8ic(volk_gnsssdr_fn_8arg_s8ic func, std::vector &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test8_s32f(volk_gnsssdr_fn_8arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test8_s32fc(volk_gnsssdr_fn_8arg_s32fc func, std::vector &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test12(volk_gnsssdr_fn_12arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], vlen, arch.c_str()); +} + +inline void run_cast_test12_s8i(volk_gnsssdr_fn_12arg_s8i func, std::vector &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, 
arch.c_str()); +} + +inline void run_cast_test12_s8ic(volk_gnsssdr_fn_12arg_s8ic func, std::vector &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test12_s32f(volk_gnsssdr_fn_12arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str()); +} + +inline void run_cast_test12_s32fc(volk_gnsssdr_fn_12arg_s32fc func, std::vector &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { + while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], buffs[4], buffs[5], buffs[6], buffs[7], buffs[8], buffs[9], buffs[10], buffs[11], scalar, vlen, arch.c_str()); +} +//ADDED BY GNSS-SDR. END + +// This function is a nop that helps resolve GNU Radio bugs 582 and 583. +// Without this the cast in run_volk_gnsssdr_tests for tol_i = static_cast(float tol) +// won't happen on armhf (reported on cortex A9 and A15). 
+void lv_force_cast_hf( int tol_i, float tol_f) +{ + int diff_i = 1; + float diff_f = 1; + if( diff_i > tol_i ) + std::cout << "" ; + if( diff_f > tol_f ) + std::cout << "" ; +} +// Element-wise comparison of two buffers; returns true when any element fails the tolerance test (at most 10 failures are printed). +template +bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { + bool fail = false; + int print_max_errs = 10; + for(unsigned int i=0; i tol ) + { + fail=true; + if(print_max_errs-- > 0) { + std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]) << std::endl; + } + } + } + // the primary test is the percent different greater than given tol + else if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/fabs(((t *)in1)[i]) > tol) { //divide by the magnitude of the reference so a negative in1[i] cannot make the ratio negative and hide a mismatch + fail=true; + if(print_max_errs-- > 0) { + std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]) << std::endl; + } + } + } + + return fail; +} + +template +bool ccompare(t *in1, t *in2, unsigned int vlen, float tol) { + bool fail = false; + int print_max_errs = 10; + for(unsigned int i=0; i<2*vlen; i+=2) { + t diff[2] = { in1[i] - in2[i], in1[i+1] - in2[i+1] }; + t err = std::sqrt(diff[0] * diff[0] + diff[1] * diff[1]); + t norm = std::sqrt(in1[i] * in1[i] + in1[i+1] * in1[i+1]); + + // for very small numbers we'll see round off errors due to limited + // precision. So a special test case... 
+ if (norm < 1e-30) { + if (err > tol) + { + fail=true; + if(print_max_errs-- > 0) { + std::cout << "offset " << i/2 << " in1: " << in1[i] << " + " << in1[i+1] << "j in2: " << in2[i] << " + " << in2[i+1] << "j" << std::endl; + } + } + } + // the primary test is the percent different greater than given tol + else if((err / norm) > tol) { + fail=true; + if(print_max_errs-- > 0) { + std::cout << "offset " << i/2 << " in1: " << in1[i] << " + " << in1[i+1] << "j in2: " << in2[i] << " + " << in2[i+1] << "j" << std::endl; + } + } + } + + return fail; +} + +template +bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { + bool fail = false; + int print_max_errs = 10; + for(unsigned int i=0; i tol) { + fail=true; + if(print_max_errs-- > 0) { + std::cout << "offset " << i << " in1: " << static_cast(t(((t *)(in1))[i])) << " in2: " << static_cast(t(((t *)(in2))[i])) << std::endl; + } + } + } + + return fail; +} + +class volk_gnsssdr_qa_aligned_mem_pool{ +public: + void *get_new(size_t size){ + size_t alignment = volk_gnsssdr_get_alignment(); + void* ptr = volk_gnsssdr_malloc(size, alignment); + memset(ptr, 0x00, size); + _mems.push_back(ptr); + return ptr; + } + ~volk_gnsssdr_qa_aligned_mem_pool() { + for(unsigned int ii = 0; ii < _mems.size(); ++ii) { + volk_gnsssdr_free(_mems[ii]); + } + } +private: std::vector _mems; +}; + +bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc, + void (*manual_func)(), + std::string name, + float tol, + lv_32fc_t scalar, + int vlen, + int iter, + std::vector *best_arch_vector = 0, + std::string puppet_master_name = "NULL", + bool benchmark_mode, + std::string kernel_regex + ) { + boost::xpressive::sregex kernel_expression = boost::xpressive::sregex::compile(kernel_regex); + if( !boost::xpressive::regex_search(name, kernel_expression) ) { + // in this case we have a regex and are only looking to test one kernel + return false; + } + std::cout << "RUN_VOLK_TESTS: " << name << "(" << vlen << "," << iter << ")" << std::endl; 
+ + // The multiply and lv_force_cast_hf are work arounds for GNU Radio bugs 582 and 583 + // The bug is the casting/assignment below do not happen, which results in false + // positives when testing for errors in fcompare and icompare. + // Since this only happens on armhf (reported for Cortex A9 and A15) combined with + // the following fixes it is suspected to be a compiler bug. + // Bug 1272024 on launchpad has been filed with Linaro GCC. + const float tol_f = tol*1.0000001; + const unsigned int tol_i = static_cast(tol); + lv_force_cast_hf( tol_i, tol_f ); + + //first let's get a list of available architectures for the test + std::vector arch_list = get_arch_list(desc); + + if((!benchmark_mode) && (arch_list.size() < 2)) { + std::cout << "no architectures to test" << std::endl; + return false; + } + + //something that can hang onto memory and cleanup when this function exits + volk_gnsssdr_qa_aligned_mem_pool mem_pool; + + //now we have to get a function signature by parsing the name + std::vector inputsig, outputsig; + get_signatures_from_name(inputsig, outputsig, name); + + //pull the input scalars into their own vector + std::vector inputsc; + for(size_t i=0; i inbuffs; + BOOST_FOREACH(volk_gnsssdr_type_t sig, inputsig) { + if(!sig.is_scalar) //we don't make buffers for scalars + inbuffs.push_back(mem_pool.get_new(vlen*sig.size*(sig.is_complex ? 
2 : 1))); + } + for(size_t i=0; i > test_data; + for(size_t i=0; i arch_buffs; + for(size_t j=0; j both_sigs; + both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end()); + both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end()); + + //now run the test + clock_t start, end; + std::vector profile_times; + for(size_t i = 0; i < arch_list.size(); i++) { + start = clock(); + + switch(both_sigs.size()) { + case 1: + if(inputsc.size() == 0) { + run_cast_test1((volk_gnsssdr_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + if(inputsc[0].is_complex) { + run_cast_test1_s32fc((volk_gnsssdr_fn_1arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else { + run_cast_test1_s32f((volk_gnsssdr_fn_1arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); + } + } + //ADDED BY GNSS-SDR. START + else if(inputsc.size() == 1 && !inputsc[0].is_float) { + if(inputsc[0].is_complex) { + run_cast_test1_s8ic((volk_gnsssdr_fn_1arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else { + run_cast_test1_s8i((volk_gnsssdr_fn_1arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); + } + } + //ADDED BY GNSS-SDR. END + else throw "unsupported 1 arg function >1 scalars"; + break; + case 2: + if(inputsc.size() == 0) { + run_cast_test2((volk_gnsssdr_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + if(inputsc[0].is_complex) { + run_cast_test2_s32fc((volk_gnsssdr_fn_2arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else { + run_cast_test2_s32f((volk_gnsssdr_fn_2arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); + } + } + //ADDED BY GNSS-SDR. 
START + else if(inputsc.size() == 1 && !inputsc[0].is_float) { + if(inputsc[0].is_complex) { + run_cast_test2_s8ic((volk_gnsssdr_fn_2arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else { + run_cast_test2_s8i((volk_gnsssdr_fn_2arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); + } + } + //ADDED BY GNSS-SDR. END + else throw "unsupported 2 arg function >1 scalars"; + break; + case 3: + if(inputsc.size() == 0) { + run_cast_test3((volk_gnsssdr_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + if(inputsc[0].is_complex) { + run_cast_test3_s32fc((volk_gnsssdr_fn_3arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else { + run_cast_test3_s32f((volk_gnsssdr_fn_3arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); + } + } + //ADDED BY GNSS-SDR. START + else if(inputsc.size() == 1 && !inputsc[0].is_float) { + if(inputsc[0].is_complex) { + run_cast_test3_s8ic((volk_gnsssdr_fn_3arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else { + run_cast_test3_s8i((volk_gnsssdr_fn_3arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); + } + } + //ADDED BY GNSS-SDR. END + else throw "unsupported 3 arg function >1 scalars"; + break; + case 4: + run_cast_test4((volk_gnsssdr_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + break; + //ADDED BY GNSS-SDR. 
START + case 8: + if(inputsc.size() == 0) { + run_cast_test8((volk_gnsssdr_fn_8arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + if(inputsc[0].is_complex) { + run_cast_test8_s32fc((volk_gnsssdr_fn_8arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else { + run_cast_test8_s32f((volk_gnsssdr_fn_8arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); + } + } + else if(inputsc.size() == 1 && !inputsc[0].is_float) { + if(inputsc[0].is_complex) { + run_cast_test8_s8ic((volk_gnsssdr_fn_8arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else { + run_cast_test8_s8i((volk_gnsssdr_fn_8arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); + } + } + else throw "unsupported 8 arg function >1 scalars"; + break; + case 12: + if(inputsc.size() == 0) { + run_cast_test12((volk_gnsssdr_fn_12arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); + } else if(inputsc.size() == 1 && inputsc[0].is_float) { + if(inputsc[0].is_complex) { + run_cast_test12_s32fc((volk_gnsssdr_fn_12arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else { + run_cast_test12_s32f((volk_gnsssdr_fn_12arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); + } + } + else if(inputsc.size() == 1 && !inputsc[0].is_float) { + if(inputsc[0].is_complex) { + run_cast_test12_s8ic((volk_gnsssdr_fn_12arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); + } else { + run_cast_test12_s8i((volk_gnsssdr_fn_12arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); + } + } + else throw "unsupported 12 arg function >1 scalars"; + break; + //ADDED BY GNSS-SDR. 
END + default: + throw "no function handler for this signature"; + break; + } + + end = clock(); + double arch_time = 1000.0 * (double)(end-start)/(double)CLOCKS_PER_SEC; + std::cout << arch_list[i] << " completed in " << arch_time << "ms" << std::endl; + + profile_times.push_back(arch_time); + } + + //and now compare each output to the generic output + //first we have to know which output is the generic one, they aren't in order... + size_t generic_offset=0; + for(size_t i=0; i arch_results; + for(size_t i=0; i::max(); + double best_time_u = std::numeric_limits::max(); + std::string best_arch_a = "generic"; + std::string best_arch_u = "generic"; + for(size_t i=0; i < arch_list.size(); i++) + { + if((profile_times[i] < best_time_u) && arch_results[i] && desc.impl_alignment[i] == 0) + { + best_time_u = profile_times[i]; + best_arch_u = arch_list[i]; + } + if((profile_times[i] < best_time_a) && arch_results[i]) + { + best_time_a = profile_times[i]; + best_arch_a = arch_list[i]; + } + } + + std::cout << "Best aligned arch: " << best_arch_a << std::endl; + std::cout << "Best unaligned arch: " << best_arch_u << std::endl; + if(best_arch_vector) { + if(puppet_master_name == "NULL") { + best_arch_vector->push_back(name + " " + best_arch_a + " " + best_arch_u); + } + else { + best_arch_vector->push_back(puppet_master_name + " " + best_arch_a + " " + best_arch_u); + } + } + + return fail_global; +} + + diff --git a/src/algorithms/libs/volk_gnsssdr/lib/qa_utils.h b/src/algorithms/libs/volk_gnsssdr/lib/qa_utils.h new file mode 100644 index 000000000..211f7b23c --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/qa_utils.h @@ -0,0 +1,62 @@ +#ifndef VOLK_QA_UTILS_H +#define VOLK_QA_UTILS_H + +#include +#include +#include +#include +#include + +struct volk_gnsssdr_type_t { + bool is_float; + bool is_scalar; + bool is_signed; + bool is_complex; + int size; + std::string str; +}; + +volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string); + +float uniform(void); +void 
random_floats(float *buf, unsigned n); + +bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t, void(*)(), std::string, float, lv_32fc_t, int, int, std::vector *, std::string, bool benchmark_mode=false, std::string kernel_regex=""); + + +#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) BOOST_AUTO_TEST_CASE(func##_test) { BOOST_CHECK_EQUAL(run_volk_gnsssdr_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, 0, "NULL"), 0); } +#define VOLK_PROFILE(func, tol, scalar, len, iter, results, bnmode, kernel_regex) run_volk_gnsssdr_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results, "NULL", bnmode, kernel_regex) +#define VOLK_PUPPET_PROFILE(func, puppet_master_func, tol, scalar, len, iter, results, bnmode, kernel_regex) run_volk_gnsssdr_tests(func##_get_func_desc(), (void (*)())func##_manual, std::string(#func), tol, scalar, len, iter, results, std::string(#puppet_master_func), bnmode, kernel_regex) +typedef void (*volk_gnsssdr_fn_1arg)(void *, unsigned int, const char*); //one input, operate in place +typedef void (*volk_gnsssdr_fn_2arg)(void *, void *, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_3arg)(void *, void *, void *, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_4arg)(void *, void *, void *, void *, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_1arg_s32f)(void *, float, unsigned int, const char*); //one input vector, one scalar float input +typedef void (*volk_gnsssdr_fn_2arg_s32f)(void *, void *, float, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_3arg_s32f)(void *, void *, void *, float, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_1arg_s32fc)(void *, lv_32fc_t, unsigned int, const char*); //one input vector, one scalar float input +typedef void (*volk_gnsssdr_fn_2arg_s32fc)(void *, void *, lv_32fc_t, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_3arg_s32fc)(void *, 
void *, void *, lv_32fc_t, unsigned int, const char*); + +//ADDED BY GNSS-SDR. START +typedef void (*volk_gnsssdr_fn_1arg_s8i)(void *, char, unsigned int, const char*); //one input vector, one scalar char input +typedef void (*volk_gnsssdr_fn_2arg_s8i)(void *, void *, char, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_3arg_s8i)(void *, void *, void *, char, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_1arg_s8ic)(void *, lv_8sc_t, unsigned int, const char*); //one input vector, one scalar lv_8sc_t vector input +typedef void (*volk_gnsssdr_fn_2arg_s8ic)(void *, void *, lv_8sc_t, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_3arg_s8ic)(void *, void *, void *, lv_8sc_t, unsigned int, const char*); + +typedef void (*volk_gnsssdr_fn_8arg)(void *, void *, void *, void *, void *, void *, void *, void *, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_8arg_s32f)(void *, void *, void *, void *, void *, void *, void *, void *, float, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_8arg_s32fc)(void *, void *, void *, void *, void *, void *, void *, void *, lv_32fc_t, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_8arg_s8i)(void *, void *, void *, void *, void *, void *, void *, void *, char, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_8arg_s8ic)(void *, void *, void *, void *, void *, void *, void *, void *, lv_8sc_t, unsigned int, const char*); + +typedef void (*volk_gnsssdr_fn_12arg)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_12arg_s32f)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, float, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_12arg_s32fc)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, lv_32fc_t, unsigned int, const char*); +typedef void 
(*volk_gnsssdr_fn_12arg_s8i)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, char, unsigned int, const char*); +typedef void (*volk_gnsssdr_fn_12arg_s8ic)(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, lv_8sc_t, unsigned int, const char*); +//ADDED BY GNSS-SDR. END + +#endif //VOLK_QA_UTILS_H diff --git a/src/algorithms/libs/volk_gnsssdr/lib/testqa.cc b/src/algorithms/libs/volk_gnsssdr/lib/testqa.cc new file mode 100644 index 000000000..d5241ec1b --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/testqa.cc @@ -0,0 +1,67 @@ +/* -*- c++ -*- */ +/* + * Copyright 2012-2014 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. 
+ */ + +#include "qa_utils.h" +#include +#include + +//GNSS-SDR PROTO-KERNELS +VOLK_RUN_TESTS(volk_gnsssdr_32fc_x2_multiply_32fc, 1e-4, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_8ic_x2_multiply_8ic, 1e-4, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_8u_x2_multiply_8u, 1e-4, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_32fc_x2_dot_prod_32fc, 1e-4, 0, 204603, 1); +VOLK_RUN_TESTS(volk_gnsssdr_8ic_x2_dot_prod_8ic, 1e-4, 0, 204603, 1); +VOLK_RUN_TESTS(volk_gnsssdr_32fc_s32fc_multiply_32fc, 1e-4, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_32fc_conjugate_32fc, 1e-4, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_8ic_conjugate_8ic, 1e-4, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_32f_x2_add_32f, 1e-4, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_8i_x2_add_8i, 1e-4, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_32f_index_max_16u, 3, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_8i_index_max_16u, 3, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_32f_accumulator_s32f, 1e-4, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_8i_accumulator_s8i, 1e-4, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_32fc_magnitude_squared_32f, 1e-4, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_8ic_magnitude_squared_8i, 1e-4, 0, 20462, 1); + +VOLK_RUN_TESTS(volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3, 1e-4, 0, 20462, 1); + +VOLK_RUN_TESTS(volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 20462, 1); +VOLK_RUN_TESTS(volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 20462, 1); + +VOLK_RUN_TESTS(volk_gnsssdr_16i_s32f_convert_32f, 1e-4, 32768.0, 20462, 1); + +VOLK_RUN_TESTS(volk_gnsssdr_8i_max_s8i, 3, 0, 20462, 1); + +//VOLK_RUN_TESTS(volk_gnsssdr_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000); +//VOLK_RUN_TESTS(volk_gnsssdr_16i_branch_4_state_8, 1e-4, 2046, 10000); +//VOLK_RUN_TESTS(volk_gnsssdr_16i_max_star_16i, 0, 0, 20462, 10000); +//VOLK_RUN_TESTS(volk_gnsssdr_16i_max_star_horizontal_16i, 
0, 0, 20462, 10000); +//VOLK_RUN_TESTS(volk_gnsssdr_16i_permute_and_scalar_add, 1e-4, 0, 2046, 1000); +//VOLK_RUN_TESTS(volk_gnsssdr_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 1000); +//VOLK_RUN_TESTS(volk_gnsssdr_16i_32fc_dot_prod_32fc, 1e-4, 0, 204602, 1); +//VOLK_RUN_TESTS(volk_gnsssdr_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 2046, 10000); +//VOLK_RUN_TESTS(volk_gnsssdr_32fc_s32f_x2_power_spectral_density_32f, 1e-4, 2046, 10000); +//VOLK_RUN_TESTS(volk_gnsssdr_32f_s32f_32f_fm_detect_32f, 1e-4, 2046, 10000); +//VOLK_RUN_TESTS(volk_gnsssdr_32u_popcnt, 0, 0, 2046, 10000); +//VOLK_RUN_TESTS(volk_gnsssdr_64u_popcnt, 0, 0, 2046, 10000); diff --git a/src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_malloc.c b/src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_malloc.c new file mode 100644 index 000000000..03e53a513 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_malloc.c @@ -0,0 +1,142 @@ +/* -*- c -*- */ +/* + * Copyright 2014 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include + +/* + * For #defines used to determine support for allocation functions, + * see: http://linux.die.net/man/3/aligned_alloc +*/ + +// Disabling use of aligned_alloc. 
This function requires that size be +// a multiple of alignment, which is too restrictive for many uses of +// VOLK. + +//// If we are using C11 standard, use the aligned_alloc +//#ifdef _ISOC11_SOURCE +// +//void *volk_gnsssdr_malloc(size_t size, size_t alignment) +//{ +// void *ptr = aligned_alloc(alignment, size); +// if(ptr == NULL) { +// fprintf(stderr, "VOLK: Error allocating memory (aligned_alloc)\n"); +// } +// return ptr; +//} +// +//void volk_gnsssdr_free(void *ptr) +//{ +// free(ptr); +//} +// +//#else // _ISOC11_SOURCE + +// Otherwise, test if we are a POSIX or X/Open system +// This only has a restriction that alignment be a power of 2. +#if _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || HAVE_POSIX_MEMALIGN + +void *volk_gnsssdr_malloc(size_t size, size_t alignment) +{ + void *ptr; + int err = posix_memalign(&ptr, alignment, size); + if(err == 0) { + return ptr; + } + else { + fprintf(stderr, "VOLK: Error allocating memory (posix_memalign: %d)\n", err); + return NULL; + } +} + +void volk_gnsssdr_free(void *ptr) +{ + free(ptr); +} + +// _aligned_malloc has no restriction on size, +// available on Windows since Visual C++ 2005 +#elif _MSC_VER >= 1400 + +void *volk_gnsssdr_malloc(size_t size, size_t alignment) +{ + void *ptr = _aligned_malloc(size, alignment); + if(ptr == NULL) { + fprintf(stderr, "VOLK: Error allocating memory (_aligned_malloc)\n"); + } + return ptr; +} + +void volk_gnsssdr_free(void *ptr) +{ + _aligned_free(ptr); +} + +// No standard handlers; we'll do it ourselves. 
+#else // _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || HAVE_POSIX_MEMALIGN
+
+struct block_info
+{
+  void *real;
+};
+
+void *
+volk_gnsssdr_malloc(size_t size, size_t alignment)
+{
+  void *real, *user;
+  struct block_info *info;
+
+  /* At least align to sizeof our struct (the mask below needs a power-of-two alignment) */
+  if (alignment < sizeof(struct block_info))
+    alignment = sizeof(struct block_info);
+
+  /* Alloc room for the data, the block_info header and the alignment slack */
+  real = malloc(size + (2 * alignment - 1));
+  if (real == NULL) { fprintf(stderr, "VOLK: Error allocating memory (malloc)\n"); return NULL; }
+  /* Get pointer to the various zones */
+  user = (void *)((((uintptr_t) real) + sizeof(struct block_info) + alignment - 1) & ~(alignment - 1));
+  info = (struct block_info *)(((uintptr_t)user) - sizeof(struct block_info));
+
+  /* Store the info for the free */
+  info->real = real;
+
+  /* Return pointer to user */
+  return user;
+}
+
+void
+volk_gnsssdr_free(void *ptr)
+{
+  struct block_info *info;
+  if (ptr == NULL) return; /* nothing to release, as with free(NULL) */
+  /* Get the real pointer */
+  info = (struct block_info *)(((uintptr_t)ptr) - sizeof(struct block_info));
+
+  /* Release real pointer */
+  free(info->real);
+}
+
+#endif // _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || HAVE_POSIX_MEMALIGN
+
+//#endif // _ISOC11_SOURCE
diff --git a/src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_prefs.c b/src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_prefs.c
new file mode 100644
index 000000000..dc4dc645e
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_prefs.c
@@ -0,0 +1,94 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <volk_gnsssdr/volk_gnsssdr_prefs.h>
+
+// Location of the preferences file, relative to the user's home directory.
+static const char volk_gnsssdr_config_suffix[] = "/.volk_gnsssdr/volk_gnsssdr_config";
+
+// Directory the config path is rooted at: $HOME if set, otherwise $APPDATA,
+// otherwise NULL.
+static const char *volk_gnsssdr_config_home(void)
+{
+    const char *home = getenv("HOME");
+    if (home == NULL) home = getenv("APPDATA");
+    return home;
+}
+
+/*
+ * Writes "<home>/.volk_gnsssdr/volk_gnsssdr_config" into path.
+ *
+ * path must point to a caller-owned buffer large enough for the result; the
+ * signature carries no length, so no bound can be enforced here.
+ * When neither HOME nor APPDATA is set, path becomes the empty string.
+ * (Assigning NULL to the parameter would not be visible to the caller.)
+ */
+void volk_gnsssdr_get_config_path(char *path)
+{
+    const char *home;
+    if (path == NULL) return;
+    home = volk_gnsssdr_config_home();
+    if (home == NULL) {
+        path[0] = '\0';
+        return;
+    }
+    strcpy(path, home);
+    strcat(path, volk_gnsssdr_config_suffix);
+}
+
+/*
+ * Reads the preferences file and returns the number of entries loaded.
+ *
+ * An accepted line has the form "<kernel> <aligned impl> <unaligned impl>"
+ * with a kernel name starting with "volk_gnsssdr_". Once the file has been
+ * opened, *prefs_res receives the realloc()-grown array of entries (NULL if
+ * none was accepted). When no config path can be built or the file cannot
+ * be opened, 0 is returned and *prefs_res is left untouched.
+ */
+size_t volk_gnsssdr_load_preferences(volk_gnsssdr_arch_pref_t **prefs_res)
+{
+    static const char prefix[] = "volk_gnsssdr_";
+    FILE *config_file;
+    char path[512], line[512];
+    char name[512], impl_a[512], impl_u[512];
+    size_t n_arch_prefs = 0;
+    volk_gnsssdr_arch_pref_t *prefs = NULL;
+    volk_gnsssdr_arch_pref_t *grown, *p;
+    const char *home = volk_gnsssdr_config_home();
+
+    //get the config path, refusing one that does not fit the local buffer
+    if (home == NULL) return n_arch_prefs; //no prefs found
+    if (strlen(home) + sizeof(volk_gnsssdr_config_suffix) > sizeof(path)) return n_arch_prefs;
+    strcpy(path, home);
+    strcat(path, volk_gnsssdr_config_suffix);
+    config_file = fopen(path, "r");
+    if(!config_file) return n_arch_prefs; //no prefs found
+
+    //read the prefs line by line
+    while(fgets(line, sizeof(line), config_file) != NULL)
+    {
+        //a token cannot be longer than the line it came from, so the
+        //scratch buffers (same size as line) cannot overflow
+        if(sscanf(line, "%511s %511s %511s", name, impl_a, impl_u) != 3) continue;
+        //compare the whole prefix, not just its leading "volk_"
+        if(strncmp(name, prefix, sizeof(prefix) - 1) != 0) continue;
+        //skip entries that would overflow the record's fields
+        //NOTE(review): assumes name, impl_a and impl_u are char arrays - confirm in volk_gnsssdr_prefs.h
+        if(strlen(name) >= sizeof(prefs->name)) continue;
+        if(strlen(impl_a) >= sizeof(prefs->impl_a)) continue;
+        if(strlen(impl_u) >= sizeof(prefs->impl_u)) continue;
+
+        //grow by one slot; on allocation failure keep what was loaded so far
+        grown = (volk_gnsssdr_arch_pref_t *) realloc(prefs, (n_arch_prefs+1) * sizeof(*prefs));
+        if(grown == NULL) break;
+        prefs = grown;
+        p = prefs + n_arch_prefs;
+        strcpy(p->name, name);
+        strcpy(p->impl_a, impl_a);
+        strcpy(p->impl_u, impl_u);
+        n_arch_prefs++;
+    }
+    fclose(config_file);
+    *prefs_res = prefs;
+    return n_arch_prefs;
+}
diff --git a/src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_rank_archs.c b/src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_rank_archs.c
new file mode 100644
index 000000000..415ca4039
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_rank_archs.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2011-2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <volk_gnsssdr_rank_archs.h>
+#include <volk_gnsssdr/volk_gnsssdr_prefs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Number of set bits in num: the compiler builtin where available, a
+// portable bit-by-bit count otherwise.
+#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4
+    #define __popcnt __builtin_popcount
+#else
+    // static: a plain C99 'inline' definition emits no external symbol, so a
+    // call the compiler decides not to inline would fail to link.
+    static inline unsigned __popcnt(unsigned num)
+    {
+        unsigned pop = 0;
+        while(num)
+        {
+            if (num & 0x1) pop++;
+            num >>= 1;
+        }
+        return pop;
+    }
+#endif
+
+/*
+ * Returns the position of impl_name in impl_names.
+ *
+ * Names are compared on their first 20 characters. An unknown name is
+ * reported and replaced by "generic"; if "generic" itself is absent, -1 is
+ * returned rather than looking it up again without end.
+ */
+int volk_gnsssdr_get_index(
+    const char *impl_names[], //list of implementations by name
+    const size_t n_impls,     //number of implementations available
+    const char *impl_name     //the implementation name to find
+){
+    size_t i;
+    for (i = 0; i < n_impls; i++) {
+        if(!strncmp(impl_names[i], impl_name, 20)) {
+            return (int) i;
+        }
+    }
+    //the fallback itself is missing: another lookup could never succeed
+    if(!strncmp(impl_name, "generic", 20)) {
+        fprintf(stderr, "Volk error: no generic impl available\n");
+        return -1;
+    }
+    printf("Volk warning: no arch found, returning generic impl\n");
+    return volk_gnsssdr_get_index(impl_names, n_impls, "generic");
+}
+
+/*
+ * Picks the implementation to run for kern_name and returns its index.
+ *
+ * Order of precedence:
+ *   1. "generic" whenever the VOLK_GENERIC environment variable is set;
+ *   2. the entry for kern_name in the loaded preferences, if there is one
+ *      (aligned or unaligned column according to align);
+ *   3. the implementation whose dependency mask has the most bits set,
+ *      taken from the aligned ones when align is true and one exists,
+ *      from the unaligned ones otherwise.
+ *
+ * The preferences are loaded on the first call and kept in function-local
+ * statics for later calls.
+ */
+int volk_gnsssdr_rank_archs(
+    const char *kern_name,    //name of the kernel to rank
+    const char *impl_names[], //list of implementations by name
+    const int* impl_deps,     //requirement mask per implementation
+    const bool* alignment,    //alignment status of each implementation
+    size_t n_impls,           //number of implementations available
+    const bool align          //if false, filter aligned implementations
+){
+    size_t i;
+    static volk_gnsssdr_arch_pref_t *volk_gnsssdr_arch_prefs;
+    static size_t n_arch_prefs = 0;
+    static int prefs_loaded = 0;
+    if(!prefs_loaded) {
+        n_arch_prefs = volk_gnsssdr_load_preferences(&volk_gnsssdr_arch_prefs);
+        prefs_loaded = 1;
+    }
+
+    // If we've defined VOLK_GENERIC to be anything, always return the
+    // 'generic' kernel. Used in GR's QA code.
+    char *gen_env = getenv("VOLK_GENERIC");
+    if(gen_env) {
+        return volk_gnsssdr_get_index(impl_names, n_impls, "generic");
+    }
+
+    //now look for the function name in the prefs list
+    for(i = 0; i < n_arch_prefs; i++)
+    {
+        if(!strncmp(kern_name, volk_gnsssdr_arch_prefs[i].name, sizeof(volk_gnsssdr_arch_prefs[i].name))) //found it
+        {
+            const char *impl_name = align? volk_gnsssdr_arch_prefs[i].impl_a : volk_gnsssdr_arch_prefs[i].impl_u;
+            return volk_gnsssdr_get_index(impl_names, n_impls, impl_name);
+        }
+    }
+
+    //return the best index with the largest deps
+    size_t best_index_a = 0;
+    size_t best_index_u = 0;
+    int best_value_a = -1;
+    int best_value_u = -1;
+    for(i = 0; i < n_impls; i++)
+    {
+        const signed val = __popcnt(impl_deps[i]);
+        if (alignment[i] && val > best_value_a)
+        {
+            best_index_a = i;
+            best_value_a = val;
+        }
+        if (!alignment[i] && val > best_value_u)
+        {
+            best_index_u = i;
+            best_value_u = val;
+        }
+    }
+
+    //when align and we found a best aligned, use it
+    if (align && best_value_a != -1) return best_index_a;
+
+    //otherwise return the best unaligned
+    return best_index_u;
+}
diff --git a/src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_rank_archs.h b/src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_rank_archs.h
new file mode 100644
index 000000000..6cf9108fb
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/lib/volk_gnsssdr_rank_archs.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2011-2012 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_VOLK_RANK_ARCHS_H +#define INCLUDED_VOLK_RANK_ARCHS_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int volk_gnsssdr_get_index( + const char *impl_names[], //list of implementations by name + const size_t n_impls, //number of implementations available + const char *impl_name //the implementation name to find +); + +int volk_gnsssdr_rank_archs( + const char *kern_name, //name of the kernel to rank + const char *impl_names[], //list of implementations by name + const int* impl_deps, //requirement mask per implementation + const bool* alignment, //alignment status of each implementation + size_t n_impls, //number of implementations available + const bool align //if false, filter aligned implementations +); + +#ifdef __cplusplus +} +#endif +#endif /*INCLUDED_VOLK_RANK_ARCHS_H*/ diff --git a/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_16sc_magnitude_32f_aligned16_orc_impl.orc b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_16sc_magnitude_32f_aligned16_orc_impl.orc new file mode 100644 index 000000000..561010761 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_16sc_magnitude_32f_aligned16_orc_impl.orc @@ -0,0 +1,25 @@ +.function volk_gnsssdr_16ic_magnitude_32f_a_orc_impl +.source 4 src +.dest 4 dst +.floatparam 4 scalar +.temp 4 reall +.temp 4 imagl +.temp 2 reals +.temp 2 imags +.temp 4 realf +.temp 4 imagf +.temp 4 sumf + + + +splitlw reals, imags, src +convswl reall, reals +convswl imagl, imags +convlf realf, reall +convlf imagf, imagl +divf realf, realf, scalar +divf imagf, imagf, scalar +mulf realf, realf, realf +mulf imagf, imagf, imagf +addf sumf, realf, imagf +sqrtf dst, sumf diff --git a/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_32f_x2_add_32f.orc 
b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_32f_x2_add_32f.orc new file mode 100644 index 000000000..4419688b6 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_32f_x2_add_32f.orc @@ -0,0 +1,5 @@ +.function volk_gnsssdr_32f_x2_add_32f_a_orc_impl +.dest 4 dst +.source 4 src1 +.source 4 src2 +addf dst, src1, src2 diff --git a/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_32fc_s32fc_multiply_32fc.orc b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_32fc_s32fc_multiply_32fc.orc new file mode 100644 index 000000000..03297831f --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_32fc_s32fc_multiply_32fc.orc @@ -0,0 +1,18 @@ +.function volk_gnsssdr_32fc_s32fc_multiply_32fc_a_orc_impl +.source 8 src1 +.floatparam 8 scalar +.dest 8 dst +.temp 8 iqprod +.temp 4 real +.temp 4 imag +.temp 4 ac +.temp 4 bd +.temp 8 swapped +x2 mulf iqprod, src1, scalar +splitql bd, ac, iqprod +subf real, ac, bd +swaplq swapped, src1 +x2 mulf iqprod, swapped, scalar +splitql bd, ac, iqprod +addf imag, ac, bd +mergelq dst, real, imag diff --git a/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_32fc_x2_multiply_32fc.orc b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_32fc_x2_multiply_32fc.orc new file mode 100644 index 000000000..5d049ad93 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_32fc_x2_multiply_32fc.orc @@ -0,0 +1,18 @@ +.function volk_gnsssdr_32fc_x2_multiply_32fc_a_orc_impl +.source 8 src1 +.source 8 src2 +.dest 8 dst +.temp 8 iqprod +.temp 4 real +.temp 4 imag +.temp 4 ac +.temp 4 bd +.temp 8 swapped +x2 mulf iqprod, src1, src2 +splitql bd, ac, iqprod +subf real, ac, bd +swaplq swapped, src1 +x2 mulf iqprod, swapped, src2 +splitql bd, ac, iqprod +addf imag, ac, bd +mergelq dst, real, imag diff --git a/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8i_accumulator_s8i.orc b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8i_accumulator_s8i.orc new file mode 100644 index 000000000..71d301c45 --- /dev/null 
+++ b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8i_accumulator_s8i.orc @@ -0,0 +1,40 @@ +#/*! +# * \file volk_gnsssdr_8i_accumulator_s8i.orc +# * \brief ORC implementation: 8 bits (char) scalar accumulator +# * \authors
    +# *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com +# *
+# *
+# * ORC code that implements an accumulator of char values
+# *
+# * -------------------------------------------------------------------------
+# *
+# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+# *
+# * GNSS-SDR is a software defined Global Navigation
+# * Satellite Systems receiver
+# *
+# * This file is part of GNSS-SDR.
+# *
+# * GNSS-SDR is free software: you can redistribute it and/or modify
+# * it under the terms of the GNU General Public License as published by
+# * the Free Software Foundation, either version 3 of the License, or
+# * (at your option) any later version.
+# *
+# * GNSS-SDR is distributed in the hope that it will be useful,
+# * but WITHOUT ANY WARRANTY; without even the implied warranty of
+# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# * GNU General Public License for more details.
+# *
+# * You should have received a copy of the GNU General Public License
+# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+# *
+# * -------------------------------------------------------------------------
+# */
+
+.function volk_gnsssdr_8i_accumulator_s8i_a_orc_impl
+.source 1 src1
+.accumulator 2 acc
+.temp 2 sum
+mergebw sum, 0, src1
+accw acc, sum
diff --git a/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8i_x2_add_8i.orc b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8i_x2_add_8i.orc
new file mode 100644
index 000000000..decb88029
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8i_x2_add_8i.orc
@@ -0,0 +1,39 @@
+#/*!
+# * \file volk_gnsssdr_8i_x2_add_8i.orc
+# * \brief ORC implementation: adds pairs of 8 bits (char) scalars
+# * \authors
    +# *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com +# *
+# *
+# * ORC code that adds pairs of 8 bits (char) scalars
+# *
+# * -------------------------------------------------------------------------
+# *
+# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+# *
+# * GNSS-SDR is a software defined Global Navigation
+# * Satellite Systems receiver
+# *
+# * This file is part of GNSS-SDR.
+# *
+# * GNSS-SDR is free software: you can redistribute it and/or modify
+# * it under the terms of the GNU General Public License as published by
+# * the Free Software Foundation, either version 3 of the License, or
+# * (at your option) any later version.
+# *
+# * GNSS-SDR is distributed in the hope that it will be useful,
+# * but WITHOUT ANY WARRANTY; without even the implied warranty of
+# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# * GNU General Public License for more details.
+# *
+# * You should have received a copy of the GNU General Public License
+# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+# *
+# * -------------------------------------------------------------------------
+# */
+
+.function volk_gnsssdr_8i_x2_add_8i_a_orc_impl
+.dest 1 dst
+.source 1 src1
+.source 1 src2
+addb dst, src1, src2
diff --git a/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_conjugate_8ic.orc b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_conjugate_8ic.orc
new file mode 100644
index 000000000..9e14e65f1
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_conjugate_8ic.orc
@@ -0,0 +1,42 @@
+#/*!
+# * \file volk_gnsssdr_8ic_conjugate_8ic.orc
+# * \brief ORC implementation: calculates the conjugate of a 16 bits vector
+# * \authors
    +# *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com +# *
+# *
+# * ORC code that calculates the conjugate of a
+# * 16 bits vector (8 bits the real part and 8 bits the imaginary part)
+# * result = real - j*imag (the two bytes of each element are multiplied by 1 and -1)
+# *
+# * -------------------------------------------------------------------------
+# *
+# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+# *
+# * GNSS-SDR is a software defined Global Navigation
+# * Satellite Systems receiver
+# *
+# * This file is part of GNSS-SDR.
+# *
+# * GNSS-SDR is free software: you can redistribute it and/or modify
+# * it under the terms of the GNU General Public License as published by
+# * the Free Software Foundation, either version 3 of the License, or
+# * (at your option) any later version.
+# *
+# * GNSS-SDR is distributed in the hope that it will be useful,
+# * but WITHOUT ANY WARRANTY; without even the implied warranty of
+# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# * GNU General Public License for more details.
+# *
+# * You should have received a copy of the GNU General Public License
+# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+# *
+# * -------------------------------------------------------------------------
+# */
+
+.function volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl
+.source 2 src1
+.dest 2 dst
+.temp 2 merged
+mergebw merged, 1, -1
+x2 mullb dst, merged, src1
diff --git a/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_magnitude_squared_8i.orc b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_magnitude_squared_8i.orc
new file mode 100644
index 000000000..a0c40a741
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_magnitude_squared_8i.orc
@@ -0,0 +1,45 @@
+#/*!
+# * \file volk_gnsssdr_8ic_magnitude_squared_8i.orc
+# * \brief ORC implementation: calculates the magnitude squared of a 16 bits vector
+# * \authors
    +# *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com +# *
+# *
+# * ORC code that calculates the magnitude squared of a
+# * 16 bits vector (8 bits the real part and 8 bits the imaginary part)
+# * result = (real*real) + (imag*imag)
+# *
+# * -------------------------------------------------------------------------
+# *
+# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+# *
+# * GNSS-SDR is a software defined Global Navigation
+# * Satellite Systems receiver
+# *
+# * This file is part of GNSS-SDR.
+# *
+# * GNSS-SDR is free software: you can redistribute it and/or modify
+# * it under the terms of the GNU General Public License as published by
+# * the Free Software Foundation, either version 3 of the License, or
+# * (at your option) any later version.
+# *
+# * GNSS-SDR is distributed in the hope that it will be useful,
+# * but WITHOUT ANY WARRANTY; without even the implied warranty of
+# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# * GNU General Public License for more details.
+# *
+# * You should have received a copy of the GNU General Public License
+# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+# *
+# * -------------------------------------------------------------------------
+# */
+
+.function volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl
+.source 2 src1
+.dest 1 dst
+.temp 2 iqprod
+.temp 1 ac
+.temp 1 bd
+x2 mullb iqprod, src1, src1
+splitwb bd, ac, iqprod
+addb dst, ac, bd
diff --git a/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_s8ic_multiply_8ic.orc b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_s8ic_multiply_8ic.orc
new file mode 100644
index 000000000..7c0fc2d6b
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_s8ic_multiply_8ic.orc
@@ -0,0 +1,58 @@
+#/*!
+# * \file volk_gnsssdr_8ic_s8ic_multiply_8ic.orc
+# * \brief ORC implementation: multiplies a group of 16 bits vectors by one constant vector
+# * \authors
    +# *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com +# *
+# *
+# * ORC code that multiplies a group of 16 bits vectors
+# * (8 bits the real part and 8 bits the imaginary part) by one constant vector
+# *
+# * -------------------------------------------------------------------------
+# *
+# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+# *
+# * GNSS-SDR is a software defined Global Navigation
+# * Satellite Systems receiver
+# *
+# * This file is part of GNSS-SDR.
+# *
+# * GNSS-SDR is free software: you can redistribute it and/or modify
+# * it under the terms of the GNU General Public License as published by
+# * the Free Software Foundation, either version 3 of the License, or
+# * (at your option) any later version.
+# *
+# * GNSS-SDR is distributed in the hope that it will be useful,
+# * but WITHOUT ANY WARRANTY; without even the implied warranty of
+# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# * GNU General Public License for more details.
+# *
+# * You should have received a copy of the GNU General Public License
+# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+# *
+# * -------------------------------------------------------------------------
+# */
+
+.function volk_gnsssdr_8ic_s8ic_multiply_8ic_a_orc_impl
+.source 2 src1
+.param 2 src2real
+.param 2 src2imag
+.dest 2 dst
+.temp 2 iqprod
+.temp 1 real
+.temp 1 imag
+.temp 1 rr
+.temp 1 ii
+.temp 1 ri
+.temp 1 ir
+x2 mullb iqprod, src1, src2real
+splitwb ir, rr, iqprod
+x2 mullb iqprod, src1, src2imag
+splitwb ii, ri, iqprod
+subb real, rr, ii
+addb imag, ri, ir
+mergebw dst, real, imag
+
+
+
+
diff --git a/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_x2_dot_prod_8ic.orc b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_x2_dot_prod_8ic.orc
new file mode 100644
index 000000000..c4dae8840
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_x2_dot_prod_8ic.orc
@@ -0,0 +1,59 @@
+#/*!
+# * \file volk_gnsssdr_8ic_x2_dot_prod_8ic.orc +# * \brief ORC implementation: multiplies two 16 bits vectors and accumulates them +# * \authors
    +# *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com +# *
+# *
+# * ORC code that multiplies two 16 bits vectors (8 bits the real part
+# * and 8 bits the imaginary part) and accumulates them
+# *
+# * -------------------------------------------------------------------------
+# *
+# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+# *
+# * GNSS-SDR is a software defined Global Navigation
+# * Satellite Systems receiver
+# *
+# * This file is part of GNSS-SDR.
+# *
+# * GNSS-SDR is free software: you can redistribute it and/or modify
+# * it under the terms of the GNU General Public License as published by
+# * the Free Software Foundation, either version 3 of the License, or
+# * (at your option) any later version.
+# *
+# * GNSS-SDR is distributed in the hope that it will be useful,
+# * but WITHOUT ANY WARRANTY; without even the implied warranty of
+# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# * GNU General Public License for more details.
+# *
+# * You should have received a copy of the GNU General Public License
+# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+# *
+# * -------------------------------------------------------------------------
+# */
+
+.function volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl
+.source 2 src1
+.source 2 src2
+.accumulator 2 accreal
+.accumulator 2 accimag
+.temp 2 iqprod
+.temp 1 real
+.temp 1 imag
+.temp 2 real2
+.temp 2 imag2
+.temp 1 ac
+.temp 1 bd
+.temp 2 swapped
+x2 mullb iqprod, src1, src2
+splitwb bd, ac, iqprod
+subb real, ac, bd
+swapw swapped, src1
+x2 mullb iqprod, swapped, src2
+splitwb bd, ac, iqprod
+addb imag, ac, bd
+mergebw real2, 0, real
+accw accreal, real2
+mergebw imag2, 0, imag
+accw accimag, imag2
diff --git a/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_x2_multiply_8ic.orc b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_x2_multiply_8ic.orc
new file mode 100644
index 000000000..b448eac0b
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_x2_multiply_8ic.orc
@@ -0,0 +1,57 @@
+#/*!
+# * \file volk_gnsssdr_8ic_x2_multiply_8ic.orc +# * \brief ORC implementation: multiplies two 16 bits vectors +# * \authors
    +# *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com +# *
+# *
+# * ORC code that multiplies two 16 bits vectors (8 bits the real part
+# * and 8 bits the imaginary part)
+# *
+# * -------------------------------------------------------------------------
+# *
+# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+# *
+# * GNSS-SDR is a software defined Global Navigation
+# * Satellite Systems receiver
+# *
+# * This file is part of GNSS-SDR.
+# *
+# * GNSS-SDR is free software: you can redistribute it and/or modify
+# * it under the terms of the GNU General Public License as published by
+# * the Free Software Foundation, either version 3 of the License, or
+# * (at your option) any later version.
+# *
+# * GNSS-SDR is distributed in the hope that it will be useful,
+# * but WITHOUT ANY WARRANTY; without even the implied warranty of
+# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# * GNU General Public License for more details.
+# *
+# * You should have received a copy of the GNU General Public License
+# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+# *
+# * -------------------------------------------------------------------------
+# */
+
+# Element-wise product of two vectors of 2-byte complex samples.
+# Per element: `real` = ac - bd from the byte-wise products of src1 and src2;
+# `imag` = sum of the byte-wise products of byte-swapped src1 and src2 (the
+# `ac`/`bd` temps are reused for these cross products). The two 8-bit results
+# are merged back into one 2-byte element written to dst.
+# NOTE(review): byte order of real/imag inside a sample is not visible here
+# -- confirm against the ORC opcode reference and the calling C wrapper.
+# NOTE(review): the arithmetic is 8-bit, so results presumably wrap on
+# overflow -- confirm.
+.function volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl
+.source 2 src1
+.source 2 src2
+.dest 2 dst
+.temp 2 iqprod
+.temp 1 real
+.temp 1 imag
+.temp 1 ac
+.temp 1 bd
+.temp 2 swapped
+x2 mullb iqprod, src1, src2
+splitwb bd, ac, iqprod
+subb real, ac, bd
+swapw swapped, src1
+x2 mullb iqprod, swapped, src2
+splitwb bd, ac, iqprod
+addb imag, ac, bd
+mergebw dst, real, imag
+
+
+
+
diff --git a/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.orc b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.orc
new file mode 100644
index 000000000..29bb09a8c
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.orc
@@ -0,0 +1,139 @@
+#/*!
+# * \file volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.orc +# * \brief ORC implementation: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors +# * \authors
    +# *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com +# *
+# *
+# * ORC code that performs the carrier wipe-off mixing and the
+# * Early, Prompt, and Late correlation with 16 bits vectors (8 bits the
+# * real part and 8 bits the imaginary part):
+# * - The carrier wipe-off is done by multiplying the input signal by the
+# * carrier (multiplication of 16 bits vectors). It returns the input
+# * signal in base band (BB)
+# * - Early values are calculated by multiplying the input signal in BB by the
+# * early code (multiplication of 16 bits vectors), accumulating the results
+# * - Prompt values are calculated by multiplying the input signal in BB by the
+# * prompt code (multiplication of 16 bits vectors), accumulating the results
+# * - Late values are calculated by multiplying the input signal in BB by the
+# * late code (multiplication of 16 bits vectors), accumulating the results
+# *
+# * -------------------------------------------------------------------------
+# *
+# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+# *
+# * GNSS-SDR is a software defined Global Navigation
+# * Satellite Systems receiver
+# *
+# * This file is part of GNSS-SDR.
+# *
+# * GNSS-SDR is free software: you can redistribute it and/or modify
+# * it under the terms of the GNU General Public License as published by
+# * the Free Software Foundation, either version 3 of the License, or
+# * (at your option) any later version.
+# *
+# * GNSS-SDR is distributed in the hope that it will be useful,
+# * but WITHOUT ANY WARRANTY; without even the implied warranty of
+# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# * GNU General Public License for more details.
+# *
+# * You should have received a copy of the GNU General Public License
+# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+# *
+# * -------------------------------------------------------------------------
+# */
+
+# First pass: carrier wipe-off followed by Early and Prompt correlation.
+# Step 1 multiplies `input` by `carrier` (real = ac - bd, imag = sum of the
+# cross products obtained from the byte-swapped input) and merges the result
+# into `bb_signal_sample`.
+# Step 2 repeats the same multiply pattern between `bb_signal_sample` (and its
+# byte-swapped copy) and E_code, then P_code; each 8-bit real/imag result is
+# packed with a zero byte into a 16-bit word and added to the matching
+# E_out_* / P_out_* accumulator.
+# NOTE(review): byte order of real/imag inside a sample and 8-bit overflow
+# behaviour are not visible here -- confirm against the ORC opcode reference.
+.function volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_first_a_orc_impl
+.source 2 input
+.source 2 carrier
+.source 2 E_code
+.source 2 P_code
+.accumulator 2 E_out_real
+.accumulator 2 E_out_imag
+.accumulator 2 P_out_real
+.accumulator 2 P_out_imag
+.temp 2 bb_signal_sample
+.temp 2 iqprod
+.temp 1 real
+.temp 1 imag
+.temp 1 ac
+.temp 1 bd
+.temp 2 swapped
+
+.temp 2 real2
+.temp 2 imag2
+
+x2 mullb iqprod, input, carrier
+splitwb bd, ac, iqprod
+subb real, ac, bd
+swapw swapped, input
+x2 mullb iqprod, swapped, carrier
+splitwb bd, ac, iqprod
+addb imag, ac, bd
+mergebw bb_signal_sample, real, imag
+
+swapw swapped, bb_signal_sample
+
+x2 mullb iqprod, bb_signal_sample, E_code
+splitwb bd, ac, iqprod
+subb real, ac, bd
+x2 mullb iqprod, swapped, E_code
+splitwb bd, ac, iqprod
+addb imag, ac, bd
+mergebw real2, 0, real
+mergebw imag2, 0, imag
+accw E_out_real, real2
+accw E_out_imag, imag2
+
+x2 mullb iqprod, bb_signal_sample, P_code
+splitwb bd, ac, iqprod
+subb real, ac, bd
+x2 mullb iqprod, swapped, P_code
+splitwb bd, ac, iqprod
+addb imag, ac, bd
+mergebw real2, 0, real
+mergebw imag2, 0, imag
+accw P_out_real, real2
+accw P_out_imag, imag2
+
+# Second pass: the same carrier wipe-off is recomputed from `input` and
+# `carrier`, then the base-band sample is correlated with L_code only and
+# accumulated into L_out_real / L_out_imag.
+.function volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_second_a_orc_impl
+.source 2 input
+.source 2 carrier
+.source 2 L_code
+.accumulator 2 L_out_real
+.accumulator 2 L_out_imag
+
+.temp 2 bb_signal_sample
+.temp 2 iqprod
+.temp 1 real
+.temp 1 imag
+.temp 1 ac
+.temp 1 bd
+.temp 2 swapped
+
+.temp 2 real2
+.temp 2 imag2
+
+x2 mullb iqprod, input, carrier
+splitwb bd, ac, iqprod
+subb real, ac, bd
+swapw swapped, input
+x2 mullb iqprod, swapped, carrier
+splitwb bd, ac, iqprod
+addb imag, ac, bd
+mergebw bb_signal_sample, real, imag
+
+swapw swapped, bb_signal_sample
+
+x2 mullb iqprod, bb_signal_sample, L_code
+splitwb bd, ac, iqprod
+subb real, ac, bd
+x2 mullb iqprod, swapped, L_code
+splitwb bd, ac, iqprod
+addb imag, ac, bd
+mergebw real2, 0, real
+mergebw imag2, 0, imag
+accw L_out_real, real2
+accw L_out_imag, imag2
+
+
diff --git a/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8u_x2_multiply_8u.orc b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8u_x2_multiply_8u.orc
new file mode 100644
index 000000000..773daabc1
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/orc/volk_gnsssdr_8u_x2_multiply_8u.orc
@@ -0,0 +1,39 @@
+#/*!
+# * \file volk_gnsssdr_8u_x2_multiply_8u.orc
+# * \brief ORC implementation: multiplies unsigned char values
+# * \authors
    +# *
  • Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com +# *
+# *
+# * ORC code that multiplies unsigned char values (8 bits data)
+# *
+# * -------------------------------------------------------------------------
+# *
+# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
+# *
+# * GNSS-SDR is a software defined Global Navigation
+# * Satellite Systems receiver
+# *
+# * This file is part of GNSS-SDR.
+# *
+# * GNSS-SDR is free software: you can redistribute it and/or modify
+# * it under the terms of the GNU General Public License as published by
+# * the Free Software Foundation, either version 3 of the License, or
+# * (at your option) any later version.
+# *
+# * GNSS-SDR is distributed in the hope that it will be useful,
+# * but WITHOUT ANY WARRANTY; without even the implied warranty of
+# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# * GNU General Public License for more details.
+# *
+# * You should have received a copy of the GNU General Public License
+# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+# *
+# * -------------------------------------------------------------------------
+# */
+
+# Element-wise product of two vectors of 1-byte values, written to dst.
+# NOTE(review): mullb produces an 8-bit result, so products that do not fit
+# in one byte are presumably truncated -- confirm against the ORC opcode
+# reference.
+.function volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl
+.source 1 src1
+.source 1 src2
+.dest 1 dst
+mullb dst, src1, src2
diff --git a/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/CMakeLists.txt b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/CMakeLists.txt
new file mode 100644
index 000000000..bba4d3664
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/CMakeLists.txt
@@ -0,0 +1,39 @@
+# Copyright 2013 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+
+########################################################################
+# Install python files and apps
+########################################################################
+# GrPython is expected to provide VOLK_PYTHON_INSTALL; its definition is not
+# part of this file -- TODO confirm which module actually defines it.
+include(GrPython)
+
+# Python package sources of the modtool, destined for
+# ${VOLK_PYTHON_DIR}/volk_gnsssdr_modtool as part of the "volk_gnsssdr"
+# install component.
+VOLK_PYTHON_INSTALL(
+    FILES
+    __init__.py
+    cfg.py
+    volk_gnsssdr_modtool_generate.py
+    DESTINATION ${VOLK_PYTHON_DIR}/volk_gnsssdr_modtool
+    COMPONENT "volk_gnsssdr"
+)
+
+# The command-line entry script, destined for ${VOLK_RUNTIME_DIR}.
+VOLK_PYTHON_INSTALL(
+    PROGRAMS
+    volk_gnsssdr_modtool
+    DESTINATION ${VOLK_RUNTIME_DIR}
+    COMPONENT "volk_gnsssdr"
+)
diff --git a/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/README b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/README
new file mode 100644
index 000000000..3820201c2
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/README
@@ -0,0 +1,114 @@
+The volk_gnsssdr_modtool tool is installed along with VOLK as a way of helping
+to construct, add to, and interrogate the VOLK library or companion
+libraries.
+
+volk_gnsssdr_modtool is installed into $prefix/bin.
+
+VOLK modtool enables creating standalone (out-of-tree) VOLK modules
+and provides a few tools for sharing VOLK kernels between VOLK
+modules. If you need to design or work with VOLK kernels away from
+the canonical VOLK library, this is the tool. If you need to tailor
+your own VOLK library for whatever reason, this is the tool.
+
+The canonical VOLK library installs a volk_gnsssdr.h and a libvolk_gnsssdr.so. Your
+own library will install volk_gnsssdr_$name.h and libvolk_gnsssdr_$name.so. Ya Gronk?
+Good.
+
+There isn't a substantial difference between the canonical VOLK
+module and any other VOLK module. They're all peers. Any module
+created via VOLK modtool will come complete with a default
+volk_gnsssdr_modtool.cfg file associating the module with the base from which
+it came, its distinctive $name and its destination (or path). These
+values (created from user input if VOLK modtool runs without a
+user-supplied config file or a default config file) serve as default
+values for some VOLK modtool actions. It's more or less intended for
+the user to change directories to the top level of a created VOLK
+module and then run volk_gnsssdr_modtool to take advantage of the values
+stored in the default volk_gnsssdr_modtool.cfg file.
+
+Apart from creating new VOLK modules, VOLK modtool allows you to list
+the names of kernels in other modules, list the names of kernels in
+the current module, add kernels from another module into the current
+module, and remove kernels from the current module. When moving
+kernels between modules, VOLK modtool does its best to keep the qa
+and profiling code for those kernels intact. If the base has a test
+or a profiling call for some kernel, those calls will follow the
+kernel when VOLK modtool adds that kernel. If QA or profiling
+requires a puppet kernel, the puppet kernel will follow the original
+kernel when VOLK modtool adds that original kernel. VOLK modtool
+respects puppets.
+
+======================================================================
+
+Installing a new VOLK Library:
+
+Run the command "volk_gnsssdr_modtool -i". This will ask you three questions:
+
+  name: // the name to give your VOLK library: volk_gnsssdr_<name>
+  destination: // directory new source tree is built under -- must exist.
+               // It will create <directory>/volk_gnsssdr_<name>
+  base: // the directory containing the original VOLK source code
+
+The name provided must be alphanumeric (and cannot start with a
+number).
 No special characters including dashes and underscores are
+allowed.
+
+This will build a new skeleton directory in the destination provided
+with the name volk_gnsssdr_<name>. It will contain the necessary structure to
+build:
+
+  mkdir build
+  cd build
+  cmake -DCMAKE_INSTALL_PREFIX=/opt/volk_gnsssdr ../
+  make
+  sudo make install
+
+Right now, the library is empty and contains no kernels. Kernels can
+be added from another VOLK library using the '-a' option. If not
+specified, the kernel will be extracted from the base VOLK
+directory. Using the '-b' option allows us to specify another VOLK library to
+use for this purpose.
+
+  volk_gnsssdr_modtool -a -n 32fc_x2_conjugate_dot_prod_32fc
+
+This will put the code for the new kernel into
+<destination>/volk_gnsssdr_<name>/kernels/volk_gnsssdr_<name>/
+
+Other kernels must be added by hand. See the following webpages for
+more information about creating VOLK kernels:
+  http://gnuradio.org/doc/doxygen/volk_guide.html
+  http://gnuradio.org/redmine/projects/gnuradio/wiki/Volk
+
+
+======================================================================
+
+OPTIONS
+
+Options for Adding and Removing Kernels:
+  -a, --add_kernel
+      Add kernel from existing VOLK module. Uses the base VOLK module
+      unless -b is used. Use -n to specify the kernel name.
+      Requires: -n.
+      Optional: -b
+
+  -A, --add_all_kernels
+      Add all kernels from existing VOLK module. Uses the base VOLK
+      module unless -b is used.
+      Optional: -b
+
+  -x, --remove_kernel
+      Remove kernel from module.
+      Required: -n.
+      Optional: -b
+
+Options for Listing Kernels:
+  -l, --list
+      Lists all kernels available in the base VOLK module.
+
+  -k, --kernels
+      Lists all kernels in this VOLK module.
+
+  -r, --remote_list
+      Lists all kernels in another VOLK module that is specified
+      using the -b option.
+ diff --git a/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/__init__.py b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/__init__.py new file mode 100644 index 000000000..1d8fc6a3d --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/__init__.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# +# Copyright 2013 Free Software Foundation, Inc. +# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Radio; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 51 Franklin Street, +# Boston, MA 02110-1301, USA. +# + +from cfg import volk_gnsssdr_modtool_config +from volk_gnsssdr_modtool_generate import volk_gnsssdr_modtool diff --git a/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/__init__.pyc b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/__init__.pyc new file mode 100644 index 000000000..bb525bb1a Binary files /dev/null and b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/__init__.pyc differ diff --git a/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/cfg.py b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/cfg.py new file mode 100644 index 000000000..aa2ffbfdd --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/cfg.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python +# +# Copyright 2013 Free Software Foundation, Inc. 
+# +# This file is part of GNU Radio +# +# GNU Radio is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GNU Radio is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Radio; see the file COPYING. If not, write to +# the Free Software Foundation, Inc., 51 Franklin Street, +# Boston, MA 02110-1301, USA. +# + +import ConfigParser +import sys +import os +import exceptions +import re + + +class volk_gnsssdr_modtool_config: + def key_val_sub(self, num, stuff, section): + return re.sub('\$' + 'k' + str(num), stuff[num][0], (re.sub('\$' + str(num), stuff[num][1], section[1][num]))); + + def verify(self): + for i in self.verification: + self.verify_section(i) + def remap(self): + for i in self.remapification: + self.verify_section(i) + + def verify_section(self, section): + stuff = self.cfg.items(section[0]) + for i in range(len(section[1])): + eval(self.key_val_sub(i, stuff, section)) + try: + val = eval(self.key_val_sub(i, stuff, section)) + if val == False: + raise exceptions.ValueError + except ValueError: + raise exceptions.ValueError('Verification function returns False... key:%s, val:%s'%(stuff[i][0], stuff[i][1])) + except: + raise exceptions.IOError('bad configuration... 
key:%s, val:%s'%(stuff[i][0], stuff[i][1])) + + + def __init__(self, cfg=None): + self.config_name = 'config' + self.config_defaults = ['name', 'destination', 'base'] + self.config_defaults_remap = ['1', + 'self.cfg.set(self.config_name, \'$k1\', os.path.realpath(os.path.expanduser(\'$1\')))', + 'self.cfg.set(self.config_name, \'$k2\', os.path.realpath(os.path.expanduser(\'$2\')))'] + + self.config_defaults_verify = ['re.match(\'[a-zA-Z0-9]+$\', \'$0\')', + 'os.path.exists(\'$1\')', + 'os.path.exists(\'$2\')'] + self.remapification = [(self.config_name, self.config_defaults_remap)] + self.verification = [(self.config_name, self.config_defaults_verify)] + default = os.path.join(os.getcwd(), 'volk_gnsssdr_modtool.cfg') + icfg = ConfigParser.RawConfigParser() + if cfg: + icfg.read(cfg) + elif os.path.exists(default): + icfg.read(default) + else: + print "Initializing config file..." + icfg.add_section(self.config_name) + for kn in self.config_defaults: + rv = raw_input("%s: "%(kn)) + icfg.set(self.config_name, kn, rv) + self.cfg = icfg + self.remap() + self.verify() + + + + def read_map(self, name, inp): + if self.cfg.has_section(name): + self.cfg.remove_section(name) + self.cfg.add_section(name) + for i in inp: + self.cfg.set(name, i, inp[i]) + + def get_map(self, name): + retval = {} + stuff = self.cfg.items(name) + for i in stuff: + retval[i[0]] = i[1] + return retval + + + + + + + diff --git a/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/cfg.pyc b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/cfg.pyc new file mode 100644 index 000000000..f3688fabf Binary files /dev/null and b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/cfg.pyc differ diff --git a/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/volk_gnsssdr_modtool b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/volk_gnsssdr_modtool new file mode 100644 index 000000000..304aad4ca --- /dev/null +++ 
b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/volk_gnsssdr_modtool
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+#
+# Copyright 2013 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+#
+
+from volk_gnsssdr_modtool import volk_gnsssdr_modtool, volk_gnsssdr_modtool_config
+from optparse import OptionParser, OptionGroup
+
+import exceptions
+import os
+import sys
+
+# Command-line front end: parses the options, loads the configuration and
+# runs every requested action in the fixed order install, add kernel,
+# remove kernel, add all kernels, remote list, list, kernels.
+if __name__ == '__main__':
+    parser = OptionParser();
+    actions = OptionGroup(parser, 'Actions');
+    actions.add_option('-i', '--install', action='store_true',
+                       help='Create a new volk_gnsssdr module.')
+    parser.add_option('-b', '--base_path', action='store', default=None,
+                      help='Base path for action. By default, volk_gnsssdr_modtool.cfg loads this value.')
+    parser.add_option('-n', '--kernel_name', action='store', default=None,
+                      help='Kernel name for action. No default')
+    parser.add_option('-c', '--config', action='store', dest='config_file', default=None,
+                      help='Config file for volk_gnsssdr_modtool. By default, volk_gnsssdr_modtool.cfg in the local directory will be used/created.')
+    actions.add_option('-a', '--add_kernel', action='store_true',
+                       help='Add kernel from existing volk_gnsssdr module. Requires: -n. Optional: -b')
+    actions.add_option('-A', '--add_all_kernels', action='store_true',
+                       help='Add all kernels from existing volk_gnsssdr module. Optional: -b')
+    actions.add_option('-x', '--remove_kernel', action='store_true',
+                       help='Remove kernel from module. Required: -n. Optional: -b')
+    actions.add_option('-l', '--list', action='store_true',
+                       help='List all kernels in the base.')
+    actions.add_option('-k', '--kernels', action='store_true',
+                       help='List all kernels in the module.')
+    actions.add_option('-r', '--remote_list', action='store_true',
+                       help='List all available kernels in remote volk_gnsssdr module. Requires: -b.')
+    actions.add_option('-m', '--moo', action='store_true',
+                       help='Have you mooed today?')
+    parser.add_option_group(actions)
+
+    (options, args) = parser.parse_args();
+    # Called without any argument: only show the usage text.
+    if len(sys.argv) < 2:
+        parser.print_help()
+
+    elif options.moo:
+        print " (__) "
+        print " (oo) "
+        print " /------\/ "
+        print " / | || "
+        print " * /\---/\ "
+        print " ~~ ~~ "
+
+    else:
+        # Loading the configuration may prompt interactively when no config
+        # file is given and none exists in the current directory.
+        my_cfg = volk_gnsssdr_modtool_config(options.config_file);
+
+        my_modtool = volk_gnsssdr_modtool(my_cfg.get_map(my_cfg.config_name));
+
+
+        if options.install:
+            my_modtool.make_module_skeleton();
+            my_modtool.write_default_cfg(my_cfg.cfg);
+
+
+        if options.add_kernel:
+            if not options.kernel_name:
+                raise exceptions.IOError("This action requires the -n option.");
+            else:
+                name = options.kernel_name;
+            # -b overrides the 'base' stored in the configuration.
+            if options.base_path:
+                base = options.base_path;
+            else:
+                base = my_cfg.cfg.get(my_cfg.config_name, 'base');
+            my_modtool.import_kernel(name, base);
+
+        if options.remove_kernel:
+            if not options.kernel_name:
+                raise exceptions.IOError("This action requires the -n option.");
+            else:
+                name = options.kernel_name;
+            my_modtool.remove_kernel(name);
+
+        if options.add_all_kernels:
+
+            if options.base_path:
+                base = options.base_path;
+            else:
+                base = my_cfg.cfg.get(my_cfg.config_name, 'base');
+            kernelset = my_modtool.get_current_kernels(base);
+            for i in kernelset:
+                my_modtool.import_kernel(i, base);
+
+        if options.remote_list:
+            if not options.base_path:
+                raise exceptions.IOError("This action requires the -b option. Try -l or -k for listing kernels in the base or the module.")
+            else:
+                base = options.base_path;
+            kernelset = my_modtool.get_current_kernels(base);
+            for i in kernelset:
+                print i;
+
+        if options.list:
+            kernelset = my_modtool.get_current_kernels();
+            for i in kernelset:
+                print i;
+
+        if options.kernels:
+            # The module's own tree is <destination>/volk_gnsssdr_<name>.
+            dest = my_cfg.cfg.get(my_cfg.config_name, 'destination');
+            name = my_cfg.cfg.get(my_cfg.config_name, 'name');
+            base = os.path.join(dest, 'volk_gnsssdr_' + name);
+            kernelset = my_modtool.get_current_kernels(base);
+            for i in kernelset:
+                print i;
diff --git a/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/volk_gnsssdr_modtool_generate.py b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/volk_gnsssdr_modtool_generate.py
new file mode 100644
index 000000000..a613a2171
--- /dev/null
+++ b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/volk_gnsssdr_modtool_generate.py
@@ -0,0 +1,330 @@
+#
+# Copyright 2013 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# + +import os +import glob +import sys +import re +import glob +import shutil +import exceptions +from sets import Set + +class volk_gnsssdr_modtool: + def __init__(self, cfg): + self.volk_gnsssdr = re.compile('volk_gnsssdr'); + self.remove_after_underscore = re.compile("_.*"); + self.volk_gnsssdr_run_tests = re.compile('^\s*VOLK_RUN_TESTS.*\n', re.MULTILINE); + self.volk_gnsssdr_profile = re.compile('^\s*(VOLK_PROFILE|VOLK_PUPPET_PROFILE).*\n', re.MULTILINE); + self.my_dict = cfg; + self.lastline = re.compile('\s*char path\[1024\];.*'); + self.badassert = re.compile('^\s*assert\(toked\[0\] == "volk_gnsssdr_.*\n', re.MULTILINE); + self.goodassert = ' assert(toked[0] == "volk_gnsssdr");\n' + self.baderase = re.compile('^\s*toked.erase\(toked.begin\(\)\);.*\n', re.MULTILINE); + self.gooderase = ' toked.erase(toked.begin());\n toked.erase(toked.begin());\n'; + + def get_basename(self, base=None): + if not base: + base = self.my_dict['base'] + candidate = base.split('/')[-1]; + if len(candidate.split('_')) == 1: + return ''; + else: + return candidate.split('_')[-1]; + + def get_current_kernels(self, base=None): + if not base: + base = self.my_dict['base'] + name = self.get_basename(); + else: + name = self.get_basename(base); + if name == '': + hdr_files = glob.glob(os.path.join(base, "kernels/volk_gnsssdr/*.h")); + begins = re.compile("(?<=volk_gnsssdr_).*") + else: + hdr_files = glob.glob(os.path.join(base, "kernels/volk_gnsssdr_" + name + "/*.h")); + begins = re.compile("(?<=volk_gnsssdr_" + name + "_).*") + + datatypes = []; + functions = []; + + + for line in hdr_files: + + subline = re.search(".*\.h.*", os.path.basename(line)) + if subline: + subsubline = begins.search(subline.group(0)); + if subsubline: + dtype = self.remove_after_underscore.sub("", subsubline.group(0)); + subdtype = re.search("[0-9]+[A-z]+", dtype); + if subdtype: + datatypes.append(subdtype.group(0)); + + + datatypes = set(datatypes); + + for line in hdr_files: + for dt in datatypes: + if 
dt in line: + #subline = re.search("(?<=volk_gnsssdr_)" + dt + ".*(?=\.h)", line); + subline = re.search(begins.pattern[:-2] + dt + ".*(?=\.h)", line); + if subline: + functions.append(subline.group(0)); + + return set(functions); + + def make_module_skeleton(self): + + dest = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name']) + if os.path.exists(dest): + raise exceptions.IOError("Destination %s already exits!"%(dest)); + + if not os.path.exists(os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'kernels/volk_gnsssdr_' + self.my_dict['name'])): + os.makedirs(os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'kernels/volk_gnsssdr_' + self.my_dict['name'])) + + current_kernel_names = self.get_current_kernels(); + + for root, dirnames, filenames in os.walk(self.my_dict['base']): + for name in filenames: + t_table = map(lambda a: re.search(a, name), current_kernel_names); + t_table = set(t_table); + if t_table == set([None]): + infile = os.path.join(root, name); + instring = open(infile, 'r').read(); + outstring = re.sub(self.volk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], instring); + newname = re.sub(self.volk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], name); + relpath = os.path.relpath(infile, self.my_dict['base']); + newrelpath = re.sub(self.volk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], relpath); + dest = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], os.path.dirname(newrelpath), newname); + + if not os.path.exists(os.path.dirname(dest)): + os.makedirs(os.path.dirname(dest)) + open(dest, 'w+').write(outstring); + + + infile = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'lib/testqa.cc'); + instring = open(infile, 'r').read(); + outstring = re.sub(self.volk_gnsssdr_run_tests, '', instring); + open(infile, 'w+').write(outstring); + + infile = os.path.join(self.my_dict['destination'], 
'volk_gnsssdr_' + self.my_dict['name'], 'apps/volk_gnsssdr_' + self.my_dict['name'] + '_profile.cc'); + instring = open(infile, 'r').read(); + outstring = re.sub(self.volk_gnsssdr_profile, '', instring); + open(infile, 'w+').write(outstring); + + infile = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'lib/qa_utils.cc'); + instring = open(infile, 'r').read(); + outstring = re.sub(self.badassert, self.goodassert, instring); + outstring = re.sub(self.baderase, self.gooderase, outstring); + open(infile, 'w+').write(outstring); + + def write_default_cfg(self, cfg): + outfile = open(os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'volk_gnsssdr_modtool.cfg'), 'wb'); + cfg.write(outfile); + outfile.close(); + + + def convert_kernel(self, oldvolk_gnsssdr, name, base, inpath, top): + infile = os.path.join(inpath, 'kernels/' + top[:-1] + '/' + top + name + '.h'); + instring = open(infile, 'r').read(); + outstring = re.sub(oldvolk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], instring); + newname = 'volk_gnsssdr_' + self.my_dict['name'] + '_' + name + '.h'; + relpath = os.path.relpath(infile, base); + newrelpath = re.sub(oldvolk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], relpath); + dest = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], os.path.dirname(newrelpath), newname); + + if not os.path.exists(os.path.dirname(dest)): + os.makedirs(os.path.dirname(dest)) + open(dest, 'w+').write(outstring); + + # copy orc proto-kernels if they exist + for orcfile in glob.glob(inpath + '/orc/' + top + name + '*.orc'): + if os.path.isfile(orcfile): + instring = open(orcfile, 'r').read(); + outstring = re.sub(oldvolk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], instring); + newname = 'volk_gnsssdr_' + self.my_dict['name'] + '_' + name + '.orc'; + relpath = os.path.relpath(orcfile, base); + newrelpath = re.sub(oldvolk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], relpath); 
+ dest = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], os.path.dirname(newrelpath), newname); + if not os.path.exists(os.path.dirname(dest)): + os.makedirs(os.path.dirname(dest)); + open(dest, 'w+').write(outstring) + + + def remove_kernel(self, name): + basename = self.my_dict['name']; + if len(basename) > 0: + top = 'volk_gnsssdr_' + basename + '_'; + else: + top = 'volk_gnsssdr_' + base = os.path.join(self.my_dict['destination'], top[:-1]) ; + + if not name in self.get_current_kernels(): + + raise exceptions.IOError("Requested kernel %s is not in module %s"%(name,base)); + + + + inpath = os.path.abspath(base); + + + kernel = re.compile(name) + search_kernels = Set([kernel]) + profile = re.compile('^\s*VOLK_PROFILE') + puppet = re.compile('^\s*VOLK_PUPPET') + src_dest = os.path.join(inpath, 'apps/', top[:-1] + '_profile.cc'); + infile = open(src_dest); + otherlines = infile.readlines(); + open(src_dest, 'w+').write(''); + + for otherline in otherlines: + write_okay = True; + if kernel.search(otherline): + write_okay = False; + if puppet.match(otherline): + args = re.search("(?<=VOLK_PUPPET_PROFILE).*", otherline) + m_func = args.group(0).split(',')[0]; + func = re.search('(?<=' + top + ').*', m_func); + search_kernels.add(re.compile(func.group(0))); + if write_okay: + open(src_dest, 'a').write(otherline); + + + src_dest = os.path.join(inpath, 'lib/testqa.cc') + infile = open(src_dest); + otherlines = infile.readlines(); + open(src_dest, 'w+').write(''); + + for otherline in otherlines: + write_okay = True; + + for kernel in search_kernels: + if kernel.search(otherline): + write_okay = False; + + if write_okay: + open(src_dest, 'a').write(otherline); + + for kernel in search_kernels: + infile = os.path.join(inpath, 'kernels/' + top[:-1] + '/' + top + kernel.pattern + '.h'); + print "Removing kernel %s"%(kernel.pattern) + if os.path.exists(infile): + os.remove(infile); + # remove the orc proto-kernels if they exist. 
There are no puppets here + # so just need to glob for files matching kernel name + print glob.glob(inpath + '/orc/' + top + name + '*.orc'); + for orcfile in glob.glob(inpath + '/orc/' + top + name + '*.orc'): + print orcfile + if(os.path.exists(orcfile)): + os.remove(orcfile); + + def import_kernel(self, name, base): + if not (base): + base = self.my_dict['base']; + basename = self.getbasename(); + else: + basename = self.get_basename(base); + if not name in self.get_current_kernels(base): + raise exceptions.IOError("Requested kernel %s is not in module %s"%(name,base)); + + inpath = os.path.abspath(base); + if len(basename) > 0: + top = 'volk_gnsssdr_' + basename + '_'; + else: + top = 'volk_gnsssdr_' + oldvolk_gnsssdr = re.compile(top[:-1]); + + self.convert_kernel(oldvolk_gnsssdr, name, base, inpath, top); + + kernel = re.compile(name) + search_kernels = Set([kernel]) + + profile = re.compile('^\s*VOLK_PROFILE') + puppet = re.compile('^\s*VOLK_PUPPET') + infile = open(os.path.join(inpath, 'apps/', oldvolk_gnsssdr.pattern + '_profile.cc')); + otherinfile = open(os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'apps/volk_gnsssdr_' + self.my_dict['name'] + '_profile.cc')); + dest = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'apps/volk_gnsssdr_' + self.my_dict['name'] + '_profile.cc'); + lines = infile.readlines(); + otherlines = otherinfile.readlines(); + open(dest, 'w+').write(''); + insert = False; + inserted = False + for otherline in otherlines: + + if self.lastline.match(otherline): + insert = True; + if insert and not inserted: + inserted = True; + for line in lines: + if kernel.search(line): + if profile.match(line): + outline = re.sub(oldvolk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], line); + open(dest, 'a').write(outline); + elif puppet.match(line): + outline = re.sub(oldvolk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], line); + open(dest, 'a').write(outline); + args = 
re.search("(?<=VOLK_PUPPET_PROFILE).*", line) + m_func = args.group(0).split(',')[0]; + func = re.search('(?<=' + top + ').*', m_func); + search_kernels.add(re.compile(func.group(0))); + self.convert_kernel(oldvolk_gnsssdr, func.group(0), base, inpath, top); + write_okay = True; + for kernel in search_kernels: + if kernel.search(otherline): + write_okay = False + if write_okay: + open(dest, 'a').write(otherline); + + for kernel in search_kernels: + print "Adding kernel %s from module %s"%(kernel.pattern,base) + + infile = open(os.path.join(inpath, 'lib/testqa.cc')); + otherinfile = open(os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'lib/testqa.cc')); + dest = os.path.join(self.my_dict['destination'], 'volk_gnsssdr_' + self.my_dict['name'], 'lib/testqa.cc'); + lines = infile.readlines(); + otherlines = otherinfile.readlines(); + open(dest, 'w+').write(''); + inserted = False; + insert = False + for otherline in otherlines: + + if (re.match('\s*', otherline) == None or re.match('\s*#.*', otherline) == None): + + insert = True; + if insert and not inserted: + inserted = True; + for line in lines: + for kernel in search_kernels: + if kernel.search(line): + if self.volk_gnsssdr_run_tests.match(line): + outline = re.sub(oldvolk_gnsssdr, 'volk_gnsssdr_' + self.my_dict['name'], line); + open(dest, 'a').write(outline); + write_okay = True; + for kernel in search_kernels: + if kernel.search(otherline): + write_okay = False + if write_okay: + open(dest, 'a').write(otherline); + + + + + diff --git a/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/volk_gnsssdr_modtool_generate.pyc b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/volk_gnsssdr_modtool_generate.pyc new file mode 100644 index 000000000..67cee0681 Binary files /dev/null and b/src/algorithms/libs/volk_gnsssdr/python/volk_gnsssdr_modtool/volk_gnsssdr_modtool_generate.pyc differ diff --git a/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr.tmpl.c 
b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr.tmpl.c new file mode 100644 index 000000000..53dfaa97b --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr.tmpl.c @@ -0,0 +1,212 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. 
+ */ + +#include +#include "volk_gnsssdr_machines.h" +#include +#include +#include "volk_gnsssdr_rank_archs.h" +#include +#include +#include +#include + +static size_t __alignment = 0; +static intptr_t __alignment_mask = 0; + +struct volk_gnsssdr_machine *get_machine(void) +{ + extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[]; + extern unsigned int n_volk_gnsssdr_machines; + static struct volk_gnsssdr_machine *machine = NULL; + + if(machine != NULL) + return machine; + else { + unsigned int max_score = 0; + unsigned int i; + struct volk_gnsssdr_machine *max_machine = NULL; + for(i=0; icaps & (~volk_gnsssdr_get_lvarch()))) { + if(volk_gnsssdr_machines[i]->caps > max_score) { + max_score = volk_gnsssdr_machines[i]->caps; + max_machine = volk_gnsssdr_machines[i]; + } + } + } + machine = max_machine; + printf("Using Volk machine: %s\n", machine->name); + __alignment = machine->alignment; + __alignment_mask = (intptr_t)(__alignment-1); + return machine; + } +} + +void volk_gnsssdr_list_machines(void) +{ + extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[]; + extern unsigned int n_volk_gnsssdr_machines; + + unsigned int i; + for(i=0; icaps & (~volk_gnsssdr_get_lvarch()))) { + printf("%s;", volk_gnsssdr_machines[i]->name); + } + } + printf("\n"); +} + +const char* volk_gnsssdr_get_machine(void) +{ + extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[]; + extern unsigned int n_volk_gnsssdr_machines; + static struct volk_gnsssdr_machine *machine = NULL; + + if(machine != NULL) + return machine->name; + else { + unsigned int max_score = 0; + unsigned int i; + struct volk_gnsssdr_machine *max_machine = NULL; + for(i=0; icaps & (~volk_gnsssdr_get_lvarch()))) { + if(volk_gnsssdr_machines[i]->caps > max_score) { + max_score = volk_gnsssdr_machines[i]->caps; + max_machine = volk_gnsssdr_machines[i]; + } + } + } + machine = max_machine; + return machine->name; + } +} + +size_t volk_gnsssdr_get_alignment(void) +{ + get_machine(); //ensures alignment is 
set + return __alignment; +} + +bool volk_gnsssdr_is_aligned(const void *ptr) +{ + return ((intptr_t)(ptr) & __alignment_mask) == 0; +} + +#define LV_HAVE_GENERIC +#define LV_HAVE_DISPATCHER + +#for $kern in $kernels + +#if $kern.has_dispatcher +#include //pulls in the dispatcher +#end if + +static inline void __$(kern.name)_d($kern.arglist_full) +{ + #if $kern.has_dispatcher + $(kern.name)_dispatcher($kern.arglist_names); + return; + #end if + + if (volk_gnsssdr_is_aligned( + #set $num_open_parens = 0 + #for $arg_type, $arg_name in $kern.args + #if '*' in $arg_type + VOLK_OR_PTR($arg_name, + #set $num_open_parens += 1 + #end if + #end for + 0$(')'*$num_open_parens) + )){ + $(kern.name)_a($kern.arglist_names); + } + else{ + $(kern.name)_u($kern.arglist_names); + } +} + +static inline void __init_$(kern.name)(void) +{ + const char *name = get_machine()->$(kern.name)_name; + const char **impl_names = get_machine()->$(kern.name)_impl_names; + const int *impl_deps = get_machine()->$(kern.name)_impl_deps; + const bool *alignment = get_machine()->$(kern.name)_impl_alignment; + const size_t n_impls = get_machine()->$(kern.name)_n_impls; + const size_t index_a = volk_gnsssdr_rank_archs(name, impl_names, impl_deps, alignment, n_impls, true/*aligned*/); + const size_t index_u = volk_gnsssdr_rank_archs(name, impl_names, impl_deps, alignment, n_impls, false/*unaligned*/); + $(kern.name)_a = get_machine()->$(kern.name)_impls[index_a]; + $(kern.name)_u = get_machine()->$(kern.name)_impls[index_u]; + + assert($(kern.name)_a); + assert($(kern.name)_u); + + $(kern.name) = &__$(kern.name)_d; +} + +static inline void __$(kern.name)_a($kern.arglist_full) +{ + __init_$(kern.name)(); + $(kern.name)_a($kern.arglist_names); +} + +static inline void __$(kern.name)_u($kern.arglist_full) +{ + __init_$(kern.name)(); + $(kern.name)_u($kern.arglist_names); +} + +static inline void __$(kern.name)($kern.arglist_full) +{ + __init_$(kern.name)(); + $(kern.name)($kern.arglist_names); +} + 
+$kern.pname $(kern.name)_a = &__$(kern.name)_a; +$kern.pname $(kern.name)_u = &__$(kern.name)_u; +$kern.pname $(kern.name) = &__$(kern.name); + +void $(kern.name)_manual($kern.arglist_full, const char* impl_name) +{ + const int index = volk_gnsssdr_get_index( + get_machine()->$(kern.name)_impl_names, + get_machine()->$(kern.name)_n_impls, + impl_name + ); + get_machine()->$(kern.name)_impls[index]( + $kern.arglist_names + ); +} + +volk_gnsssdr_func_desc_t $(kern.name)_get_func_desc(void) { + const char **impl_names = get_machine()->$(kern.name)_impl_names; + const int *impl_deps = get_machine()->$(kern.name)_impl_deps; + const bool *alignment = get_machine()->$(kern.name)_impl_alignment; + const size_t n_impls = get_machine()->$(kern.name)_n_impls; + volk_gnsssdr_func_desc_t desc = { + impl_names, + impl_deps, + alignment, + n_impls + }; + return desc; +} + +#end for diff --git a/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr.tmpl.h b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr.tmpl.h new file mode 100644 index 000000000..16d0934e9 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr.tmpl.h @@ -0,0 +1,94 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. 
+ */ +#ifndef INCLUDED_VOLK_RUNTIME +#define INCLUDED_VOLK_RUNTIME + +#include +#include +#include +#include +#include + +#include +#include + +__VOLK_DECL_BEGIN + +typedef struct volk_gnsssdr_func_desc +{ + const char **impl_names; + const int *impl_deps; + const bool *impl_alignment; + const size_t n_impls; +} volk_gnsssdr_func_desc_t; + +//! Prints a list of machines available +VOLK_API void volk_gnsssdr_list_machines(void); + +//! Returns the name of the machine this instance will use +VOLK_API const char* volk_gnsssdr_get_machine(void); + +//! Get the machine alignment in bytes +VOLK_API size_t volk_gnsssdr_get_alignment(void); + +/*! + * The VOLK_OR_PTR macro is a convenience macro + * for checking the alignment of a set of pointers. + * Example usage: + * volk_gnsssdr_is_aligned(VOLK_OR_PTR(VOLK_OR_PTR(p0, p1), p2)) + */ +#define VOLK_OR_PTR(ptr0, ptr1) \ + (const void *)(((intptr_t)(ptr0)) | ((intptr_t)(ptr1))) + +/*! + * Is the pointer on a machine alignment boundary? + * + * Note: for performance reasons, this function + * is not usable until another volk_gnsssdr API call is made + * which will perform certain initialization tasks. + * + * \param ptr the pointer to some memory buffer + * \return 1 for alignment boundary, else 0 + */ +VOLK_API bool volk_gnsssdr_is_aligned(const void *ptr); + +#for $kern in $kernels + +//! A function pointer to the dispatcher implementation +extern VOLK_API $kern.pname $kern.name; + +//! A function pointer to the fastest aligned implementation +extern VOLK_API $kern.pname $(kern.name)_a; + +//! A function pointer to the fastest unaligned implementation +extern VOLK_API $kern.pname $(kern.name)_u; + +//! Call into a specific implementation given by name +extern VOLK_API void $(kern.name)_manual($kern.arglist_full, const char* impl_name); + +//! 
Get description parameters for this kernel +extern VOLK_API volk_gnsssdr_func_desc_t $(kern.name)_get_func_desc(void); +#end for + +__VOLK_DECL_END + +#endif /*INCLUDED_VOLK_RUNTIME*/ diff --git a/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_config_fixed.tmpl.h b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_config_fixed.tmpl.h new file mode 100644 index 000000000..e1c01ae77 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_config_fixed.tmpl.h @@ -0,0 +1,29 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_VOLK_CONFIG_FIXED_H +#define INCLUDED_VOLK_CONFIG_FIXED_H + +#for $i, $arch in enumerate($archs) +#define LV_$(arch.name.upper()) $i +#end for + +#endif /*INCLUDED_VOLK_CONFIG_FIXED*/ diff --git a/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_cpu.tmpl.c b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_cpu.tmpl.c new file mode 100644 index 000000000..cc58d9ebf --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_cpu.tmpl.c @@ -0,0 +1,191 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc.
+ * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#include +#include +#include + +struct VOLK_CPU volk_gnsssdr_cpu; + +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) + #define VOLK_CPU_x86 +#endif + +#if defined(VOLK_CPU_x86) + +//implement get cpuid for gcc compilers using a system or local copy of cpuid.h +#if defined(__GNUC__) + #if defined(HAVE_CPUID_H) + #include + #else + #include "gcc_x86_cpuid.h" + #endif + #define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3) + + /* Return Intel AVX extended CPU capabilities register. + * This function will bomb on non-AVX-capable machines, so + * check for AVX capability before executing. 
+ */ + #if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3)) && defined(HAVE_XGETBV) + static inline unsigned long long _xgetbv(unsigned int index){ + unsigned int eax, edx; + __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); + return ((unsigned long long)edx << 32) | eax; + } + #define __xgetbv() _xgetbv(0) + #else + #define __xgetbv() 0 + #endif + +//implement get cpuid for MSVC compilers using __cpuid intrinsic +#elif defined(_MSC_VER) && defined(HAVE_INTRIN_H) + #include + #define cpuid_x86(op, r) __cpuid(((int*)r), op) + + #if defined(_XCR_XFEATURE_ENABLED_MASK) + #define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) + #else + #define __xgetbv() 0 + #endif + +#else + #error "A get cpuid for volk_gnsssdr is not available on this compiler..." +#endif //defined(__GNUC__) + +#endif //defined(VOLK_CPU_x86) + +static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit) { +#if defined(VOLK_CPU_x86) + unsigned int regs[4]; + cpuid_x86(op, regs); + return regs[reg] >> bit & 0x01; +#else + return 0; +#endif +} + +static inline unsigned int check_extended_cpuid(unsigned int val) { +#if defined(VOLK_CPU_x86) + unsigned int regs[4]; + cpuid_x86(0x80000000, regs); + return regs[0] >= val; +#else + return 0; +#endif +} + +static inline unsigned int get_avx_enabled(void) { +#if defined(VOLK_CPU_x86) + return __xgetbv() & 0x6; +#else + return 0; +#endif +} + +//neon detection is linux specific +#if defined(__arm__) && defined(__linux__) + #include + #include + #include + #define VOLK_CPU_ARM +#endif + +static int has_neon(void){ +#if defined(VOLK_CPU_ARM) + FILE *auxvec_f; + unsigned long auxvec[2]; + unsigned int found_neon = 0; + auxvec_f = fopen("/proc/self/auxv", "rb"); + if(!auxvec_f) return 0; + + size_t r = 1; + //so auxv is basically 32b of ID and 32b of value + //so it goes like this + while(!found_neon && r) { + r = fread(auxvec, sizeof(unsigned long), 2, auxvec_f); + if((auxvec[0] 
== AT_HWCAP) && (auxvec[1] & HWCAP_NEON)) + found_neon = 1; + } + + fclose(auxvec_f); + return found_neon; +#else + return 0; +#endif +} + +static int has_ppc(void){ +#ifdef __PPC__ + return 1; +#else + return 0; +#endif +} + +#for $arch in $archs +static int i_can_has_$arch.name (void) { + #for $check, $params in $arch.checks + if ($(check)($(', '.join($params))) == 0) return 0; + #end for + return 1; +} + +#end for + +#if defined(HAVE_FENV_H) + #if defined(FE_TONEAREST) + #include + static inline void set_float_rounding(void){ + fesetround(FE_TONEAREST); + } + #else + static inline void set_float_rounding(void){ + //do nothing + } + #endif +#elif defined(_MSC_VER) + #include + static inline void set_float_rounding(void){ + unsigned int cwrd; + _controlfp_s(&cwrd, 0, 0); + _controlfp_s(&cwrd, _RC_NEAR, _MCW_RC); + } +#else + static inline void set_float_rounding(void){ + //do nothing + } +#endif + +void volk_gnsssdr_cpu_init() { + #for $arch in $archs + volk_gnsssdr_cpu.has_$arch.name = &i_can_has_$arch.name; + #end for + set_float_rounding(); +} + +unsigned int volk_gnsssdr_get_lvarch() { + unsigned int retval = 0; + volk_gnsssdr_cpu_init(); + #for $arch in $archs + retval += volk_gnsssdr_cpu.has_$(arch.name)() << LV_$(arch.name.upper()); + #end for + return retval; +} diff --git a/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_cpu.tmpl.h b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_cpu.tmpl.h new file mode 100644 index 000000000..5e6e2bb6a --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_cpu.tmpl.h @@ -0,0 +1,42 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_VOLK_CPU_H +#define INCLUDED_VOLK_CPU_H + +#include + +__VOLK_DECL_BEGIN + +struct VOLK_CPU { + #for $arch in $archs + int (*has_$arch.name) (); + #end for +}; + +extern struct VOLK_CPU volk_gnsssdr_cpu; + +void volk_gnsssdr_cpu_init (); +unsigned int volk_gnsssdr_get_lvarch (); + +__VOLK_DECL_END + +#endif /*INCLUDED_VOLK_CPU_H*/ diff --git a/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_machine_xxx.tmpl.c b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_machine_xxx.tmpl.c new file mode 100644 index 000000000..36b61da4f --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_machine_xxx.tmpl.c @@ -0,0 +1,79 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. 
+ */ + +#set $this_machine = $machine_dict[$args[0]] +#set $arch_names = $this_machine.arch_names + +#for $arch in $this_machine.archs +#define LV_HAVE_$(arch.name.upper()) 1 +#end for + +#include +#include "volk_gnsssdr_machines.h" +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#for $kern in $kernels +#include +#end for + +######################################################################## +#def make_arch_have_list($archs) +$(' | '.join(['(1 << LV_%s)'%a.name.upper() for a in $archs]))#slurp +#end def + +######################################################################## +#def make_impl_name_list($impls) +{$(', '.join(['"%s"'%i.name for i in $impls]))}#slurp +#end def + +######################################################################## +#def make_impl_align_list($impls) +{$(', '.join(['true' if i.is_aligned else 'false' for i in $impls]))}#slurp +#end def + +######################################################################## +#def make_impl_deps_list($impls) +{$(', '.join([' | '.join(['(1 << LV_%s)'%d.upper() for d in i.deps]) for i in $impls]))}#slurp +#end def + +######################################################################## +#def make_impl_fcn_list($name, $impls) +{$(', '.join(['%s_%s'%($name, i.name) for i in $impls]))}#slurp +#end def + +struct volk_gnsssdr_machine volk_gnsssdr_machine_$(this_machine.name) = { + $make_arch_have_list($this_machine.archs), + "$this_machine.name", + $this_machine.alignment, + #for $kern in $kernels + #set $impls = $kern.get_impls($arch_names) + "$kern.name", ##//kernel name + $make_impl_name_list($impls), ##//list of kernel implementations by name + $make_impl_deps_list($impls), ##//list of arch dependencies per implementation + $make_impl_align_list($impls), ##//alignment required? 
for each implementation + $make_impl_fcn_list($kern.name, $impls), ##//pointer to each implementation + $(len($impls)), ##//number of implementations listed here + #end for +}; diff --git a/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_machines.tmpl.c b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_machines.tmpl.c new file mode 100644 index 000000000..64e436010 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_machines.tmpl.c @@ -0,0 +1,34 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. 
+ */ + +#include +#include +#include "volk_gnsssdr_machines.h" + +struct volk_gnsssdr_machine *volk_gnsssdr_machines[] = { +#for $machine in $machines +#ifdef LV_MACHINE_$(machine.name.upper()) +&volk_gnsssdr_machine_$(machine.name), +#endif +#end for +}; + +unsigned int n_volk_gnsssdr_machines = sizeof(volk_gnsssdr_machines)/sizeof(*volk_gnsssdr_machines); diff --git a/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_machines.tmpl.h b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_machines.tmpl.h new file mode 100644 index 000000000..98edb724e --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_machines.tmpl.h @@ -0,0 +1,55 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifndef INCLUDED_LIBVOLK_MACHINES_H +#define INCLUDED_LIBVOLK_MACHINES_H + +#include +#include + +#include +#include + +__VOLK_DECL_BEGIN + +struct volk_gnsssdr_machine { + const unsigned int caps; //capabilities (i.e., archs compiled into this machine, in the volk_gnsssdr_get_lvarch format) + const char *name; + const size_t alignment; //the maximum byte alignment required for functions in this library + #for $kern in $kernels + const char *$(kern.name)_name; + const char *$(kern.name)_impl_names[$(len($archs))]; + const int $(kern.name)_impl_deps[$(len($archs))]; + const bool $(kern.name)_impl_alignment[$(len($archs))]; + const $(kern.pname) $(kern.name)_impls[$(len($archs))]; + const size_t $(kern.name)_n_impls; + #end for +}; + +#for $machine in $machines +#ifdef LV_MACHINE_$(machine.name.upper()) +extern struct volk_gnsssdr_machine volk_gnsssdr_machine_$(machine.name); +#endif +#end for + +__VOLK_DECL_END + +#endif //INCLUDED_LIBVOLK_MACHINES_H diff --git a/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_typedefs.tmpl.h b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_typedefs.tmpl.h new file mode 100644 index 000000000..1de950b61 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/tmpl/volk_gnsssdr_typedefs.tmpl.h @@ -0,0 +1,32 @@ +/* + * Copyright 2011-2012 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. 
If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef INCLUDED_VOLK_TYPEDEFS +#define INCLUDED_VOLK_TYPEDEFS + +#include +#include + +#for $kern in $kernels +typedef void (*$(kern.pname))($kern.arglist_types); +#end for + +#endif /*INCLUDED_VOLK_TYPEDEFS*/ diff --git a/src/algorithms/libs/volk_gnsssdr/volk_gnsssdr.pc.in b/src/algorithms/libs/volk_gnsssdr/volk_gnsssdr.pc.in new file mode 100644 index 000000000..bc2c2e425 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/volk_gnsssdr.pc.in @@ -0,0 +1,14 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ +LV_CXXFLAGS=@LV_CXXFLAGS@ + + +Name: volk_gnsssdr +Description: VOLK: Vector Optimized Library of Kernels +Requires: +Version: @VERSION@ +Libs: -L${libdir} -lvolk_gnsssdr +Cflags: -I${includedir} ${LV_CXXFLAGS} + diff --git a/src/algorithms/libs/volk_gnsssdr/volk_modtool.cfg b/src/algorithms/libs/volk_gnsssdr/volk_modtool.cfg new file mode 100644 index 000000000..c47ac2444 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr/volk_modtool.cfg @@ -0,0 +1,5 @@ +[config] +name = gnsssdr +destination = /Users/andres/Github/gnss-sdr/src/algorithms/libs +base = /Users/andres/github/gnuradio/volk +