mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2024-12-14 20:20:35 +00:00
Merge branch 'next' of https://github.com/gnss-sdr/gnss-sdr into next
This commit is contained in:
commit
0507618736
@ -19,7 +19,7 @@ ControlThread.wait_for_flowgraph=false
|
||||
SignalSource.implementation=Nsr_File_Signal_Source
|
||||
|
||||
;#filename: path to file with the captured GNSS signal samples to be processed
|
||||
SignalSource.filename=/datalogger/signals/ifen/E1L1_FE0_Band0.stream
|
||||
SignalSource.filename=/media/javier/SISTEMA/signals/ifen/E1L1_FE0_Band0.stream
|
||||
|
||||
;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
|
||||
SignalSource.item_type=byte
|
||||
@ -150,7 +150,7 @@ Resampler.implementation=Pass_Through
|
||||
;#count: Number of available GPS satellite channels.
|
||||
Channels_1C.count=8
|
||||
;#count: Number of available Galileo satellite channels.
|
||||
Channels_1B.count=8
|
||||
Channels_1B.count=0
|
||||
;#in_acquisition: Number of channels simultaneously acquiring for the whole receiver
|
||||
Channels.in_acquisition=1
|
||||
|
||||
@ -193,14 +193,15 @@ Acquisition_1C.if=0
|
||||
Acquisition_1C.sampled_ms=1
|
||||
;#implementation: Acquisition algorithm selection for this channel: [GPS_L1_CA_PCPS_Acquisition] or [Galileo_E1_PCPS_Ambiguous_Acquisition]
|
||||
Acquisition_1C.implementation=GPS_L1_CA_PCPS_Acquisition
|
||||
Acquisition_1C.use_CFAR_algorithm=false;
|
||||
;#threshold: Acquisition threshold
|
||||
Acquisition_1C.threshold=0.0075
|
||||
Acquisition_1C.threshold=40
|
||||
;#pfa: Acquisition false alarm probability. This option overrides the threshold option. Only use with implementations: [GPS_L1_CA_PCPS_Acquisition] or [Galileo_E1_PCPS_Ambiguous_Acquisition]
|
||||
;Acquisition_1C.pfa=0.01
|
||||
;#doppler_max: Maximum expected Doppler shift [Hz]
|
||||
Acquisition_1C.doppler_max=10000
|
||||
;#doppler_step: Doppler step in the grid search [Hz]
|
||||
Acquisition_1C.doppler_step=500
|
||||
Acquisition_1C.doppler_step=250
|
||||
|
||||
|
||||
;######### GALILEO ACQUISITION CONFIG ############
|
||||
@ -229,7 +230,7 @@ Acquisition_1B.doppler_step=125
|
||||
;######### TRACKING GPS CONFIG ############
|
||||
|
||||
;#implementation: Selected tracking algorithm: [GPS_L1_CA_DLL_PLL_Tracking] or [GPS_L1_CA_DLL_FLL_PLL_Tracking] or [GPS_L1_CA_TCP_CONNECTOR_Tracking] or [Galileo_E1_DLL_PLL_VEML_Tracking]
|
||||
Tracking_1C.implementation=GPS_L1_CA_DLL_PLL_Tracking
|
||||
Tracking_1C.implementation=GPS_L1_CA_DLL_PLL_C_Aid_Tracking
|
||||
;#item_type: Type and resolution for each of the signal samples. Use only [gr_complex] in this version.
|
||||
Tracking_1C.item_type=gr_complex
|
||||
|
||||
@ -237,19 +238,19 @@ Tracking_1C.item_type=gr_complex
|
||||
Tracking_1C.if=0
|
||||
|
||||
;#dump: Enable or disable the Tracking internal binary data file logging [true] or [false]
|
||||
Tracking_1C.dump=false
|
||||
Tracking_1C.dump=true
|
||||
|
||||
;#dump_filename: Log path and filename. Notice that the tracking channel will add "x.dat" where x is the channel number.
|
||||
Tracking_1C.dump_filename=../data/epl_tracking_ch_
|
||||
|
||||
;#pll_bw_hz: PLL loop filter bandwidth [Hz]
|
||||
Tracking_1C.pll_bw_hz=45.0;
|
||||
Tracking_1C.pll_bw_hz=40;
|
||||
|
||||
;#dll_bw_hz: DLL loop filter bandwidth [Hz]
|
||||
Tracking_1C.dll_bw_hz=2.0;
|
||||
Tracking_1C.dll_bw_hz=2.5;
|
||||
|
||||
;#fll_bw_hz: FLL loop filter bandwidth [Hz]
|
||||
Tracking_1C.fll_bw_hz=10.0;
|
||||
Tracking_1C.fll_bw_hz=2.0;
|
||||
|
||||
;#order: PLL/DLL loop filter order [2] or [3]
|
||||
Tracking_1C.order=3;
|
||||
|
@ -634,6 +634,7 @@ EXCLUDE = @top_srcdir@/docs/html \
|
||||
@top_srcdir@/cmake \
|
||||
@top_srcdir@/data \
|
||||
@top_srcdir@/src/core/libs/supl \
|
||||
@top_srcdir@/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr \
|
||||
|
||||
|
||||
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
|
||||
|
@ -125,6 +125,10 @@ void Channel::connect(gr::top_block_sptr top_block)
|
||||
DLOG(INFO) << "pass_through_ -> tracking";
|
||||
top_block->connect(trk_->get_right_block(), 0, nav_->get_left_block(), 0);
|
||||
DLOG(INFO) << "tracking -> telemetry_decoder";
|
||||
|
||||
top_block->msg_connect(nav_->get_left_block(),pmt::mp("preamble_index"),trk_->get_right_block(),pmt::mp("preamble_index"));
|
||||
DLOG(INFO) << "MSG FEEDBACK CHANNEL telemetry_decoder -> tracking";
|
||||
|
||||
connected_ = true;
|
||||
}
|
||||
|
||||
|
@ -42,6 +42,6 @@ From now on, GNSS-SDR (and any other program of your own that makes use of VOLK_
|
||||
|
||||
___
|
||||
|
||||
VOLK_GNSSSDR was originally created by Andres Cecilia Luque in the framework of the [Summer Of Code In Space (SOCIS 2014)](http://sophia.estec.esa.int/socis2014/?q=about "SOCIS 2014 webpage") program organized by the European Space Agency, and evolved since then by other authors (see each file header for main authorship). This software is released under the GNU General Public License version 3, see the file COPYING.
|
||||
VOLK_GNSSSDR was originally created by Andres Cecilia Luque in the framework of the [Summer Of Code In Space (SOCIS 2014)](http://sophia.estec.esa.int/socis2014/?q=about "SOCIS 2014 webpage") program organized by the European Space Agency, and then evolved and maintained by Carles Fernandez-Prades and Javier Arribas. This software is released under the GNU General Public License version 3, see the file COPYING.
|
||||
|
||||
This project is managed by [Centre Tecnologic de Telecomunicacions de Catalunya](http://www.cttc.es "CTTC webpage").
|
||||
|
@ -112,7 +112,7 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
/** --help option */
|
||||
if ( vm.count("help") ) {
|
||||
std::cout << "The VOLK profiler." << std::endl
|
||||
std::cout << "The VOLK_GNSSSDR profiler." << std::endl
|
||||
<< desc << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
@ -166,7 +166,7 @@ function(VOLK_ADD_TEST test_name)
|
||||
#functionality will be removed in the future. Leave it here for
|
||||
#now until someone can figure out how to do this in Windows.
|
||||
foreach(target ${test_name} ${VOLK_TEST_TARGET_DEPS})
|
||||
get_target_property(location ${target} LOCATION)
|
||||
get_target_property(location "${target}" LOCATION)
|
||||
if(location)
|
||||
get_filename_component(path ${location} PATH)
|
||||
string(REGEX REPLACE "\\$\\(.*\\)" ${CMAKE_BUILD_TYPE} path ${path})
|
||||
|
@ -0,0 +1,92 @@
|
||||
/*! \page extending_volk Extending VOLK
|
||||
|
||||
There are two primary routes for extending VOLK for your own use. The
|
||||
preferred route is by writing kernels and proto-kernels as part of this
|
||||
repository and sending patches upstream. The alternative is creating your
|
||||
own VOLK module, as is the case with VOLK_GNSSSDR ;-). There is a good reason
|
||||
for that: to provide GNSS-SDR users with adequate protokernels as soon as possible,
|
||||
without needing to upgrade to the latest VOLK version to enjoy the benefits.
|
||||
Notwithstanding, some VOLK_GNSSSDR kernels can be integrated into VOLK in the future,
|
||||
if other users find them useful.
|
||||
|
||||
## Modifying this repository
|
||||
|
||||
### Adding kernels
|
||||
|
||||
Adding kernels refers to introducing a new function to the VOLK_GNSSSDR API that is
|
||||
presumably a useful math function/operation. The first step is to create
|
||||
the file in volk_gnsssdr/kernels/volk_gnsssdr. Follow the naming scheme provided in the
|
||||
VOLK_GNSSSDR terms and techniques page. First create the generic protokernel.
|
||||
|
||||
The generic protokernel should be written in plain C using explicitly sized
|
||||
types from stdint.h or volk_gnsssdr_complex.h when appropriate. volk_gnsssdr_complex.h
|
||||
includes explicitly sized complex types for floats and ints. The name of
|
||||
the generic kernel should be volk_gnsssdr_signature_from_file_generic. If multiple
|
||||
versions of the generic kernel exist then a description can be appended to
|
||||
generic_, but it is not required to use alignment flags in the generic
|
||||
protokernel name. It is required to surround the entire generic function
|
||||
with preprocessor ifdef fences on the symbol LV_HAVE_GENERIC.
|
||||
|
||||
Finally, add the kernel to the list of test cases in volk_gnsssdr/lib/kernel_tests.h.
|
||||
Many kernels should be able to use the default test parameters, but if yours
|
||||
requires a lower tolerance, specific vector length, or other test parameters
|
||||
just create a new instance of volk_gnsssdr_test_params_t for your kernel.
|
||||
|
||||
### Adding protokernels
|
||||
|
||||
The primary purpose of VOLK_GNSSSDR is to have multiple implementations of an operation
|
||||
tuned for a specific CPU architecture. Ideally there is at least one
|
||||
protokernel of each kernel for every architecture that VOLK_GNSSSDR supports.
|
||||
The pattern for protokernel naming is volk_gnsssdr_kernel_signature_architecture_nick.
|
||||
The architecture should be one of the supported VOLK_GNSSSDR architectures. The nick is
|
||||
an optional name to distinguish between multiple implementations for a
|
||||
particular architecture.
|
||||
|
||||
Architecture specific protokernels can be written in one of three ways.
|
||||
The first approach should always be to use compiler intrinsic functions.
|
||||
The second and third approaches are using either in-line assembly or
|
||||
assembly with .S files. Both methods of writing assembly exist in VOLK_GNSSSDR and
|
||||
should yield equivalent performance; which method you might choose is a
|
||||
matter of opinion. Regardless of the actual method the public function should
|
||||
be declared in the kernel header surrounded by ifdef fences on the symbol that
|
||||
fits the architecture implementation.
|
||||
|
||||
#### Compiler Intrinsics
|
||||
|
||||
Compiler intrinsics should be treated as functions that map to a specific
|
||||
assembly instruction. Most VOLK_GNSSSDR kernels take the form of a loop that iterates
|
||||
through a vector. Form a loop that iterates on a number of items that is natural
|
||||
for the architecture and then use compiler intrinsics to do the math for your
|
||||
operation or algorithm. Include the appropriate header inside the ifdef fences,
|
||||
but before your protokernel declaration.
|
||||
|
||||
|
||||
#### In-line Assembly
|
||||
|
||||
In-line assembly uses a compiler macro to include verbatim assembly with C code.
|
||||
The process of in-line assembly protokernels is very similar to protokernels
|
||||
based on intrinsics.
|
||||
|
||||
#### Assembly with .S files
|
||||
|
||||
To write pure assembly protokernels, first declare the function name in the
|
||||
kernel header file the same way as any other protokernel, but include the extern
|
||||
keyword. Second, create a file (one for each protokernel) in
|
||||
volk_gnsssdr/kernels/volk_gnsssdr/asm/$arch. Disassemble another protokernel and copy the
|
||||
disassembled code into this file to bootstrap a working implementation. Often
|
||||
the disassembled code can be hand-tuned to improve performance.
|
||||
|
||||
## VOLK Modules
|
||||
|
||||
VOLK has a concept of modules. Each module is an independent VOLK tree. Modules
|
||||
can be managed with the volk_modtool application. At a high level the module is
|
||||
a clone of all of the VOLK machinery without kernels. volk_modtool also makes it
|
||||
easy to copy kernels to a module.
|
||||
|
||||
Kernels and protokernels are added to your own VOLK module the same way they are
|
||||
added to this repository, which was described in the previous section.
|
||||
|
||||
VOLK_GNSSSDR is a VOLK Module.
|
||||
|
||||
*/
|
||||
|
@ -0,0 +1,24 @@
|
||||
/*! \page kernels Kernels
|
||||
|
||||
\li \subpage volk_gnsssdr_32fc_convert_16ic
|
||||
\li \subpage volk_gnsssdr_32fc_convert_8ic
|
||||
\li \subpage volk_gnsssdr_16ic_convert_32fc
|
||||
\li \subpage volk_gnsssdr_16ic_resampler_16ic
|
||||
\li \subpage volk_gnsssdr_16ic_xn_resampler_16ic_xn
|
||||
\li \subpage volk_gnsssdr_16ic_s32fc_x2_rotator_16ic
|
||||
\li \subpage volk_gnsssdr_16ic_x2_multiply_16ic
|
||||
\li \subpage volk_gnsssdr_16ic_x2_dot_prod_16ic
|
||||
\li \subpage volk_gnsssdr_16ic_x2_dot_prod_16ic_xn
|
||||
\li \subpage volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn
|
||||
\li \subpage volk_gnsssdr_8ic_conjugate_8ic
|
||||
\li \subpage volk_gnsssdr_8ic_magnitude_squared_8i
|
||||
\li \subpage volk_gnsssdr_8ic_x2_dot_prod_8ic
|
||||
\li \subpage volk_gnsssdr_8ic_x2_multiply_8ic
|
||||
\li \subpage volk_gnsssdr_8ic_s8ic_multiply_8ic
|
||||
\li \subpage volk_gnsssdr_8i_accumulator_s8i
|
||||
\li \subpage volk_gnsssdr_8i_index_max_16u
|
||||
\li \subpage volk_gnsssdr_8i_max_s8i
|
||||
\li \subpage volk_gnsssdr_8i_x2_add_8i
|
||||
\li \subpage volk_gnsssdr_64f_accumulator_64f
|
||||
|
||||
*/
|
@ -0,0 +1,19 @@
|
||||
/*! \mainpage VOLK_GNSSSDR
|
||||
|
||||
Welcome to VOLK_GNSSSDR!
|
||||
|
||||
VOLK_GNSSSDR is the Vector-Optimized Library of Kernels for GNSS-SDR.
|
||||
It is a library that contains kernels of hand-written SIMD code for different
|
||||
mathematical operations. Since each SIMD architecture can be very different
|
||||
and no compiler has yet come along to handle vectorization properly or highly
|
||||
efficiently, VOLK_GNSSSDR approaches the problem differently.
|
||||
|
||||
For each architecture or platform that a developer wishes to vectorize for, a
|
||||
new proto-kernel is added to VOLK_GNSSSDR. At runtime, VOLK_GNSSSDR will select the correct
|
||||
proto-kernel. In this way, the users of VOLK_GNSSSDR call a kernel for performing the
|
||||
operation that is platform/architecture agnostic. This allows us to write
|
||||
portable SIMD code.
|
||||
|
||||
VOLK_GNSSSDR is a module generated from the original VOLK library http://libvolk.org
|
||||
|
||||
*/
|
@ -0,0 +1,121 @@
|
||||
/*! \page concepts_terms_and_techniques Concepts, Terms, and Techniques
|
||||
|
||||
This page is primarily a list of definitions and a brief overview of successful
|
||||
techniques previously used to develop VOLK_GNSSSDR protokernels.
|
||||
|
||||
## Definitions and Concepts
|
||||
|
||||
### SIMD
|
||||
|
||||
SIMD stands for Single Instruction Multiple Data. Leveraging SIMD instructions
|
||||
is the primary optimization in VOLK_GNSSSDR.
|
||||
|
||||
### Architecture
|
||||
|
||||
A VOLK_GNSSSDR architecture is normally called an Instruction Set Architecture (ISA).
|
||||
The architectures we target in VOLK_GNSSSDR usually have SIMD instructions.
|
||||
|
||||
### Vector
|
||||
|
||||
A vector in VOLK_GNSSSDR is the same as a C array. It sometimes, but not always,
|
||||
coincides with the mathematical definition of a vector.
|
||||
|
||||
### Kernel
|
||||
|
||||
The 'kernel' part of the VOLK_GNSSSDR name comes from the high performance computing
|
||||
use of the word. In this context it is the inner loop of a vector operation.
|
||||
Since we don't use the word vector in the math sense a vector operation is an
|
||||
operation that is performed on a C array.
|
||||
|
||||
### Protokernel
|
||||
|
||||
A protokernel is an implementation of a kernel. Every kernel has a 'generic'
|
||||
protokernel that is implemented in C. Other protokernels are optimized for a
|
||||
particular architecture.
|
||||
|
||||
|
||||
## Techniques
|
||||
|
||||
### New Kernels
|
||||
|
||||
Add new kernels to the list in lib/kernel_tests.h. This adds the kernel to
|
||||
VOLK_GNSSSDR's QA tool as well as the volk profiler. Many kernels are able to
|
||||
share test parameters, but new kernels might need new ones.
|
||||
|
||||
If the VOLK_GNSSSDR kernel does not 'fit' the standard set of function parameters
|
||||
expected by the volk_profile structure, you need to create a VOLK_GNSSSDR puppet
|
||||
function to help the profiler call the kernel. This is essentially due to the
|
||||
function run_volk_gnsssdr_tests which has a limited number of function prototypes that
|
||||
it can test.
|
||||
|
||||
### Protokernels
|
||||
|
||||
Adding new proto-kernels (implementations of VOLK_GNSSSDR kernels for specific
|
||||
architectures) is relatively easy. In the relevant <kernel>.h file in
|
||||
the volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr<input-fingerprint_function-name_output-fingerprint>.h
|
||||
file, add a new #ifdef/#endif block for the LV_HAVE_<arch> corresponding
|
||||
to the <arch> you are working on (e.g. SSE, AVX, NEON, etc.).
|
||||
|
||||
For example, for volk_gnsssdr_16ic_x2_multiply_16ic_neon:
|
||||
|
||||
\code
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t *a_ptr = (lv_16sc_t*) in_a;
|
||||
lv_16sc_t *b_ptr = (lv_16sc_t*) in_b;
|
||||
unsigned int quarter_points = num_points / 4;
|
||||
int16x4x2_t a_val, b_val, c_val;
|
||||
int16x4x2_t tmp_real, tmp_imag;
|
||||
unsigned int number = 0;
|
||||
|
||||
for(; number < quarter_points; ++number)
|
||||
{
|
||||
a_val = vld2_s16((int16_t*)a_ptr);
|
||||
b_val = vld2_s16((int16_t*)b_ptr);
|
||||
|
||||
tmp_real.val[0] = vmul_s16(a_val.val[0], b_val.val[0]);
|
||||
tmp_real.val[1] = vmul_s16(a_val.val[1], b_val.val[1]);
|
||||
|
||||
tmp_imag.val[0] = vmul_s16(a_val.val[0], b_val.val[1]);
|
||||
tmp_imag.val[1] = vmul_s16(a_val.val[1], b_val.val[0]);
|
||||
|
||||
c_val.val[0] = vsub_s16(tmp_real.val[0], tmp_real.val[1]);
|
||||
c_val.val[1] = vadd_s16(tmp_imag.val[0], tmp_imag.val[1]);
|
||||
vst2_s16((int16_t*)out, c_val);
|
||||
|
||||
a_ptr += 4;
|
||||
b_ptr += 4;
|
||||
out += 4;
|
||||
}
|
||||
|
||||
for(number = quarter_points * 4; number < num_points; number++)
|
||||
{
|
||||
*out++ = (*a_ptr++) * (*b_ptr++);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_NEON */
|
||||
\endcode
|
||||
|
||||
### Allocating Memory
|
||||
|
||||
SIMD code can be very sensitive to the alignment of the vectors, which is
|
||||
generally something like a 16-byte or 32-byte alignment requirement. The
|
||||
VOLK_GNSSSDR dispatcher functions, which is what we will normally call as users of
|
||||
VOLK_GNSSSDR, make sure that the correct aligned or unaligned version is called
|
||||
depending on the state of the vectors passed to it. However, things typically
|
||||
work faster and more efficiently when the vectors are aligned. As such, VOLK_GNSSSDR
|
||||
has memory allocate and free methods to provide us with properly aligned
|
||||
vectors. We can also ask VOLK_GNSSSDR to give us the current machine's alignment
|
||||
requirement, which makes our job even easier when porting code.
|
||||
|
||||
To get the machine's alignment, simply call the size_t volk_gnsssdr_get_alignment().
|
||||
|
||||
Allocate memory using void* volk_gnsssdr_malloc(size_t size, size_t alignment).
|
||||
|
||||
Make sure that any memory allocated by VOLK_GNSSSDR is also freed by VOLK_GNSSSDR with volk_gnsssdr_free(void *p).
|
||||
|
||||
|
||||
*/
|
@ -0,0 +1,19 @@
|
||||
/*! \page using_volk_gnsssdr Using VOLK_GNSSSDR
|
||||
|
||||
Using VOLK_GNSSSDR in your code requires proper linking and including the correct headers.
|
||||
VOLK_GNSSSDR currently supports both C and C++ bindings.
|
||||
|
||||
VOLK_GNSSSDR provides both a pkgconfig and CMake module to help configuration and
|
||||
linking. The pkgconfig file is installed to
|
||||
$install_prefix/lib/pkgconfig/volk_gnsssdr.pc. The CMake configuration module is in
|
||||
$install_prefix/lib/cmake/volk_gnsssdr/VolkConfig.cmake.
|
||||
|
||||
The VOLK_GNSSSDR include directory (includedir in pkgconfig,
|
||||
VOLK_GNSSSDR_INCLUDE_DIRS in cmake module) contains the header volk_gnsssdr/volk_gnsssdr.h, which defines all
|
||||
of the symbols exposed by VOLK_GNSSSDR. Alternatively individual kernel headers are in
|
||||
the same location.
|
||||
|
||||
In most cases it is sufficient to call the dispatcher for the kernel you are using.
|
||||
|
||||
*/
|
||||
|
@ -91,8 +91,12 @@
|
||||
#include <inttypes.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#ifdef _WIN32
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
union bit128{
|
||||
uint8_t i8[16];
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_32fc_convert_16ic.h
|
||||
* \brief Volk protokernel: converts 16 bit integer complex complex values to 32 bits float complex values
|
||||
* \file volk_gnsssdr_16ic_convert_32fc.h
|
||||
* \brief VOLK_GNSSSDR kernel: converts 16 bit integer complex values to 32 bits float complex values.
|
||||
* \authors <ul>
|
||||
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
||||
* </ul>
|
||||
@ -30,6 +30,28 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_16ic_convert_32fc
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Converts a complex vector of 16-bits integer each component
|
||||
* into a complex vector of 32-bits float each component.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_16ic_convert_32fc(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li inputVector: The complex 16-bit integer input data buffer.
|
||||
* \li num_points: The number of data values to be converted.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li outputVector: pointer to a vector holding the converted vector.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_16ic_convert_32fc_H
|
||||
#define INCLUDED_volk_gnsssdr_16ic_convert_32fc_H
|
||||
@ -38,12 +60,6 @@
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component.
|
||||
\param[out] outputVector The complex 32-bit float output data buffer
|
||||
\param[in] inputVector The complex 16-bit integer input data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
for(unsigned int i = 0; i < num_points; i++)
|
||||
@ -53,15 +69,10 @@ static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVecto
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component.
|
||||
\param[out] outputVector The complex 32-bit float output data buffer
|
||||
\param[in] inputVector The complex 16-bit integer input data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 2;
|
||||
@ -89,15 +100,10 @@ static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component.
|
||||
\param[out] outputVector The complex 32-bit float output data buffer
|
||||
\param[in] inputVector The complex 16-bit integer input data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 2;
|
||||
@ -128,12 +134,6 @@ static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component.
|
||||
\param[out] outputVector The complex 32-bit float output data buffer
|
||||
\param[in] inputVector The complex 16-bit integer input data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_convert_32fc_neon(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 2;
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_resampler_16ic.h
|
||||
* \brief Volk protokernel: resample a 16 bits complex vector
|
||||
* \brief VOLK_GNSSSDR kernel: resamples a 16 bits complex vector.
|
||||
* \authors <ul>
|
||||
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
|
||||
* VOLK_GNSSSDR kernel that resamples a 16 bits complex vector (16 bits the real part
|
||||
* and 16 bits the imaginary part)
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
@ -33,6 +33,30 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_16ic_resampler_16ic
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Resamples a complex vector (16-bit integer each component).
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_16ic_resampler_16ic(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li local_code: Vector with the local code to be resampled.
|
||||
* \li rem_code_phase_chips: Remnant code phase [chips]
|
||||
* \li code_phase_step_chips: Phase increment per sample [chips/sample]
|
||||
* \li code_length_chips: Code length in chips.
|
||||
* \li num_output_samples: The number of data values to be in the resampled vector.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li result: Pointer to the resampled vector.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_16ic_resampler_16ic_H
|
||||
#define INCLUDED_volk_gnsssdr_16ic_resampler_16ic_H
|
||||
|
||||
@ -48,15 +72,6 @@
|
||||
// return (r > 0.0) ? (r + 0.5) : (r - 0.5);
|
||||
//}
|
||||
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component)
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] local_code One of the vectors to be multiplied
|
||||
\param[in] rem_code_phase_chips Remnant code phase [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_resampler_16ic_generic(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)
|
||||
{
|
||||
int local_code_chip_index;
|
||||
@ -77,15 +92,6 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_generic(lv_16sc_t* result, c
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component)
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] local_code One of the vectors to be multiplied
|
||||
\param[in] rem_code_phase_chips Remnant code phase [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
|
||||
{
|
||||
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
|
||||
@ -165,18 +171,10 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co
|
||||
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component)
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] local_code One of the vectors to be multiplied
|
||||
\param[in] rem_code_phase_chips Remnant code phase [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
|
||||
{
|
||||
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
|
||||
@ -255,18 +253,10 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co
|
||||
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component)
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] local_code One of the vectors to be multiplied
|
||||
\param[in] rem_code_phase_chips Remnant code phase [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
|
||||
{
|
||||
unsigned int number;
|
||||
@ -308,7 +298,6 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons
|
||||
__attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
||||
float32x4_t _4constant_float = vld1q_f32(init_4constant_float);
|
||||
|
||||
|
||||
for(number = 0; number < quarterPoints; number++)
|
||||
{
|
||||
_code_phase_out = vmulq_f32(_code_phase_step_chips, _4output_index); //compute the code phase point with the phase step
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_resamplerpuppet_16ic.h
|
||||
* \brief Volk puppet for the 16-bit complex vector resampler kernel
|
||||
* \brief VOLK_GNSSSDR puppet for the 16-bit complex vector resampler kernel.
|
||||
* \authors <ul>
|
||||
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
|
||||
* </ul>
|
||||
*
|
||||
* Volk puppet for integrating the resampler into volk's test system
|
||||
* VOLK_GNSSSDR puppet for integrating the resampler into the test system
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_resamplerxnpuppet_16ic.h
|
||||
* \brief Volk puppet for the multiple 16-bit complex vector resampler kernel
|
||||
* \brief VOLK_GNSSSDR puppet for the multiple 16-bit complex vector resampler kernel.
|
||||
* \authors <ul>
|
||||
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
|
||||
* </ul>
|
||||
*
|
||||
* Volk puppet for integrating the multiple resampler into volk's test system
|
||||
* VOLK_GNSSSDR puppet for integrating the multiple resampler into the test system
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
@ -68,6 +68,7 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_generic(lv_16sc_t* r
|
||||
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
@ -94,8 +95,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse2(lv_16sc_t* re
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = 0.1;
|
||||
@ -121,8 +122,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse2(lv_16sc_t* re
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = 0.1;
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_rotatorpuppet_16ic.h
|
||||
* \brief Volk puppet for the 16-bit complex rotator kernel
|
||||
* \brief VOLK_GNSSSDR puppet for the 16-bit complex rotator kernel.
|
||||
* \authors <ul>
|
||||
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
|
||||
* </ul>
|
||||
*
|
||||
* Volk puppet for integrating the resampler into volk's test system
|
||||
* VOLK_GNSSSDR puppet for integrating the rotator into the test system
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
@ -42,7 +42,6 @@
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_generic(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
|
||||
{
|
||||
// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
@ -59,7 +58,6 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_generic(lv_16sc_t* outVe
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
|
||||
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_a_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
|
||||
{
|
||||
// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
@ -74,8 +72,8 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_a_sse3(lv_16sc_t* outVec
|
||||
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_u_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
|
||||
{
|
||||
// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
@ -90,8 +88,8 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_u_sse3(lv_16sc_t* outVec
|
||||
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
|
||||
{
|
||||
// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_16ic_s32fc_x2_rotator_16ic.h
|
||||
* \brief Volk protokernel: rotates a 16 bits complex vector
|
||||
* \file volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h
|
||||
* \brief VOLK_GNSSSDR kernel: rotates a 16 bits complex vector.
|
||||
* \authors <ul>
|
||||
* <li> Carles Fernandez-Prades, 2015 cfernandez at cttc.es
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that rotates a 16-bit complex vector
|
||||
* VOLK_GNSSSDR kernel that rotates a 16-bit complex vector
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
@ -32,6 +32,29 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_16ic_s32fc_x2_rotator_16ic
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Rotates a complex vector (16-bit integer samples each component).
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li inVector: Vector to be rotated.
|
||||
* \li phase_inc: Phase increment in each sample = lv_cmake(cos(phase_step_rad), sin(phase_step_rad))
|
||||
* \li phase: Initial phase = lv_cmake(cos(initial_phase_rad), sin(initial_phase_rad))
|
||||
* \li num_points: Number of complex values to be rotated and stored into \p outVector
|
||||
*
|
||||
* \b Outputs
|
||||
* \li phase: Final phase.
|
||||
* \li outVector: The rotated vector.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_H
|
||||
#define INCLUDED_volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_H
|
||||
@ -42,14 +65,6 @@
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
|
||||
\brief Rotates a complex vector (16-bit integer samples each component)
|
||||
\param[out] outVector Rotated vector
|
||||
\param[in] inVector Vector to be rotated
|
||||
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||
\param[in,out] phase Initial / final phase
|
||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
|
||||
{
|
||||
unsigned int i = 0;
|
||||
@ -70,14 +85,6 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* ou
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Rotates a complex vector (16-bit integer samples each component)
|
||||
\param[out] outVector Rotated vector
|
||||
\param[in] inVector Vector to be rotated
|
||||
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||
\param[in,out] phase Initial / final phase
|
||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
@ -103,7 +110,6 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
@ -150,7 +156,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
|
||||
_out += 4;
|
||||
}
|
||||
|
||||
_mm_store_ps((float*)two_phase_acc, two_phase_acc_reg);
|
||||
_mm_storeu_ps((float*)two_phase_acc, two_phase_acc_reg);
|
||||
(*phase) = two_phase_acc[0];
|
||||
|
||||
for (unsigned int i = sse_iters * 4; i < num_points; ++i)
|
||||
@ -164,17 +170,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
|
||||
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Rotates a complex vector (16-bit integer samples each component)
|
||||
\param[out] outVector Rotated vector
|
||||
\param[in] inVector Vector to be rotated
|
||||
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||
\param[in,out] phase Initial / final phase
|
||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
@ -200,7 +199,6 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
@ -221,6 +219,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
|
||||
//next two samples
|
||||
_in += 2;
|
||||
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
@ -261,17 +260,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
|
||||
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Rotates a complex vector (16-bit integer samples each component)
|
||||
\param[out] outVector Rotated vector
|
||||
\param[in] inVector Vector to be rotated
|
||||
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||
\param[in,out] phase Initial / final phase
|
||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
|
||||
{
|
||||
unsigned int i = 0;
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_x2_dot_prod_16ic.h
|
||||
* \brief Volk protokernel: multiplies two 16 bits vectors and accumulates them
|
||||
* \brief VOLK_GNSSSDR kernel: multiplies two 16 bits vectors and accumulates them.
|
||||
* \authors <ul>
|
||||
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
|
||||
* VOLK_GNSSSDR kernel that multiplies two complex vectors (16 bits the real part
|
||||
* and 16 bits the imaginary part) and accumulates them
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
@ -33,6 +33,29 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_16ic_x2_dot_prod_16ic
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Multiplies two input complex vectors (16-bit integer each component) and accumulates them,
|
||||
* storing the result. Results are saturated so never go beyond the limits of the data type.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_16ic_x2_dot_prod_16ic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li in_a: One of the vectors to be multiplied and accumulated.
|
||||
* \li in_b: The other vector to be multiplied and accumulated.
|
||||
* \li num_points: Number of complex values to be multiplied together, accumulated and stored into \p result
|
||||
*
|
||||
* \b Outputs
|
||||
* \li result: Value of the accumulated result.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_H
|
||||
#define INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_H
|
||||
|
||||
@ -43,13 +66,6 @@
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] in_a One of the vectors to be multiplied and accumulated
|
||||
\param[in] in_b One of the vectors to be multiplied and accumulated
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
result[0] = lv_cmake((int16_t)0, (int16_t)0);
|
||||
@ -66,13 +82,6 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result,
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] in_a One of the vectors to be multiplied and accumulated
|
||||
\param[in] in_b One of the vectors to be multiplied and accumulated
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0);
|
||||
@ -85,7 +94,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
|
||||
|
||||
if (sse_iters > 0)
|
||||
{
|
||||
__m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, realcacc, imagcacc, result;
|
||||
__m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc, result;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
|
||||
realcacc = _mm_setzero_si128();
|
||||
@ -100,7 +109,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
|
||||
//imaginary part -> reinterpret_cast<cv T*>(a)[2*i + 1]
|
||||
// a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
|
||||
a = _mm_load_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
|
||||
__builtin_prefetch(_in_a + 8);
|
||||
b = _mm_load_si128((__m128i*)_in_b);
|
||||
__builtin_prefetch(_in_b + 8);
|
||||
c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
|
||||
|
||||
c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
|
||||
@ -149,13 +160,6 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] in_a One of the vectors to be multiplied and accumulated
|
||||
\param[in] in_b One of the vectors to be multiplied and accumulated
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0);
|
||||
@ -169,7 +173,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
|
||||
|
||||
if (sse_iters > 0)
|
||||
{
|
||||
__m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, realcacc, imagcacc, result;
|
||||
__m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc, result;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
|
||||
realcacc = _mm_setzero_si128();
|
||||
@ -184,7 +188,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
|
||||
//imaginary part -> reinterpret_cast<cv T*>(a)[2*i + 1]
|
||||
// a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
|
||||
a = _mm_loadu_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
|
||||
__builtin_prefetch(_in_a + 8);
|
||||
b = _mm_loadu_si128((__m128i*)_in_b);
|
||||
__builtin_prefetch(_in_b + 8);
|
||||
c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
|
||||
|
||||
c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
|
||||
@ -210,7 +216,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
|
||||
|
||||
result = _mm_or_si128(realcacc, imagcacc);
|
||||
|
||||
_mm_storeu_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector
|
||||
_mm_storeu_si128((__m128i*)dotProductVector, result); // Store the results back into the dot product vector
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
@ -232,13 +238,6 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] in_a One of the vectors to be multiplied and accumulated
|
||||
\param[in] in_b One of the vectors to be multiplied and accumulated
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
unsigned int quarter_points = num_points / 4;
|
||||
@ -295,4 +294,56 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const
|
||||
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
 * NEON implementation of the 16-bit complex dot product that relies on the
 * multiply-accumulate (vmla) and multiply-subtract (vmls) intrinsics.
 *
 * Samples are processed four at a time; the remaining (num_points % 4)
 * samples are handled by a scalar loop at the end.
 *
 * NOTE: the vector path uses vmul/vmla/vmls/vadd, which are the plain
 * (non-saturating) NEON operations, and the scalar reduction uses ordinary
 * complex addition.
 *
 * \param[out] out        Accumulated result (a single complex value)
 * \param[in]  in_a       First input vector
 * \param[in]  in_b       Second input vector
 * \param[in]  num_points Number of complex samples in in_a and in_b
 */
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon_vma(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
    const unsigned int simd_blocks = num_points / 4;
    unsigned int idx;

    // vld2 de-interleaves the samples: val[0] holds four real parts,
    // val[1] holds the four matching imaginary parts
    int16x4x2_t va, vb, prod, acc;
    __VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4];

    acc.val[0] = vdup_n_s16(0);
    acc.val[1] = vdup_n_s16(0);

    for (idx = 0; idx < simd_blocks; ++idx)
    {
        const lv_16sc_t* block_a = in_a + 4 * idx;
        const lv_16sc_t* block_b = in_b + 4 * idx;

        va = vld2_s16((const int16_t*)block_a);  // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i
        vb = vld2_s16((const int16_t*)block_b);  // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i
        __builtin_prefetch(block_a + 8);
        __builtin_prefetch(block_b + 8);

        // real part: ar*br - ai*bi
        prod.val[0] = vmls_s16(vmul_s16(va.val[0], vb.val[0]), va.val[1], vb.val[1]);
        // imaginary part: ai*br + ar*bi
        prod.val[1] = vmla_s16(vmul_s16(va.val[1], vb.val[0]), va.val[0], vb.val[1]);

        acc.val[0] = vadd_s16(acc.val[0], prod.val[0]);
        acc.val[1] = vadd_s16(acc.val[1], prod.val[1]);
    }

    // re-interleave the four partial sums and collapse them into one value
    vst2_s16((int16_t*)accum_result, acc);
    *out = accum_result[0] + accum_result[1] + accum_result[2] + accum_result[3];

    // scalar tail: samples that did not fill a complete block of four
    for (idx = simd_blocks * 4; idx < num_points; ++idx)
    {
        *out += in_a[idx] * in_b[idx];
    }
}
|
||||
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_H*/
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
|
||||
* \brief Volk protokernel: multiplies N 16 bits vectors by a common vector and accumulates the results in N 16 bits short complex outputs.
|
||||
* \brief VOLK_GNSSSDR kernel: multiplies N 16 bits vectors by a common vector and accumulates the results in N 16 bits short complex outputs.
|
||||
* \authors <ul>
|
||||
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that multiplies N 16 bits vectors by a common vector and accumulates the results in N 16 bits short complex outputs.
|
||||
* VOLK_GNSSSDR kernel that multiplies N 16 bits vectors by a common vector and accumulates the results in N 16 bits short complex outputs.
|
||||
* It is optimized to perform the N tap correlation process in GNSS receivers.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
@ -33,6 +33,30 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_16ic_x2_dot_prod_16ic_xn
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Multiplies a reference complex vector by an arbitrary number of other complex vectors, accumulates the results and stores them in the output vector.
|
||||
* This function can be used as a multiple correlator.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li in_common: Pointer to one of the vectors to be multiplied and accumulated (reference vector)
|
||||
* \li in_a: Pointer to an array of pointers to other vectors to be multiplied by \p in_common and accumulated.
|
||||
* \li num_a_vectors: Number of vectors to be multiplied by the reference vector \p in_common and accumulated.
|
||||
* \li num_points: Number of complex values to be multiplied together, accumulated and stored into \p result
|
||||
*
|
||||
* \b Outputs
|
||||
* \li result: Vector of \p num_a_vectors components with vector \p in_common multiplied by the vectors in \p in_a and accumulated.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_H
|
||||
#define INCLUDED_volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_H
|
||||
|
||||
@ -41,14 +65,7 @@
|
||||
#include <volk_gnsssdr/saturation_arithmetic.h>
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
|
||||
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
|
||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
@ -70,14 +87,6 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* resu
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
|
||||
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
|
||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake(0,0);
|
||||
@ -160,22 +169,15 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* resul
|
||||
|
||||
_out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)),
|
||||
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
|
||||
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
|
||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake(0,0);
|
||||
@ -258,22 +260,15 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* resul
|
||||
|
||||
_out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)),
|
||||
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
|
||||
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
|
||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake(0,0);
|
||||
@ -354,9 +349,85 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result,
|
||||
|
||||
_out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)),
|
||||
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
 * NEON implementation (multiply-accumulate / multiply-subtract flavour) of
 * the multiple dot product: in_common is multiplied by each of the
 * num_a_vectors vectors in in_a and every product is accumulated into the
 * matching entry of result.
 *
 * Samples are processed four at a time with NEON; the remaining
 * (num_points % 4) samples are accumulated by a scalar loop using
 * saturating additions.
 *
 * result is treated as output-only: every entry is cleared before
 * accumulating, so the outcome does not depend on its previous contents
 * even when num_points < 4 and the vector path is skipped. If the temporary
 * accumulator storage cannot be allocated, all samples are processed by the
 * scalar loop instead of dereferencing a null pointer.
 *
 * \param[out] result        Array of num_a_vectors accumulated results
 * \param[in]  in_common     Reference vector
 * \param[in]  in_a          Array of num_a_vectors pointers to the vectors to be multiplied by in_common
 * \param[in]  num_a_vectors Number of vectors in in_a (and entries in result)
 * \param[in]  num_points    Number of complex samples in every vector
 */
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
    lv_16sc_t dotProduct = lv_cmake(0,0);

    const unsigned int neon_iters = num_points / 4;
    unsigned int tail_start = 0;  // first sample index left for the scalar loop

    const lv_16sc_t** _in_a = in_a;
    const lv_16sc_t* _in_common = in_common;
    lv_16sc_t* _out = result;

    // result is an output: start every accumulator from zero
    for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
    {
        _out[n_vec] = lv_cmake(0,0);
    }

    if (neon_iters > 0 && num_a_vectors > 0)
    {
        __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];

        int16x4x2_t a_val, b_val, tmp;

        // one pair of (real, imaginary) lane accumulators per input vector
        int16x4x2_t* accumulator = (int16x4x2_t*)malloc(num_a_vectors * sizeof(int16x4x2_t));

        if (accumulator != NULL)
        {
            for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
            {
                accumulator[n_vec].val[0] = vdup_n_s16(0);
                accumulator[n_vec].val[1] = vdup_n_s16(0);
            }

            for (unsigned int number = 0; number < neon_iters; number++)
            {
                // de-interleaving load: val[0] = 4 real parts, val[1] = 4 imaginary parts
                b_val = vld2_s16((const int16_t*)_in_common);
                __builtin_prefetch(_in_common + 8);
                for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
                {
                    a_val = vld2_s16((const int16_t*)&(_in_a[n_vec][number * 4]));

                    tmp.val[0] = vmul_s16(a_val.val[0], b_val.val[0]);
                    tmp.val[1] = vmul_s16(a_val.val[1], b_val.val[0]);

                    // use multiply accumulate/subtract to get result
                    tmp.val[0] = vmls_s16(tmp.val[0], a_val.val[1], b_val.val[1]);
                    tmp.val[1] = vmla_s16(tmp.val[1], a_val.val[0], b_val.val[1]);

                    accumulator[n_vec].val[0] = vadd_s16(accumulator[n_vec].val[0], tmp.val[0]);
                    accumulator[n_vec].val[1] = vadd_s16(accumulator[n_vec].val[1], tmp.val[1]);
                }
                _in_common += 4;
            }

            for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
            {
                // re-interleave the four partial sums and reduce them with saturation
                vst2_s16((int16_t*)dotProductVector, accumulator[n_vec]);
                dotProduct = lv_cmake(0,0);
                for (int i = 0; i < 4; ++i)
                {
                    dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])),
                            sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i])));
                }
                _out[n_vec] = dotProduct;
            }
            free(accumulator);

            // the vector path consumed every complete block of four samples
            tail_start = neon_iters * 4;
        }
    }

    // scalar loop: leftover samples, or all samples if the vector path did not run
    for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
    {
        for (unsigned int n = tail_start; n < num_points; n++)
        {
            lv_16sc_t tmp = in_common[n] * in_a[n_vec][n];

            _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)),
                    sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
        }
    }
}
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h
|
||||
* \brief Volk puppet for the multiple 16-bit complex dot product kernel
|
||||
* \brief VOLK_GNSSSDR puppet for the multiple 16-bit complex dot product kernel.
|
||||
* \authors <ul>
|
||||
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
|
||||
* </ul>
|
||||
*
|
||||
* Volk puppet for integrating the resampler into volk's test system
|
||||
* VOLK_GNSSSDR puppet for integrating the resampler into the test system
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
@ -61,7 +61,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_generic(lv_16sc_t*
|
||||
volk_gnsssdr_free(in_a);
|
||||
}
|
||||
|
||||
#endif // Generic
|
||||
#endif /* Generic */
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_a_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
@ -83,12 +83,10 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_a_sse2(lv_16sc_t* r
|
||||
volk_gnsssdr_free(in_a);
|
||||
}
|
||||
|
||||
#endif // SSE2
|
||||
#endif /* SSE2 */
|
||||
|
||||
#define WORKAROUND 1
|
||||
#ifdef WORKAROUND
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#if LV_HAVE_SSE2 && LV_HAVE_64
|
||||
|
||||
static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_u_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
{
|
||||
@ -108,8 +106,9 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_u_sse2(lv_16sc_t* r
|
||||
}
|
||||
volk_gnsssdr_free(in_a);
|
||||
}
|
||||
#endif
|
||||
#endif // SSE2
|
||||
|
||||
#endif /* LV_HAVE_SSE2 && LV_HAVE_64 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
|
||||
@ -134,6 +133,29 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon(lv_16sc_t* res
|
||||
|
||||
#endif // NEON
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
|
||||
/*
 * Puppet that adapts the multi-vector NEON (vma) dot product kernel to a
 * two-input signature: the input vector "in" is replicated three times and
 * the copies are passed to the kernel as its set of vectors.
 */
static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon_vma(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
    int num_a_vectors = 3;
    int idx;

    /* table of pointers, one entry per replica of the input */
    lv_16sc_t** replicas = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment());

    idx = 0;
    while (idx < num_a_vectors)
        {
            replicas[idx] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t)*num_points, volk_gnsssdr_get_alignment());
            memcpy(replicas[idx], in, sizeof(lv_16sc_t)*num_points);
            idx++;
        }

    volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(result, local_code, (const lv_16sc_t**) replicas, num_a_vectors, num_points);

    /* release the replicas first, then the table that holds them */
    idx = 0;
    while (idx < num_a_vectors)
        {
            volk_gnsssdr_free(replicas[idx]);
            idx++;
        }
    volk_gnsssdr_free(replicas);
}
|
||||
|
||||
#endif // NEON
|
||||
#endif // INCLUDED_volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_H
|
||||
|
||||
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_x2_dot_prod_16ic.h
|
||||
* \brief Volk protokernel: multiplies two 16 bits vectors and accumulates them
|
||||
* \file volk_gnsssdr_16ic_x2_multiply_16ic.h
|
||||
* \brief VOLK_GNSSSDR kernel: multiplies two 16 bits vectors and accumulates them.
|
||||
* \authors <ul>
|
||||
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
|
||||
* VOLK_GNSSSDR kernel that multiplies two 16 bits vectors (8 bits the real part
|
||||
* and 8 bits the imaginary part) and accumulates them
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
@ -33,6 +33,28 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_16ic_x2_multiply_16ic
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Multiplies two input complex vectors, point-by-point, storing the result in the third vector.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_16ic_x2_multiply_16ic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li in_a: One of the vectors to be multiplied.
|
||||
* \li in_b: The other vector to be multiplied.
|
||||
* \li num_points: The number of complex data points.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li result: The vector where the result will be stored.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_16ic_x2_multiply_16ic_H
|
||||
#define INCLUDED_volk_gnsssdr_16ic_x2_multiply_16ic_H
|
||||
|
||||
@ -40,13 +62,7 @@
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] in_a One of the vectors to be multiplied
|
||||
\param[in] in_b One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
for (unsigned int n = 0; n < num_points; n++)
|
||||
@ -62,13 +78,6 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result,
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] in_a One of the vectors to be multiplied
|
||||
\param[in] in_b One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
@ -118,16 +127,10 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] in_a One of the vectors to be multiplied
|
||||
\param[in] in_b One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
@ -177,16 +180,10 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, con
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] in_a One of the vectors to be multiplied
|
||||
\param[in] in_b One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t *a_ptr = (lv_16sc_t*) in_a;
|
||||
|
@ -1,12 +1,12 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
|
||||
* \brief Volk protokernel: multiplies N 16 bits vectors by a common vector
|
||||
* \file volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h
|
||||
* \brief VOLK_GNSSSDR kernel: multiplies N 16 bits vectors by a common vector
|
||||
* phase rotated and accumulates the results in N 16 bits short complex outputs.
|
||||
* \authors <ul>
|
||||
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that multiplies N 16 bits vectors by a common vector, which is
|
||||
* VOLK_GNSSSDR kernel that multiplies N 16 bits vectors by a common vector, which is
|
||||
* phase-rotated by phase offset and phase increment, and accumulates the results
|
||||
* in N 16 bits short complex outputs.
|
||||
* It is optimized to perform the N tap correlation process in GNSS receivers.
|
||||
@ -36,8 +36,36 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_16ic_xn_rotator_dot_prod_16ic_xn_H
|
||||
#define INCLUDED_volk_gnsssdr_16ic_xn_rotator_dot_prod_16ic_xn_H
|
||||
/*!
|
||||
* \page volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Rotates and multiplies the reference complex vector with an arbitrary number of other complex vectors,
|
||||
* accumulates the results and stores them in the output vector.
|
||||
* This function can be used for Doppler wipe-off and multiple correlator.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li in_common: Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector).
|
||||
* \li phase_inc: Phase increment = lv_cmake(cos(phase_step_rad), sin(phase_step_rad))
|
||||
* \li phase: Initial phase = lv_cmake(cos(initial_phase_rad), sin(initial_phase_rad))
|
||||
* \li in_a: Pointer to an array of pointers to multiple vectors to be multiplied and accumulated.
|
||||
* \li num_a_vectors: Number of vectors to be multiplied by the reference vector and accumulated.
|
||||
* \li num_points: Number of complex values to be multiplied together, accumulated and stored into \p result.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li phase: Final phase.
|
||||
* \li result: Vector of \p num_a_vectors components with the multiple vectors of \p in_a rotated, multiplied by \p in_common and accumulated.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_H
|
||||
#define INCLUDED_volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_H
|
||||
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
@ -46,16 +74,7 @@
|
||||
//#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
|
||||
\param[in] in_common Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector)
|
||||
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||
\param[in,out] phase Initial / final phase
|
||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t tmp16;
|
||||
@ -85,16 +104,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
|
||||
\param[in] in_common Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector)
|
||||
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||
\param[in,out] phase Initial / final phase
|
||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake(0,0);
|
||||
@ -140,7 +149,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
|
||||
// Phase rotation on operand in_common starts here:
|
||||
//printf("generic phase %i: %f,%f\n", n*4,lv_creal(*phase),lv_cimag(*phase));
|
||||
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in_common + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
@ -248,19 +256,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
|
||||
\param[in] in_common Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector)
|
||||
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||
\param[in,out] phase Initial / final phase
|
||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake(0,0);
|
||||
@ -304,6 +303,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
// Phase rotation on operand in_common starts here:
|
||||
|
||||
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in_common + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
@ -378,7 +378,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
|
||||
|
||||
a = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]);
|
||||
|
||||
_mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector
|
||||
_mm_storeu_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector
|
||||
dotProduct = lv_cmake(0,0);
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
@ -390,9 +390,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
|
||||
free(realcacc);
|
||||
free(imagcacc);
|
||||
|
||||
_mm_store_ps((float*)two_phase_acc, two_phase_acc_reg);
|
||||
_mm_storeu_ps((float*)two_phase_acc, two_phase_acc_reg);
|
||||
(*phase) = two_phase_acc[0];
|
||||
|
||||
|
||||
for(unsigned int n = sse_iters * 4; n < num_points; n++)
|
||||
{
|
||||
tmp16 = in_common[n];
|
||||
@ -413,16 +414,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
|
||||
\param[in] in_common Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector)
|
||||
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||
\param[in,out] phase Initial / final phase
|
||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
const unsigned int neon_iters = num_points / 4;
|
||||
@ -583,4 +574,160 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
|
||||
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_H*/
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*
 * NEON implementation (multiply-accumulate / multiply-subtract variant) of the
 * rotator + multiple dot product: every sample of in_common is multiplied by
 * the running phase (which advances by phase_inc per sample), rounded back to
 * 16-bit integers, multiplied by the corresponding sample of each vector
 * in_a[k] and accumulated with saturation into result[k].
 *
 * result:        output array with num_a_vectors elements.
 * in_common:     reference vector with num_points samples (the one being rotated).
 * phase_inc:     complex phase increment applied after every sample.
 * phase:         in: initial phase; out: phase after the last processed sample.
 * in_a:          array of num_a_vectors pointers to vectors of num_points samples.
 * num_a_vectors: number of vectors in in_a.
 * num_points:    number of complex samples in each vector.
 *
 * Samples are processed four at a time with NEON; the remaining
 * num_points % 4 samples are processed by a scalar loop.
 */
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
    unsigned int neon_iters = num_points / 4;

    const lv_16sc_t** _in_a = in_a;
    const lv_16sc_t* _in_common = in_common;
    lv_16sc_t* _out = result;

    lv_16sc_t tmp16_, tmp;
    lv_32fc_t tmp32_;

    int16x4x2_t* accumulator = NULL;

    /* The scalar tail loop accumulates into _out, so every output must start
       from zero even when no NEON iteration is executed (num_points < 4). */
    for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
        {
            _out[n_vec] = lv_cmake(0,0);
        }

    if (neon_iters > 0)
        {
            accumulator = (int16x4x2_t*)calloc(num_a_vectors, sizeof(int16x4x2_t));
            if (accumulator == NULL)
                {
                    /* No scratch memory: let the scalar loop below process all the
                       points; *phase has not been touched yet, so it stays consistent */
                    neon_iters = 0;
                }
        }

    if (neon_iters > 0)
        {
            lv_16sc_t dotProduct = lv_cmake(0,0);

            /* phase advance corresponding to four samples, replicated in all lanes */
            lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
            __VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
            __VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };

            float32x4_t _phase4_real = vld1q_f32(__phase4_real);
            float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);

            /* phases of the first four samples, one per lane */
            lv_32fc_t phase2 = (lv_32fc_t)(*phase) * phase_inc;
            lv_32fc_t phase3 = phase2 * phase_inc;
            lv_32fc_t phase4 = phase3 * phase_inc;

            __VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
            __VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };

            float32x4_t _phase_real = vld1q_f32(__phase_real);
            float32x4_t _phase_imag = vld1q_f32(__phase_imag);

            int16x4x2_t a_val, b_val;
            __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
            float32x4_t half = vdupq_n_f32(0.5f);
            int16x4x2_t tmp16;
            int32x4x2_t tmp32i;

            float32x4x2_t tmp32f, tmp32_real, tmp32_imag;
            float32x4_t sign, PlusHalf, Round;

            for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
                {
                    accumulator[n_vec].val[0] = vdup_n_s16(0);
                    accumulator[n_vec].val[1] = vdup_n_s16(0);
                }

            for (unsigned int number = 0; number < neon_iters; number++)
                {
                    /* load 4 complex numbers (int 16 bits each component) */
                    tmp16 = vld2_s16((int16_t*)_in_common);
                    __builtin_prefetch(_in_common + 8);
                    _in_common += 4;

                    /* promote them to int 32 bits */
                    tmp32i.val[0] = vmovl_s16(tmp16.val[0]);
                    tmp32i.val[1] = vmovl_s16(tmp16.val[1]);

                    /* promote them to float 32 bits */
                    tmp32f.val[0] = vcvtq_f32_s32(tmp32i.val[0]);
                    tmp32f.val[1] = vcvtq_f32_s32(tmp32i.val[1]);

                    /* complex multiplication of four complex samples (float 32 bits each component) */
                    tmp32_real.val[0] = vmulq_f32(tmp32f.val[0], _phase_real);
                    tmp32_real.val[1] = vmulq_f32(tmp32f.val[1], _phase_imag);
                    tmp32_imag.val[0] = vmulq_f32(tmp32f.val[0], _phase_imag);
                    tmp32_imag.val[1] = vmulq_f32(tmp32f.val[1], _phase_real);

                    tmp32f.val[0] = vsubq_f32(tmp32_real.val[0], tmp32_real.val[1]);
                    tmp32f.val[1] = vaddq_f32(tmp32_imag.val[0], tmp32_imag.val[1]);

                    /* downcast results to int32 */
                    /* in __aarch64__ we can do that with vcvtaq_s32_f32(ret1); vcvtaq_s32_f32(ret2); */
                    /* sign is 1.0 for negative lanes and 0.0 otherwise, so the value passed to the
                       truncating conversion is x + 0.5 for non-negative x and x - 0.5 for negative x */
                    sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[0]), 31)));
                    PlusHalf = vaddq_f32(tmp32f.val[0], half);
                    Round = vsubq_f32(PlusHalf, sign);
                    tmp32i.val[0] = vcvtq_s32_f32(Round);

                    sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[1]), 31)));
                    PlusHalf = vaddq_f32(tmp32f.val[1], half);
                    Round = vsubq_f32(PlusHalf, sign);
                    tmp32i.val[1] = vcvtq_s32_f32(Round);

                    /* downcast results to int16 (saturating narrow) */
                    tmp16.val[0] = vqmovn_s32(tmp32i.val[0]);
                    tmp16.val[1] = vqmovn_s32(tmp32i.val[1]);

                    /* compute next four phases */
                    tmp32_real.val[0] = vmulq_f32(_phase_real, _phase4_real);
                    tmp32_real.val[1] = vmulq_f32(_phase_imag, _phase4_imag);
                    tmp32_imag.val[0] = vmulq_f32(_phase_real, _phase4_imag);
                    tmp32_imag.val[1] = vmulq_f32(_phase_imag, _phase4_real);

                    _phase_real = vsubq_f32(tmp32_real.val[0], tmp32_real.val[1]);
                    _phase_imag = vaddq_f32(tmp32_imag.val[0], tmp32_imag.val[1]);

                    /* the phase registers are written back to memory only once, after the loop */

                    for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
                        {
                            a_val = vld2_s16((int16_t*)&(_in_a[n_vec][number*4]));

                            b_val.val[0] = vmul_s16(a_val.val[0], tmp16.val[0]);
                            b_val.val[1] = vmul_s16(a_val.val[1], tmp16.val[0]);

                            /* use multiply accumulate/subtract to get result */
                            b_val.val[0] = vmls_s16(b_val.val[0], a_val.val[1], tmp16.val[1]);
                            b_val.val[1] = vmla_s16(b_val.val[1], a_val.val[0], tmp16.val[1]);

                            accumulator[n_vec].val[0] = vqadd_s16(accumulator[n_vec].val[0], b_val.val[0]);
                            accumulator[n_vec].val[1] = vqadd_s16(accumulator[n_vec].val[1], b_val.val[1]);
                        }
                }

            for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
                {
                    /* Store the four partial sums back (re-interleaved) and reduce them */
                    vst2_s16((int16_t*)dotProductVector, accumulator[n_vec]);
                    dotProduct = lv_cmake(0,0);
                    for (int i = 0; i < 4; ++i)
                        {
                            dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])),
                                    sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i])));
                        }
                    _out[n_vec] = dotProduct;
                }
            free(accumulator);

            /* lane 0 holds the phase of the first sample not processed by the NEON loop */
            vst1q_f32((float32_t*)__phase_real, _phase_real);
            vst1q_f32((float32_t*)__phase_imag, _phase_imag);

            (*phase) = lv_cmake((float32_t)__phase_real[0], (float32_t)__phase_imag[0]);
        }

    /* Scalar processing of the samples not covered by the NEON loop */
    for (unsigned int n = neon_iters * 4; n < num_points; n++)
        {
            tmp16_ = in_common[n];
            tmp32_ = lv_cmake((float32_t)lv_creal(tmp16_), (float32_t)lv_cimag(tmp16_)) * (*phase);
            tmp16_ = lv_cmake((int16_t)rintf(lv_creal(tmp32_)), (int16_t)rintf(lv_cimag(tmp32_)));
            (*phase) *= phase_inc;
            for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
                {
                    tmp = tmp16_ * in_a[n_vec][n];
                    _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
                }
        }
}
|
||||
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_H*/
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h
|
||||
* \brief Volk puppet for the multiple 16-bit complex dot product kernel
|
||||
* \file volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic.h
|
||||
* \brief Volk puppet for the multiple 16-bit complex dot product kernel.
|
||||
* \authors <ul>
|
||||
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
|
||||
* </ul>
|
||||
@ -69,6 +69,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_generic(lv_
|
||||
|
||||
#endif // Generic
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
{
|
||||
@ -99,8 +100,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3(lv_1
|
||||
|
||||
#endif // SSE3
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
{
|
||||
// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
@ -130,8 +131,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_sse3(lv_1
|
||||
|
||||
#endif // SSE3
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
{
|
||||
// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
@ -161,6 +162,37 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon(lv_16s
|
||||
|
||||
#endif // NEON
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
/*
 * Puppet that adapts the rotator + multi-vector NEON (vma) dot product kernel
 * to a two-input signature: the phase and the phase step are fixed here, and
 * the input vector "in" is replicated three times to build the set of vectors
 * handed to the kernel.
 */
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon_vma(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
    // phases must be normalized. Phase rotator expects a complex exponential input!
    float rem_carrier_phase_in_rad = 0.345;
    float phase_step_rad = 0.1;
    lv_32fc_t start_phase = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
    lv_32fc_t step = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));

    int num_a_vectors = 3;
    int idx;

    /* table of pointers, one entry per replica of the input */
    lv_16sc_t** replicas = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment());

    idx = 0;
    while (idx < num_a_vectors)
        {
            replicas[idx] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
            memcpy(replicas[idx], in, sizeof(lv_16sc_t) * num_points);
            idx++;
        }

    /* the kernel updates start_phase in place; the final value is not used by the puppet */
    volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(result, local_code, step, &start_phase, (const lv_16sc_t**) replicas, num_a_vectors, num_points);

    /* release the replicas first, then the table that holds them */
    idx = 0;
    while (idx < num_a_vectors)
        {
            volk_gnsssdr_free(replicas[idx]);
            idx++;
        }
    volk_gnsssdr_free(replicas);
}
|
||||
|
||||
#endif // NEON
|
||||
|
||||
#endif // INCLUDED_volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_H
|
||||
|
||||
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
|
||||
* \brief Volk protokernel: Resamples N 16 bits integer short complex vectors using zero hold resample algorithm.
|
||||
* \brief VOLK_GNSSSDR kernel: Resamples N 16 bits integer short complex vectors using zero hold resample algorithm.
|
||||
* \authors <ul>
|
||||
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that esamples N 16 bits integer short complex vectors using zero hold resample algorithm.
|
||||
* VOLK_GNSSSDR kernel that resamples N 16 bits integer short complex vectors using zero hold resample algorithm.
|
||||
* It is optimized to resample a single GNSS local code signal replica into N vectors fractional-resampled and fractional-delayed
|
||||
* (i.e. it creates the Early, Prompt, and Late code replicas)
|
||||
*
|
||||
@ -34,6 +34,31 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_16ic_xn_resampler_16ic_xn
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Resamples a complex vector (16-bit integer each component), providing \p num_out_vectors outputs.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_16ic_xn_resampler_16ic_xn(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips, float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li local_code: Vector containing the local code replica to be resampled.
|
||||
* \li rem_code_phase_chips: Pointer to the vector containing the remnant code phase for each output [chips].
|
||||
* \li code_phase_step_chips: Phase increment per sample [chips/sample].
|
||||
* \li code_length_chips: Code length in chips.
|
||||
* \li num_out_vectors: Number of output vectors.
|
||||
* \li num_output_samples: The number of samples in each resampled output vector.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li result: Pointer to a vector of pointers where the results will be stored.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_H
|
||||
#define INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_H
|
||||
|
||||
@ -49,16 +74,7 @@
|
||||
// return (r > 0.0) ? (r + 0.5) : (r - 0.5);
|
||||
//}
|
||||
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
|
||||
\param[out] result Pointer to the vector where the results will be stored
|
||||
\param[in] local_code One of the vectors to be multiplied
|
||||
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_out_vectors Number of output vectors
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips, float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
|
||||
{
|
||||
int local_code_chip_index;
|
||||
@ -84,16 +100,6 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** re
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
|
||||
\param[out] result Pointer to the vector where the results will be stored
|
||||
\param[in] local_code One of the vectors to be multiplied
|
||||
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_out_vectors Number of output vectors
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
|
||||
{
|
||||
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
|
||||
@ -125,7 +131,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res
|
||||
|
||||
__m128i negative_indexes, overflow_indexes,_code_phase_out_int, _code_phase_out_int_neg,_code_phase_out_int_over;
|
||||
|
||||
__m128i zero=_mm_setzero_si128();
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
|
||||
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||
__m128 _4output_index = _mm_load_ps(init_idx_float);
|
||||
@ -148,11 +154,11 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res
|
||||
_code_phase_out_with_offset = _mm_add_ps(_code_phase_out, _rem_code_phase); //add the phase offset
|
||||
_code_phase_out_int = _mm_cvtps_epi32(_code_phase_out_with_offset); //convert to integer
|
||||
|
||||
negative_indexes = _mm_cmplt_epi32 (_code_phase_out_int, zero); //test for negative values
|
||||
negative_indexes = _mm_cmplt_epi32(_code_phase_out_int, zero); //test for negative values
|
||||
_code_phase_out_int_neg = _mm_add_epi32(_code_phase_out_int, _code_length_chips); //the negative values branch
|
||||
_code_phase_out_int_neg = _mm_xor_si128(_code_phase_out_int, _mm_and_si128( negative_indexes,_mm_xor_si128( _code_phase_out_int_neg, _code_phase_out_int )));
|
||||
_code_phase_out_int_neg = _mm_xor_si128(_code_phase_out_int, _mm_and_si128( negative_indexes, _mm_xor_si128( _code_phase_out_int_neg, _code_phase_out_int )));
|
||||
|
||||
overflow_indexes = _mm_cmpgt_epi32 (_code_phase_out_int_neg, _code_length_chips_minus1); //test for overflow values
|
||||
overflow_indexes = _mm_cmpgt_epi32(_code_phase_out_int_neg, _code_length_chips_minus1); //test for overflow values
|
||||
_code_phase_out_int_over = _mm_sub_epi32(_code_phase_out_int_neg, _code_length_chips); //the negative values branch
|
||||
_code_phase_out_int_over = _mm_xor_si128(_code_phase_out_int_neg, _mm_and_si128( overflow_indexes, _mm_xor_si128( _code_phase_out_int_over, _code_phase_out_int_neg )));
|
||||
|
||||
@ -183,19 +189,10 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
|
||||
\param[out] result Pointer to the vector where the results will be stored
|
||||
\param[in] local_code One of the vectors to be multiplied
|
||||
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_out_vectors Number of output vectors
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
|
||||
{
|
||||
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
|
||||
@ -227,7 +224,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res
|
||||
|
||||
__m128i negative_indexes, overflow_indexes,_code_phase_out_int, _code_phase_out_int_neg,_code_phase_out_int_over;
|
||||
|
||||
__m128i zero=_mm_setzero_si128();
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
|
||||
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||
__m128 _4output_index = _mm_loadu_ps(init_idx_float);
|
||||
@ -250,11 +247,11 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res
|
||||
_code_phase_out_with_offset = _mm_add_ps(_code_phase_out, _rem_code_phase); //add the phase offset
|
||||
_code_phase_out_int = _mm_cvtps_epi32(_code_phase_out_with_offset); //convert to integer
|
||||
|
||||
negative_indexes = _mm_cmplt_epi32 (_code_phase_out_int, zero); //test for negative values
|
||||
negative_indexes = _mm_cmplt_epi32(_code_phase_out_int, zero); //test for negative values
|
||||
_code_phase_out_int_neg = _mm_add_epi32(_code_phase_out_int, _code_length_chips); //the negative values branch
|
||||
_code_phase_out_int_neg = _mm_xor_si128(_code_phase_out_int, _mm_and_si128( negative_indexes,_mm_xor_si128( _code_phase_out_int_neg, _code_phase_out_int )));
|
||||
_code_phase_out_int_neg = _mm_xor_si128(_code_phase_out_int, _mm_and_si128( negative_indexes, _mm_xor_si128( _code_phase_out_int_neg, _code_phase_out_int )));
|
||||
|
||||
overflow_indexes = _mm_cmpgt_epi32 (_code_phase_out_int_neg, _code_length_chips_minus1); //test for overflow values
|
||||
overflow_indexes = _mm_cmpgt_epi32(_code_phase_out_int_neg, _code_length_chips_minus1); //test for overflow values
|
||||
_code_phase_out_int_over = _mm_sub_epi32(_code_phase_out_int_neg, _code_length_chips); //the negative values branch
|
||||
_code_phase_out_int_over = _mm_xor_si128(_code_phase_out_int_neg, _mm_and_si128( overflow_indexes, _mm_xor_si128( _code_phase_out_int_over, _code_phase_out_int_neg )));
|
||||
|
||||
@ -286,19 +283,10 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res
|
||||
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
|
||||
\param[out] result Pointer to the vector where the results will be stored
|
||||
\param[in] local_code One of the vectors to be multiplied
|
||||
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_out_vectors Number of output vectors
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
|
||||
{
|
||||
unsigned int number;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_32fc_convert_16ic.h
|
||||
* \brief Volk protokernel: converts float32 complex values to 16 integer complex values taking care of overflow
|
||||
* \brief VOLK_GNSSSDR kernel: converts float32 complex values to 16 integer complex values taking care of overflow.
|
||||
* \authors <ul>
|
||||
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
@ -30,6 +30,29 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_32fc_convert_16ic
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Converts a complex vector of 32-bits float each component into
|
||||
* a complex vector of 16-bits integer each component.
|
||||
* Values are saturated to the limit values of the output data type.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_32fc_convert_16ic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li inputVector: The complex 32-bit float input data buffer.
|
||||
* \li num_points: The number of data values to be converted.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li outputVector: The complex 16-bit integer output data buffer.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_H
|
||||
#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_H
|
||||
|
||||
@ -40,12 +63,6 @@
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 16-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
@ -91,15 +108,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h> // __m64, __m128 ??
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 16-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
|
||||
static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
@ -149,12 +161,6 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector,
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 16-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
@ -200,15 +206,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 16-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points/4;
|
||||
@ -254,15 +255,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector,
|
||||
}
|
||||
#endif /* LV_HAVE_SSE */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 16-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int neon_iters = num_points / 4;
|
||||
@ -318,14 +314,9 @@ static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector,
|
||||
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 16-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
float* inputVectorPtr = (float*)inputVector;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_32fc_convert_8ic.h
|
||||
* \brief Volk protokernel: converts float32 complex values to 8 integer complex values taking care of overflow
|
||||
* \brief VOLK_GNSSSDR kernel: converts float32 complex values to 8 integer complex values taking care of overflow.
|
||||
* \authors <ul>
|
||||
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
@ -30,6 +30,29 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_32fc_convert_8ic
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Converts a complex vector of 32-bits float each component into
|
||||
* a complex vector of 8-bits integer each component.
|
||||
* Values are saturated to the limit values of the output data type.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_32fc_convert_8ic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li inputVector: The complex 32-bit float input data buffer.
|
||||
* \li num_points: The number of data values to be converted.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li outputVector: The complex 8-bit integer output data buffer.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_H
|
||||
#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_H
|
||||
|
||||
@ -42,12 +65,6 @@
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 8-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
unsigned i = 0;
|
||||
@ -103,14 +120,9 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector,
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 8-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
float* inputVectorPtr = (float*)inputVector;
|
||||
@ -133,12 +145,6 @@ static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector,
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 8-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
@ -197,12 +203,6 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector,
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 8-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int neon_iters = num_points / 8;
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_64f_accumulator_64f.h
|
||||
* \brief Volk protokernel: 64 bits (double) scalar accumulator
|
||||
* \brief VOLK_GNSSSDR kernel: 64 bits (double) scalar accumulator.
|
||||
* \authors <ul>
|
||||
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that implements an accumulator of char values
|
||||
* VOLK_GNSSSDR kernel that implements an accumulator of double (64-bit float) values.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
@ -32,6 +32,27 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_64f_accumulator_64f
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Accumulates the values in the input buffer.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_64f_accumulator_64f(double* result, const double* inputBuffer, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li inputBuffer: The buffer of data to be accumulated.
|
||||
* \li num_points: The number of values in \p inputBuffer to be accumulated.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li result: The accumulated result.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_64f_accumulator_64f_u_H
|
||||
#define INCLUDED_volk_gnsssdr_64f_accumulator_64f_u_H
|
||||
|
||||
@ -39,96 +60,91 @@
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
/*!
|
||||
\brief Accumulates the values in the input buffer
|
||||
\param result The accumulated result
|
||||
\param inputBuffer The buffer of data to be accumulated
|
||||
\param num_points The number of values in inputBuffer to be accumulated
|
||||
*/
|
||||
static inline void volk_gnsssdr_64f_accumulator_64f_u_avx(double* result,const double* inputBuffer, unsigned int num_points){
|
||||
|
||||
static inline void volk_gnsssdr_64f_accumulator_64f_u_avx(double* result, const double* inputBuffer, unsigned int num_points)
|
||||
{
|
||||
double returnValue = 0;
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
|
||||
|
||||
const double* aPtr = inputBuffer;
|
||||
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) double tempBuffer[4];
|
||||
__m256d accumulator = _mm256_setzero_pd();
|
||||
__m256d aVal = _mm256_setzero_pd();
|
||||
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
aVal = _mm256_loadu_pd(aPtr);
|
||||
accumulator = _mm256_add_pd(accumulator, aVal);
|
||||
aPtr += 4;
|
||||
}
|
||||
|
||||
_mm256_storeu_pd((double*)tempBuffer,accumulator);
|
||||
|
||||
for(unsigned int i = 0; i<4; ++i){
|
||||
returnValue += tempBuffer[i];
|
||||
}
|
||||
|
||||
for(unsigned int i = 0; i<(num_points % 4); ++i){
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
|
||||
{
|
||||
aVal = _mm256_loadu_pd(aPtr);
|
||||
accumulator = _mm256_add_pd(accumulator, aVal);
|
||||
aPtr += 4;
|
||||
}
|
||||
|
||||
_mm256_storeu_pd((double*)tempBuffer, accumulator);
|
||||
|
||||
for(unsigned int i = 0; i < 4; ++i)
|
||||
{
|
||||
returnValue += tempBuffer[i];
|
||||
}
|
||||
|
||||
for(unsigned int i = 0; i < (num_points % 4); ++i)
|
||||
{
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
|
||||
*result = returnValue;
|
||||
}
|
||||
#endif /* LV_HAVE_AVX */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Accumulates the values in the input buffer
|
||||
\param result The accumulated result
|
||||
\param inputBuffer The buffer of data to be accumulated
|
||||
\param num_points The number of values in inputBuffer to be accumulated
|
||||
*/
|
||||
static inline void volk_gnsssdr_64f_accumulator_64f_u_sse3(double* result,const double* inputBuffer, unsigned int num_points){
|
||||
|
||||
static inline void volk_gnsssdr_64f_accumulator_64f_u_sse3(double* result,const double* inputBuffer, unsigned int num_points)
|
||||
{
|
||||
double returnValue = 0;
|
||||
const unsigned int sse_iters = num_points / 2;
|
||||
|
||||
|
||||
const double* aPtr = inputBuffer;
|
||||
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) double tempBuffer[2];
|
||||
__m128d accumulator = _mm_setzero_pd();
|
||||
__m128d aVal = _mm_setzero_pd();
|
||||
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
aVal = _mm_loadu_pd(aPtr);
|
||||
accumulator = _mm_add_pd(accumulator, aVal);
|
||||
aPtr += 2;
|
||||
}
|
||||
|
||||
_mm_storeu_pd((double*)tempBuffer,accumulator);
|
||||
|
||||
for(unsigned int i = 0; i<2; ++i){
|
||||
returnValue += tempBuffer[i];
|
||||
}
|
||||
|
||||
for(unsigned int i = 0; i<(num_points % 2); ++i){
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
|
||||
{
|
||||
aVal = _mm_loadu_pd(aPtr);
|
||||
accumulator = _mm_add_pd(accumulator, aVal);
|
||||
aPtr += 2;
|
||||
}
|
||||
|
||||
_mm_storeu_pd((double*)tempBuffer, accumulator);
|
||||
|
||||
for(unsigned int i = 0; i < 2; ++i)
|
||||
{
|
||||
returnValue += tempBuffer[i];
|
||||
}
|
||||
|
||||
for(unsigned int i = 0; i < (num_points % 2); ++i)
|
||||
{
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
|
||||
*result = returnValue;
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Accumulates the values in the input buffer
|
||||
\param result The accumulated result
|
||||
\param inputBuffer The buffer of data to be accumulated
|
||||
\param num_points The number of values in inputBuffer to be accumulated
|
||||
*/
|
||||
static inline void volk_gnsssdr_64f_accumulator_64f_generic(double* result,const double* inputBuffer, unsigned int num_points){
|
||||
|
||||
static inline void volk_gnsssdr_64f_accumulator_64f_generic(double* result,const double* inputBuffer, unsigned int num_points)
|
||||
{
|
||||
const double* aPtr = inputBuffer;
|
||||
double returnValue = 0;
|
||||
|
||||
for(unsigned int number = 0;number < num_points; number++){
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
|
||||
for(unsigned int number = 0;number < num_points; number++)
|
||||
{
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
*result = returnValue;
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
@ -136,78 +152,75 @@ static inline void volk_gnsssdr_64f_accumulator_64f_generic(double* result,const
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
/*!
|
||||
\brief Accumulates the values in the input buffer
|
||||
\param result The accumulated result
|
||||
\param inputBuffer The buffer of data to be accumulated
|
||||
\param num_points The number of values in inputBuffer to be accumulated
|
||||
*/
|
||||
static inline void volk_gnsssdr_64f_accumulator_64f_a_avx(double* result,const double* inputBuffer, unsigned int num_points){
|
||||
|
||||
static inline void volk_gnsssdr_64f_accumulator_64f_a_avx(double* result,const double* inputBuffer, unsigned int num_points)
|
||||
{
|
||||
double returnValue = 0;
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
|
||||
|
||||
const double* aPtr = inputBuffer;
|
||||
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) double tempBuffer[4];
|
||||
__m256d accumulator = _mm256_setzero_pd();
|
||||
__m256d aVal = _mm256_setzero_pd();
|
||||
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
aVal = _mm256_load_pd(aPtr);
|
||||
accumulator = _mm256_add_pd(accumulator, aVal);
|
||||
aPtr += 4;
|
||||
}
|
||||
|
||||
_mm256_store_pd((double*)tempBuffer,accumulator);
|
||||
|
||||
for(unsigned int i = 0; i<4; ++i){
|
||||
returnValue += tempBuffer[i];
|
||||
}
|
||||
|
||||
for(unsigned int i = 0; i<(num_points % 4); ++i){
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
|
||||
{
|
||||
aVal = _mm256_load_pd(aPtr);
|
||||
accumulator = _mm256_add_pd(accumulator, aVal);
|
||||
aPtr += 4;
|
||||
}
|
||||
|
||||
_mm256_store_pd((double*)tempBuffer, accumulator);
|
||||
|
||||
for(unsigned int i = 0; i < 4; ++i)
|
||||
{
|
||||
returnValue += tempBuffer[i];
|
||||
}
|
||||
|
||||
for(unsigned int i = 0; i < (num_points % 4); ++i)
|
||||
{
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
|
||||
*result = returnValue;
|
||||
}
|
||||
#endif /* LV_HAVE_AVX */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Accumulates the values in the input buffer
|
||||
\param result The accumulated result
|
||||
\param inputBuffer The buffer of data to be accumulated
|
||||
\param num_points The number of values in inputBuffer to be accumulated
|
||||
*/
|
||||
static inline void volk_gnsssdr_64f_accumulator_64f_a_sse3(double* result,const double* inputBuffer, unsigned int num_points){
|
||||
|
||||
static inline void volk_gnsssdr_64f_accumulator_64f_a_sse3(double* result,const double* inputBuffer, unsigned int num_points)
|
||||
{
|
||||
double returnValue = 0;
|
||||
const unsigned int sse_iters = num_points / 2;
|
||||
|
||||
|
||||
const double* aPtr = inputBuffer;
|
||||
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) double tempBuffer[2];
|
||||
__m128d accumulator = _mm_setzero_pd();
|
||||
__m128d aVal = _mm_setzero_pd();
|
||||
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
aVal = _mm_load_pd(aPtr);
|
||||
accumulator = _mm_add_pd(accumulator, aVal);
|
||||
aPtr += 2;
|
||||
}
|
||||
|
||||
_mm_store_pd((double*)tempBuffer,accumulator);
|
||||
|
||||
for(unsigned int i = 0; i<2; ++i){
|
||||
returnValue += tempBuffer[i];
|
||||
}
|
||||
|
||||
for(unsigned int i = 0; i<(num_points % 2); ++i){
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
|
||||
{
|
||||
aVal = _mm_load_pd(aPtr);
|
||||
accumulator = _mm_add_pd(accumulator, aVal);
|
||||
aPtr += 2;
|
||||
}
|
||||
|
||||
_mm_store_pd((double*)tempBuffer, accumulator);
|
||||
|
||||
for(unsigned int i = 0; i < 2; ++i)
|
||||
{
|
||||
returnValue += tempBuffer[i];
|
||||
}
|
||||
|
||||
for(unsigned int i = 0; i < (num_points % 2); ++i)
|
||||
{
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
|
||||
*result = returnValue;
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8i_accumulator_s8i.h
|
||||
* \brief Volk protokernel: 8 bits (char) scalar accumulator
|
||||
* \brief VOLK_GNSSSDR kernel: 8 bits (char) scalar accumulator.
|
||||
* \authors <ul>
|
||||
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that implements an accumulator of char values
|
||||
* VOLK_GNSSSDR kernel that implements an accumulator of char values
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
@ -32,6 +32,27 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_8i_accumulator_s8i
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Accumulates the values in the input buffer.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_8i_accumulator_s8i(char* result, const char* inputBuffer, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li inputBuffer: The buffer of data to be accumulated.
|
||||
* \li num_points: The number of values in \p inputBuffer to be accumulated.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li result: The accumulated result.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_H
|
||||
#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_H
|
||||
|
||||
@ -40,12 +61,7 @@
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Accumulates the values in the input buffer
|
||||
\param result The accumulated result
|
||||
\param inputBuffer The buffer of data to be accumulated
|
||||
\param num_points The number of values in inputBuffer to be accumulated
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_accumulator_s8i_u_sse3(char* result, const char* inputBuffer, unsigned int num_points)
|
||||
{
|
||||
char returnValue = 0;
|
||||
@ -63,14 +79,14 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_u_sse3(char* result, const ch
|
||||
accumulator = _mm_add_epi8(accumulator, aVal);
|
||||
aPtr += 16;
|
||||
}
|
||||
_mm_storeu_si128((__m128i*)tempBuffer,accumulator);
|
||||
_mm_storeu_si128((__m128i*)tempBuffer, accumulator);
|
||||
|
||||
for(unsigned int i = 0; i<16; ++i)
|
||||
for(unsigned int i = 0; i < 16; ++i)
|
||||
{
|
||||
returnValue += tempBuffer[i];
|
||||
}
|
||||
|
||||
for(unsigned int i = 0; i<(num_points % 16); ++i)
|
||||
for(unsigned int i = 0; i < (num_points % 16); ++i)
|
||||
{
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
@ -79,13 +95,9 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_u_sse3(char* result, const ch
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Accumulates the values in the input buffer
|
||||
\param result The accumulated result
|
||||
\param inputBuffer The buffer of data to be accumulated
|
||||
\param num_points The number of values in inputBuffer to be accumulated
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_accumulator_s8i_generic(char* result, const char* inputBuffer, unsigned int num_points)
|
||||
{
|
||||
const char* aPtr = inputBuffer;
|
||||
@ -99,14 +111,10 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_generic(char* result, const c
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Accumulates the values in the input buffer
|
||||
\param result The accumulated result
|
||||
\param inputBuffer The buffer of data to be accumulated
|
||||
\param num_points The number of values in inputBuffer to be accumulated
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const char* inputBuffer, unsigned int num_points)
|
||||
{
|
||||
char returnValue = 0;
|
||||
@ -126,11 +134,12 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const ch
|
||||
}
|
||||
_mm_store_si128((__m128i*)tempBuffer,accumulator);
|
||||
|
||||
for(unsigned int i = 0; i<16; ++i){
|
||||
for(unsigned int i = 0; i < 16; ++i)
|
||||
{
|
||||
returnValue += tempBuffer[i];
|
||||
}
|
||||
}
|
||||
|
||||
for(unsigned int i = 0; i<(num_points % 16); ++i)
|
||||
for(unsigned int i = 0; i < (num_points % 16); ++i)
|
||||
{
|
||||
returnValue += (*aPtr++);
|
||||
}
|
||||
@ -139,13 +148,9 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const ch
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Accumulates the values in the input buffer
|
||||
\param result The accumulated result
|
||||
\param inputBuffer The buffer of data to be accumulated
|
||||
\param num_points The number of values in inputBuffer to be accumulated
|
||||
*/
|
||||
|
||||
extern void volk_gnsssdr_8i_accumulator_s8i_a_orc_impl(short* result, const char* inputBuffer, unsigned int num_points);
|
||||
|
||||
static inline void volk_gnsssdr_8i_accumulator_s8i_u_orc(char* result, const char* inputBuffer, unsigned int num_points)
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8i_index_max_16u.h
|
||||
* \brief Volk protokernel: calculates the index of the maximum value in a group of 8 bits (char) scalars
|
||||
* \brief VOLK_GNSSSDR kernel: calculates the index of the maximum value in a group of 8 bits (char) scalars.
|
||||
* \authors <ul>
|
||||
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that returns the index of the maximum value of a group of 8 bits (char) scalars
|
||||
* VOLK_GNSSSDR kernel that returns the index of the maximum value of a group of 8 bits (char) scalars
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
@ -32,6 +32,27 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_8i_index_max_16u
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Returns the index of the max value in \p src0
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_8i_index_max_16u(unsigned int* target, const char* src0, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li src0: The buffer of data to be analyzed.
|
||||
* \li num_points: The number of values in \p src0 to be analyzed.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li target: The index of the maximum value in \p src0
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_H
|
||||
#define INCLUDED_volk_gnsssdr_8i_index_max_16u_H
|
||||
|
||||
@ -39,12 +60,7 @@
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
/*!
|
||||
\brief Returns the index of the max value in src0
|
||||
\param target The index of the max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, const char* src0, unsigned int num_points)
|
||||
{
|
||||
if(num_points > 0)
|
||||
@ -107,14 +123,10 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, con
|
||||
|
||||
#endif /*LV_HAVE_AVX*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
/*!
|
||||
\brief Returns the index of the max value in src0
|
||||
\param target The index of the max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, const char* src0, unsigned int num_points)
|
||||
{
|
||||
if(num_points > 0)
|
||||
@ -168,14 +180,10 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target,
|
||||
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include<emmintrin.h>
|
||||
/*!
|
||||
\brief Returns the index of the max value in src0
|
||||
\param target The index of the max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, const char* src0, unsigned int num_points)
|
||||
{
|
||||
if(num_points > 0)
|
||||
@ -235,13 +243,9 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, co
|
||||
|
||||
#endif /*LV_HAVE_SSE2*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Returns the index of the max value in src0
|
||||
\param target The index of the max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, const char* src0, unsigned int num_points)
|
||||
{
|
||||
if(num_points > 0)
|
||||
@ -266,12 +270,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, c
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
/*!
|
||||
\brief Returns the index of the max value in src0
|
||||
\param target The index of the max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, const char* src0, unsigned int num_points)
|
||||
{
|
||||
if(num_points > 0)
|
||||
@ -334,14 +333,10 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, con
|
||||
|
||||
#endif /*LV_HAVE_AVX*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
/*!
|
||||
\brief Returns the index of the max value in src0
|
||||
\param target The index of the max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, const char* src0, unsigned int num_points)
|
||||
{
|
||||
if(num_points > 0)
|
||||
@ -395,14 +390,10 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target,
|
||||
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
/*!
|
||||
\brief Returns the index of the max value in src0
|
||||
\param target The index of the max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, const char* src0, unsigned int num_points)
|
||||
{
|
||||
if(num_points > 0)
|
||||
@ -463,5 +454,4 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, co
|
||||
#endif /*LV_HAVE_SSE2*/
|
||||
|
||||
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_H*/
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8i_max_s8i.h
|
||||
* \brief Volk protokernel: calculates the maximum value in a group of 8 bits (char) scalars
|
||||
* \brief VOLK_GNSSSDR kernel: calculates the maximum value in a group of 8 bits (char) scalars.
|
||||
* \authors <ul>
|
||||
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that returns the maximum value of a group of 8 bits (char) scalars
|
||||
* VOLK_GNSSSDR kernel that returns the maximum value of a group of 8 bits (char) scalars
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
@ -32,6 +32,27 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_8i_max_s8i
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Returns the max value in \p src0
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_8i_max_s8i(char* target, const char* src0, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li src0: The buffer of data to be analyzed.
|
||||
* \li num_points: The number of values in \p src0 to be analyzed.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li target: The max value in \p src0
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_H
|
||||
#define INCLUDED_volk_gnsssdr_8i_max_s8i_H
|
||||
|
||||
@ -39,12 +60,7 @@
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
/*!
|
||||
\brief Returns the max value in src0
|
||||
\param target The max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_max_s8i_u_sse4_1(char* target, const char* src0, unsigned int num_points)
|
||||
{
|
||||
if(num_points > 0)
|
||||
@ -89,14 +105,10 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse4_1(char* target, const char* sr
|
||||
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include<emmintrin.h>
|
||||
/*!
|
||||
\brief Returns the max value in src0
|
||||
\param target The max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_max_s8i_u_sse2(char* target, const char* src0, unsigned int num_points)
|
||||
{
|
||||
if(num_points > 0)
|
||||
@ -152,13 +164,9 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse2(char* target, const char* src0
|
||||
|
||||
#endif /*LV_HAVE_SSE2*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Returns the max value in src0
|
||||
\param target The max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_max_s8i_generic(char* target, const char* src0, unsigned int num_points)
|
||||
{
|
||||
if(num_points > 0)
|
||||
@ -179,16 +187,9 @@ static inline void volk_gnsssdr_8i_max_s8i_generic(char* target, const char* src
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
/*!
|
||||
\brief Returns the max value in src0
|
||||
\param target The max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_max_s8i_a_sse4_1(char* target, const char* src0, unsigned int num_points)
|
||||
{
|
||||
if(num_points > 0)
|
||||
@ -233,14 +234,10 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse4_1(char* target, const char* sr
|
||||
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
/*!
|
||||
\brief Returns the max value in src0
|
||||
\param target The max value in src0
|
||||
\param src0 The buffer of data to be analysed
|
||||
\param num_points The number of values in src0 to be analysed
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_max_s8i_a_sse2(char* target, const char* src0, unsigned int num_points)
|
||||
{
|
||||
if(num_points > 0)
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8i_x2_add_8i.h
|
||||
* \brief Volk protokernel: adds pairs of 8 bits (char) scalars
|
||||
* \brief VOLK_GNSSSDR kernel: adds pairs of 8 bits (char) scalars.
|
||||
* \authors <ul>
|
||||
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that adds pairs of 8 bits (char) scalars
|
||||
* VOLK_GNSSSDR kernel that adds pairs of 8 bits (char) scalars
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
@ -32,26 +32,42 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_8i_x2_add_8i
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
 * Adds the two input vectors and stores the results in the third vector.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_8i_x2_add_8i(char* cVector, const char* aVector, const char* bVector, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
 * \li aVector: One of the vectors to be added.
|
||||
* \li bVector: The other vector to be added.
|
||||
* \li num_points: Number of values in \p aVector and \p bVector to be added together and stored into \p cVector
|
||||
*
|
||||
* \b Outputs
|
||||
* \li cVector: The vector where the result will be stored.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_H
|
||||
#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_H
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
/*!
|
||||
\brief Adds the two input vectors and store their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be added
|
||||
\param bVector One of the vectors to be added
|
||||
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
char* cPtr = cVector;
|
||||
const char* aPtr = aVector;
|
||||
const char* bPtr= bVector;
|
||||
const char* bPtr = bVector;
|
||||
|
||||
__m128i aVal, bVal, cVal;
|
||||
|
||||
@ -62,7 +78,7 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* a
|
||||
|
||||
cVal = _mm_add_epi8(aVal, bVal);
|
||||
|
||||
_mm_storeu_si128((__m128i*)cPtr,cVal); // Store the results back into the C container
|
||||
_mm_storeu_si128((__m128i*)cPtr, cVal); // Store the results back into the C container
|
||||
|
||||
aPtr += 16;
|
||||
bPtr += 16;
|
||||
@ -76,19 +92,14 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* a
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Adds the two input vectors and store their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be added
|
||||
\param bVector One of the vectors to be added
|
||||
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_x2_add_8i_generic(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
|
||||
{
|
||||
char* cPtr = cVector;
|
||||
const char* aPtr = aVector;
|
||||
const char* bPtr= bVector;
|
||||
const char* bPtr = bVector;
|
||||
unsigned int number = 0;
|
||||
|
||||
for(number = 0; number < num_points; number++)
|
||||
@ -101,20 +112,14 @@ static inline void volk_gnsssdr_8i_x2_add_8i_generic(char* cVector, const char*
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
/*!
|
||||
\brief Adds the two input vectors and store their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be added
|
||||
\param bVector One of the vectors to be added
|
||||
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8i_x2_add_8i_a_sse2(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
|
||||
char* cPtr = cVector;
|
||||
const char* aPtr = aVector;
|
||||
const char* bPtr= bVector;
|
||||
const char* bPtr = bVector;
|
||||
|
||||
__m128i aVal, bVal, cVal;
|
||||
|
||||
@ -141,13 +146,7 @@ static inline void volk_gnsssdr_8i_x2_add_8i_a_sse2(char* cVector, const char* a
|
||||
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Adds the two input vectors and store their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be added
|
||||
\param bVector One of the vectors to be added
|
||||
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
|
||||
*/
|
||||
|
||||
extern void volk_gnsssdr_8i_x2_add_8i_a_orc_impl(char* cVector, const char* aVector, const char* bVector, unsigned int num_points);
|
||||
static inline void volk_gnsssdr_8i_x2_add_8i_u_orc(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
|
||||
{
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8ic_conjugate_8ic.h
|
||||
* \brief Volk protokernel: calculates the conjugate of a 16 bits vector
|
||||
* \brief VOLK_GNSSSDR kernel: calculates the conjugate of a 16 bits vector.
|
||||
* \authors <ul>
|
||||
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that calculates the conjugate of a
|
||||
* VOLK_GNSSSDR kernel that calculates the conjugate of a
|
||||
* 16 bits vector (8 bits the real part and 8 bits the imaginary part)
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
@ -33,6 +33,27 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_8ic_conjugate_8ic
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
 * Takes the conjugate of a vector of complex 8-bit items (\p lv_8sc_t: 8 bits for the real part and 8 bits for the imaginary part).
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_8ic_conjugate_8ic(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li aVector: Vector of complex items to be conjugated
|
||||
* \li num_points: The number of complex data points.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li cVector: The vector where the result will be stored
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_H
|
||||
|
||||
@ -40,12 +61,7 @@
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_avx(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
@ -81,14 +97,10 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_avx(lv_8sc_t* cVector, const
|
||||
}
|
||||
#endif /* LV_HAVE_AVX */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_ssse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
@ -116,14 +128,10 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_ssse3(lv_8sc_t* cVector, con
|
||||
}
|
||||
#endif /* LV_HAVE_SSSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
@ -153,13 +161,9 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_sse3(lv_8sc_t* cVector, cons
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
|
||||
{
|
||||
lv_8sc_t* cPtr = cVector;
|
||||
@ -176,12 +180,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_generic(lv_8sc_t* cVector, con
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_avx(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
@ -217,14 +216,10 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_avx(lv_8sc_t* cVector, const
|
||||
}
|
||||
#endif /* LV_HAVE_AVX */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_ssse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
@ -252,14 +247,10 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_ssse3(lv_8sc_t* cVector, con
|
||||
}
|
||||
#endif /* LV_HAVE_SSSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
@ -291,12 +282,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, cons
|
||||
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
|
||||
extern void volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points);
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
|
||||
{
|
||||
@ -307,12 +293,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_orc(lv_8sc_t* cVector, const
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
/*!
|
||||
\brief Takes the conjugate of an unsigned char vector.
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector Vector to be conjugated
|
||||
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8ic_conjugate_8ic_neon(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
@ -335,7 +316,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_neon(lv_8sc_t* cVector, const
|
||||
{
|
||||
*c++ = lv_conj(*a++);
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8ic_magnitude_squared_8i.h
|
||||
* \brief Volk protokernel: calculates the magnitude squared of a 16 bits vector
|
||||
* \brief VOLK_GNSSSDR kernel: calculates the magnitude squared of a 16 bits vector.
|
||||
* \authors <ul>
|
||||
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that calculates the magnitude squared of a
|
||||
* VOLK_GNSSSDR kernel that calculates the magnitude squared of a
|
||||
* 16 bits vector (8 bits the real part and 8 bits the imaginary part)
|
||||
* result = (real*real) + (imag*imag)
|
||||
*
|
||||
@ -34,18 +34,34 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_8ic_magnitude_squared_8i
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Calculates the magnitude squared of the complex data items in \p complexVector and stores the results in \p magnitudeVector
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_8ic_magnitude_squared_8i(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li complexVector: The vector containing the complex input values
|
||||
* \li num_points: The number of complex data points.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li magnitudeVector: The vector containing the real output values
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_H
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse3(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
@ -99,12 +115,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse3(char* magnitudeV
|
||||
|
||||
//#ifdef LV_HAVE_SSE
|
||||
//#include <xmmintrin.h>
|
||||
///*!
|
||||
// \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
|
||||
// \param complexVector The vector containing the complex input values
|
||||
// \param magnitudeVector The vector containing the real output values
|
||||
// \param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
// */
|
||||
//
|
||||
//static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
|
||||
// unsigned int number = 0;
|
||||
// const unsigned int quarterPoints = num_points / 4;
|
||||
@ -144,12 +155,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse3(char* magnitudeV
|
||||
//#endif /* LV_HAVE_SSE */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_generic(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points)
|
||||
{
|
||||
const char* complexVectorPtr = (char*)complexVector;
|
||||
@ -167,12 +173,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_generic(char* magnitude
|
||||
|
||||
#ifdef LV_HAVE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
@ -226,12 +227,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeV
|
||||
|
||||
//#ifdef LV_HAVE_SSE
|
||||
//#include <xmmintrin.h>
|
||||
///*!
|
||||
// \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
|
||||
// \param complexVector The vector containing the complex input values
|
||||
// \param magnitudeVector The vector containing the real output values
|
||||
// \param num_points The number of complex values in complexVector to be calculated and stored into cVector
|
||||
// */
|
||||
//
|
||||
//static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
|
||||
// unsigned int number = 0;
|
||||
// const unsigned int quarterPoints = num_points / 4;
|
||||
@ -272,12 +268,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeV
|
||||
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
|
||||
\param complexVector The vector containing the complex input values
|
||||
\param magnitudeVector The vector containing the real output values
|
||||
\param num_points The number of complex values in complexVector to be calculated and stored into magnitudeVector
|
||||
*/
|
||||
|
||||
extern void volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points);
|
||||
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_orc(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points)
|
||||
{
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8ic_s8ic_multiply_8ic.h
|
||||
* \brief Volk protokernel: multiplies a group of 16 bits vectors by one constant vector
|
||||
* \brief VOLK_GNSSSDR kernel: multiplies a group of 16 bits vectors by one constant vector.
|
||||
* \authors <ul>
|
||||
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that multiplies a group of 16 bits vectors
|
||||
* VOLK_GNSSSDR kernel that multiplies a group of 16 bits vectors
|
||||
* (8 bits the real part and 8 bits the imaginary part) by one constant vector
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
@ -33,6 +33,28 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_8ic_s8ic_multiply_8ic
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Multiplies the input vector by a scalar and stores the results in the third vector
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_8ic_s8ic_multiply_8ic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li aVector: The vector to be multiplied.
|
||||
* \li scalar: The complex scalar to multiply \p aVector
|
||||
* \li num_points: The number of complex values in \p aVector to be multiplied by \p scalar and stored into \p cVector.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li cVector: The vector where the results will be stored
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_H
|
||||
|
||||
@ -40,13 +62,7 @@
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Multiplies the input vector by a scalar and stores the results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector The vector to be multiplied
|
||||
\param scalar The complex scalar to multiply aVector
|
||||
\param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
|
||||
{
|
||||
unsigned int number = 0;
|
||||
@ -59,31 +75,31 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector,
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
|
||||
y = _mm_set1_epi16 (*(short*)&scalar);
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
y = _mm_set1_epi16(*(short*)&scalar);
|
||||
imagy = _mm_srli_si128(y, 1);
|
||||
imagy = _mm_and_si128(imagy, mult1);
|
||||
realy = _mm_and_si128(y, mult1);
|
||||
|
||||
for(; number < sse_iters; number++)
|
||||
{
|
||||
x = _mm_lddqu_si128((__m128i*)a);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
imagx = _mm_srli_si128(x, 1);
|
||||
imagx = _mm_and_si128(imagx, mult1);
|
||||
realx = _mm_and_si128(x, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
realx_mult_realy = _mm_mullo_epi16(realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
realc = _mm_and_si128 (realc, mult1);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_and_si128 (imagc, mult1);
|
||||
imagc = _mm_slli_si128 (imagc, 1);
|
||||
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
|
||||
realc = _mm_and_si128(realc, mult1);
|
||||
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_and_si128(imagc, mult1);
|
||||
imagc = _mm_slli_si128(imagc, 1);
|
||||
|
||||
totalc = _mm_or_si128 (realc, imagc);
|
||||
totalc = _mm_or_si128(realc, imagc);
|
||||
|
||||
_mm_storeu_si128((__m128i*)c, totalc);
|
||||
|
||||
@ -99,14 +115,9 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector,
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the input vector by a scalar and stores the results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector The vector to be multiplied
|
||||
\param scalar The complex scalar to multiply aVector
|
||||
\param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
|
||||
{
|
||||
/*lv_8sc_t* cPtr = cVector;
|
||||
@ -144,13 +155,7 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_generic(lv_8sc_t* cVector,
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Multiplies the input vector by a scalar and stores the results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector The vector to be multiplied
|
||||
\param scalar The complex scalar to multiply aVector
|
||||
\param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
|
||||
{
|
||||
unsigned int number = 0;
|
||||
@ -163,31 +168,31 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector,
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
|
||||
y = _mm_set1_epi16 (*(short*)&scalar);
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
y = _mm_set1_epi16(*(short*)&scalar);
|
||||
imagy = _mm_srli_si128(y, 1);
|
||||
imagy = _mm_and_si128(imagy, mult1);
|
||||
realy = _mm_and_si128(y, mult1);
|
||||
|
||||
for(; number < sse_iters; number++)
|
||||
{
|
||||
x = _mm_load_si128((__m128i*)a);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
imagx = _mm_srli_si128(x, 1);
|
||||
imagx = _mm_and_si128(imagx, mult1);
|
||||
realx = _mm_and_si128(x, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
realx_mult_realy = _mm_mullo_epi16(realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
realc = _mm_and_si128 (realc, mult1);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_and_si128 (imagc, mult1);
|
||||
imagc = _mm_slli_si128 (imagc, 1);
|
||||
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
|
||||
realc = _mm_and_si128(realc, mult1);
|
||||
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_and_si128(imagc, mult1);
|
||||
imagc = _mm_slli_si128(imagc, 1);
|
||||
|
||||
totalc = _mm_or_si128 (realc, imagc);
|
||||
totalc = _mm_or_si128(realc, imagc);
|
||||
|
||||
_mm_store_si128((__m128i*)c, totalc);
|
||||
|
||||
@ -205,13 +210,7 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector,
|
||||
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Multiplies the input vector by a scalar and stores the results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector The vector to be multiplied
|
||||
\param scalar The complex scalar to multiply aVector
|
||||
\param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
|
||||
*/
|
||||
|
||||
extern void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const char scalarreal, const char scalarimag, unsigned int num_points);
|
||||
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
|
||||
{
|
||||
|
@ -1,12 +1,12 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8ic_x2_dot_prod_8ic.h
|
||||
* \brief Volk protokernel: multiplies two 16 bits vectors and accumulates them
|
||||
* \brief VOLK_GNSSSDR kernel: multiplies two 16 bits vectors and accumulates them.
|
||||
* \authors <ul>
|
||||
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
|
||||
* and 8 bits the imaginary part) and accumulates them
|
||||
* VOLK_GNSSSDR kernel that multiplies two 16 bits vectors (8 bits the real part
|
||||
* and 8 bits the imaginary part) and accumulates them.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
@ -33,6 +33,29 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_8ic_x2_dot_prod_8ic
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Multiplies two input complex vectors (8-bit integer each component) and accumulates them,
|
||||
* storing the result.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_8ic_x2_dot_prod_8ic(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li in_a: One of the vectors to be multiplied and accumulated
|
||||
* \li in_b: The other vector to be multiplied and accumulated
|
||||
* \li num_points: The number of complex values to be multiplied together, accumulated and stored into \p result
|
||||
*
|
||||
* \b Outputs
|
||||
* \li result: Value of the accumulated result
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_H
|
||||
|
||||
@ -42,26 +65,19 @@
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] input One of the vectors to be multiplied
|
||||
\param[in] taps One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
/*lv_8sc_t* cPtr = result;
|
||||
const lv_8sc_t* aPtr = input;
|
||||
const lv_8sc_t* bPtr = taps;
|
||||
const lv_8sc_t* aPtr = in_a;
|
||||
const lv_8sc_t* bPtr = in_b;
|
||||
|
||||
for(int number = 0; number < num_points; number++){
|
||||
*cPtr += (*aPtr++) * (*bPtr++);
|
||||
}*/
|
||||
|
||||
char * res = (char*) result;
|
||||
char * in = (char*) input;
|
||||
char * tp = (char*) taps;
|
||||
char * in = (char*) in_a;
|
||||
char * tp = (char*) in_b;
|
||||
unsigned int n_2_ccomplex_blocks = num_points/2;
|
||||
unsigned int isodd = num_points & 1;
|
||||
|
||||
@ -86,29 +102,23 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, co
|
||||
// Cleanup if we had an odd number of points
|
||||
for(i = 0; i < isodd; ++i)
|
||||
{
|
||||
*result += input[num_points - 1] * taps[num_points - 1];
|
||||
*result += in_a[num_points - 1] * in_b[num_points - 1];
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] input One of the vectors to be multiplied
|
||||
\param[in] taps One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
lv_8sc_t dotProduct;
|
||||
memset(&dotProduct, 0x0, 2*sizeof(char));
|
||||
|
||||
const lv_8sc_t* a = input;
|
||||
const lv_8sc_t* b = taps;
|
||||
const lv_8sc_t* a = in_a;
|
||||
const lv_8sc_t* b = in_b;
|
||||
|
||||
const unsigned int sse_iters = num_points/8;
|
||||
|
||||
@ -125,40 +135,40 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con
|
||||
x = _mm_loadu_si128((__m128i*)a);
|
||||
y = _mm_loadu_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
imagx = _mm_srli_si128(x, 1);
|
||||
imagx = _mm_and_si128(imagx, mult1);
|
||||
realx = _mm_and_si128(x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
imagy = _mm_srli_si128(y, 1);
|
||||
imagy = _mm_and_si128(imagy, mult1);
|
||||
realy = _mm_and_si128(y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
realx_mult_realy = _mm_mullo_epi16(realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
realcacc = _mm_add_epi16 (realcacc, realc);
|
||||
imagcacc = _mm_add_epi16 (imagcacc, imagc);
|
||||
realcacc = _mm_add_epi16(realcacc, realc);
|
||||
imagcacc = _mm_add_epi16(imagcacc, imagc);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
}
|
||||
|
||||
realcacc = _mm_and_si128 (realcacc, mult1);
|
||||
imagcacc = _mm_and_si128 (imagcacc, mult1);
|
||||
imagcacc = _mm_slli_si128 (imagcacc, 1);
|
||||
realcacc = _mm_and_si128(realcacc, mult1);
|
||||
imagcacc = _mm_and_si128(imagcacc, mult1);
|
||||
imagcacc = _mm_slli_si128(imagcacc, 1);
|
||||
|
||||
totalc = _mm_or_si128 (realcacc, imagcacc);
|
||||
totalc = _mm_or_si128(realcacc, imagcacc);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
|
||||
|
||||
_mm_storeu_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
|
||||
_mm_storeu_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
|
||||
|
||||
for (int i = 0; i<8; ++i)
|
||||
for (int i = 0; i < 8; ++i)
|
||||
{
|
||||
dotProduct += dotProductVector[i];
|
||||
}
|
||||
@ -174,23 +184,17 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con
|
||||
|
||||
#endif /*LV_HAVE_SSE2*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] input One of the vectors to be multiplied
|
||||
\param[in] taps One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
lv_8sc_t dotProduct;
|
||||
memset(&dotProduct, 0x0, 2*sizeof(char));
|
||||
|
||||
const lv_8sc_t* a = input;
|
||||
const lv_8sc_t* b = taps;
|
||||
const lv_8sc_t* a = in_a;
|
||||
const lv_8sc_t* b = in_b;
|
||||
|
||||
const unsigned int sse_iters = num_points/8;
|
||||
|
||||
@ -207,24 +211,24 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c
|
||||
x = _mm_lddqu_si128((__m128i*)a);
|
||||
y = _mm_lddqu_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
imagx = _mm_srli_si128(x, 1);
|
||||
imagx = _mm_and_si128(imagx, mult1);
|
||||
realx = _mm_and_si128(x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
imagy = _mm_srli_si128(y, 1);
|
||||
imagy = _mm_and_si128(imagy, mult1);
|
||||
realy = _mm_and_si128(y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
realx_mult_realy = _mm_mullo_epi16(realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
realcacc = _mm_add_epi16 (realcacc, realc);
|
||||
imagcacc = _mm_add_epi16 (imagcacc, imagc);
|
||||
realcacc = _mm_add_epi16(realcacc, realc);
|
||||
imagcacc = _mm_add_epi16(imagcacc, imagc);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
@ -236,9 +240,9 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
|
||||
|
||||
_mm_storeu_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
|
||||
_mm_storeu_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
|
||||
|
||||
for (unsigned int i = 0; i<8; ++i)
|
||||
for (unsigned int i = 0; i < 8; ++i)
|
||||
{
|
||||
dotProduct += dotProductVector[i];
|
||||
}
|
||||
@ -258,20 +262,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] input One of the vectors to be multiplied
|
||||
\param[in] taps One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
lv_8sc_t dotProduct;
|
||||
memset(&dotProduct, 0x0, 2*sizeof(char));
|
||||
|
||||
const lv_8sc_t* a = input;
|
||||
const lv_8sc_t* b = taps;
|
||||
const lv_8sc_t* a = in_a;
|
||||
const lv_8sc_t* b = in_b;
|
||||
|
||||
const unsigned int sse_iters = num_points/8;
|
||||
|
||||
@ -288,40 +285,40 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, con
|
||||
x = _mm_load_si128((__m128i*)a);
|
||||
y = _mm_load_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
imagx = _mm_srli_si128(x, 1);
|
||||
imagx = _mm_and_si128(imagx, mult1);
|
||||
realx = _mm_and_si128(x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
imagy = _mm_srli_si128(y, 1);
|
||||
imagy = _mm_and_si128(imagy, mult1);
|
||||
realy = _mm_and_si128(y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
realx_mult_realy = _mm_mullo_epi16(realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
realcacc = _mm_add_epi16 (realcacc, realc);
|
||||
imagcacc = _mm_add_epi16 (imagcacc, imagc);
|
||||
realcacc = _mm_add_epi16(realcacc, realc);
|
||||
imagcacc = _mm_add_epi16(imagcacc, imagc);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
}
|
||||
|
||||
realcacc = _mm_and_si128 (realcacc, mult1);
|
||||
imagcacc = _mm_and_si128 (imagcacc, mult1);
|
||||
imagcacc = _mm_slli_si128 (imagcacc, 1);
|
||||
realcacc = _mm_and_si128(realcacc, mult1);
|
||||
imagcacc = _mm_and_si128(imagcacc, mult1);
|
||||
imagcacc = _mm_slli_si128(imagcacc, 1);
|
||||
|
||||
totalc = _mm_or_si128 (realcacc, imagcacc);
|
||||
totalc = _mm_or_si128(realcacc, imagcacc);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
|
||||
|
||||
_mm_store_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
|
||||
_mm_store_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
|
||||
|
||||
for (unsigned int i = 0; i<8; ++i)
|
||||
for (unsigned int i = 0; i < 8; ++i)
|
||||
{
|
||||
dotProduct += dotProductVector[i];
|
||||
}
|
||||
@ -340,24 +337,17 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, con
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] input One of the vectors to be multiplied
|
||||
\param[in] taps One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
lv_8sc_t dotProduct;
|
||||
memset(&dotProduct, 0x0, 2*sizeof(char));
|
||||
|
||||
const lv_8sc_t* a = input;
|
||||
const lv_8sc_t* b = taps;
|
||||
const lv_8sc_t* a = in_a;
|
||||
const lv_8sc_t* b = in_b;
|
||||
|
||||
const unsigned int sse_iters = num_points/8;
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
|
||||
if (sse_iters>0)
|
||||
if (sse_iters > 0)
|
||||
{
|
||||
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc;
|
||||
|
||||
@ -370,24 +360,24 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c
|
||||
x = _mm_load_si128((__m128i*)a);
|
||||
y = _mm_load_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
imagx = _mm_srli_si128(x, 1);
|
||||
imagx = _mm_and_si128(imagx, mult1);
|
||||
realx = _mm_and_si128(x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
imagy = _mm_srli_si128(y, 1);
|
||||
imagy = _mm_and_si128(imagy, mult1);
|
||||
realy = _mm_and_si128(y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
realx_mult_realy = _mm_mullo_epi16(realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
|
||||
|
||||
realcacc = _mm_add_epi16 (realcacc, realc);
|
||||
imagcacc = _mm_add_epi16 (imagcacc, imagc);
|
||||
realcacc = _mm_add_epi16(realcacc, realc);
|
||||
imagcacc = _mm_add_epi16(imagcacc, imagc);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
@ -399,9 +389,9 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
|
||||
|
||||
_mm_store_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
|
||||
_mm_store_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
|
||||
|
||||
for (unsigned int i = 0; i<8; ++i)
|
||||
for (unsigned int i = 0; i < 8; ++i)
|
||||
{
|
||||
dotProduct += dotProductVector[i];
|
||||
}
|
||||
@ -417,17 +407,11 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c
|
||||
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] input One of the vectors to be multiplied
|
||||
\param[in] taps One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
extern void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(short* resRealShort, short* resImagShort, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points);
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
|
||||
extern void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(short* resRealShort, short* resImagShort, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points);
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
short resReal = 0;
|
||||
char* resRealChar = (char*)&resReal;
|
||||
@ -437,7 +421,7 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, cons
|
||||
char* resImagChar = (char*)&resImag;
|
||||
resImagChar++;
|
||||
|
||||
volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(&resReal, &resImag, input, taps, num_points);
|
||||
volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(&resReal, &resImag, in_a, in_b, num_points);
|
||||
|
||||
*result = lv_cmake(*resRealChar, *resImagChar);
|
||||
}
|
||||
@ -447,21 +431,14 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, cons
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] input One of the vectors to be multiplied
|
||||
\param[in] taps One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
lv_8sc_t dotProduct;
|
||||
dotProduct = lv_cmake(0,0);
|
||||
*result = lv_cmake(0,0);
|
||||
|
||||
const lv_8sc_t* a = input;
|
||||
const lv_8sc_t* b = taps;
|
||||
const lv_8sc_t* a = in_a;
|
||||
const lv_8sc_t* b = in_b;
|
||||
// for 2-lane vectors, 1st lane holds the real part,
|
||||
// 2nd lane holds the imaginary part
|
||||
int8x8x2_t a_val, b_val, c_val, accumulator, tmp_real, tmp_imag;
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8ic_x2_multiply_8ic.h
|
||||
* \brief Volk protokernel: multiplies two 16 bits vectors
|
||||
* \brief VOLK_GNSSSDR kernel: multiplies two 16 bits vectors.
|
||||
* \authors <ul>
|
||||
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
|
||||
* VOLK_GNSSSDR kernel that multiplies two 16 bits vectors (8 bits the real part
|
||||
* and 8 bits the imaginary part)
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
@ -33,6 +33,28 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_8ic_x2_multiply_8ic
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Multiplies two input complex vectors, point-by-point, storing the result in the third vector
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_8ic_x2_multiply_8ic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li aVector: One of the vectors to be multiplied
|
||||
* \li bVector: The other vector to be multiplied
|
||||
* \li num_points: The number of complex data points.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li cVector: The vector where the result will be stored
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_H
|
||||
#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_H
|
||||
|
||||
@ -41,13 +63,6 @@
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
|
||||
\param[out] cVector The vector where the results will be stored
|
||||
\param[in] aVector One of the vectors to be multiplied
|
||||
\param[in] bVector One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
@ -99,16 +114,10 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
|
||||
\param[out] cVector The vector where the results will be stored
|
||||
\param[in] aVector One of the vectors to be multiplied
|
||||
\param[in] bVector One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
@ -160,15 +169,9 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector,
|
||||
}
|
||||
#endif /* LV_HAVE_SSE4_1 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
|
||||
\param[out] cVector The vector where the results will be stored
|
||||
\param[in] aVector One of the vectors to be multiplied
|
||||
\param[in] bVector One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
|
||||
{
|
||||
lv_8sc_t* cPtr = cVector;
|
||||
@ -186,13 +189,6 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, c
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
|
||||
\param[out] cVector The vector where the results will be stored
|
||||
\param[in] aVector One of the vectors to be multiplied
|
||||
\param[in] bVector One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
@ -244,16 +240,10 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, co
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
|
||||
\param[out] cVector The vector where the results will be stored
|
||||
\param[in] aVector One of the vectors to be multiplied
|
||||
\param[in] bVector One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 8;
|
||||
@ -310,13 +300,6 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector,
|
||||
|
||||
extern void volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points);
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
|
||||
\param[out] cVector The vector where the results will be stored
|
||||
\param[in] aVector One of the vectors to be multiplied
|
||||
\param[in] bVector One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
|
||||
{
|
||||
volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(cVector, aVector, bVector, num_points);
|
||||
|
@ -1,11 +1,11 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_8u_x2_multiply_8u.h
|
||||
* \brief Volk protokernel: multiplies unsigned char values
|
||||
* \brief VOLK_GNSSSDR kernel: multiplies unsigned char values.
|
||||
* \authors <ul>
|
||||
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Volk protokernel that multiplies unsigned char values (8 bits data)
|
||||
* VOLK_GNSSSDR kernel that multiplies unsigned char values (8 bits data)
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
@ -32,19 +32,36 @@
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_8u_x2_multiply_8u
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Multiplies two input vectors of unsigned char, point-by-point, storing the result in the third vector
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_8u_x2_multiply_8u(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li aChar: One of the vectors to be multiplied
|
||||
* \li bChar: The other vector to be multiplied
|
||||
* \li num_points: The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
|
||||
*
|
||||
* \b Outputs
|
||||
* \li cChar: The vector where the result will be stored
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_H
|
||||
#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_H
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
/*!
|
||||
\brief Multiplies the two input unsigned char values and stores their results in the third unsigned char
|
||||
\param cChar The unsigned char where the results will be stored
|
||||
\param aChar One of the unsigned char to be multiplied
|
||||
\param bChar One of the unsigned char to be multiplied
|
||||
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
@ -60,21 +77,21 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, c
|
||||
y = _mm_lddqu_si128((__m128i*)b);
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
x1 = _mm_srli_si128 (x, 1);
|
||||
x1 = _mm_and_si128 (x1, mult1);
|
||||
x2 = _mm_and_si128 (x, mult1);
|
||||
x1 = _mm_srli_si128(x, 1);
|
||||
x1 = _mm_and_si128(x1, mult1);
|
||||
x2 = _mm_and_si128(x, mult1);
|
||||
|
||||
y1 = _mm_srli_si128 (y, 1);
|
||||
y1 = _mm_and_si128 (y1, mult1);
|
||||
y2 = _mm_and_si128 (y, mult1);
|
||||
y1 = _mm_srli_si128(y, 1);
|
||||
y1 = _mm_and_si128(y1, mult1);
|
||||
y2 = _mm_and_si128(y, mult1);
|
||||
|
||||
x1_mult_y1 = _mm_mullo_epi16 (x1, y1);
|
||||
x2_mult_y2 = _mm_mullo_epi16 (x2, y2);
|
||||
x1_mult_y1 = _mm_mullo_epi16(x1, y1);
|
||||
x2_mult_y2 = _mm_mullo_epi16(x2, y2);
|
||||
|
||||
tmp = _mm_and_si128 (x1_mult_y1, mult1);
|
||||
tmp1 = _mm_slli_si128 (tmp, 1);
|
||||
tmp2 = _mm_and_si128 (x2_mult_y2, mult1);
|
||||
totalc = _mm_or_si128 (tmp1, tmp2);
|
||||
tmp = _mm_and_si128(x1_mult_y1, mult1);
|
||||
tmp1 = _mm_slli_si128(tmp, 1);
|
||||
tmp2 = _mm_and_si128(x2_mult_y2, mult1);
|
||||
totalc = _mm_or_si128(tmp1, tmp2);
|
||||
|
||||
_mm_storeu_si128((__m128i*)c, totalc);
|
||||
|
||||
@ -91,13 +108,7 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, c
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the two input unsigned char values and stores their results in the third unsigned char
|
||||
\param cChar The unsigned char where the results will be stored
|
||||
\param aChar One of the unsigned char to be multiplied
|
||||
\param bChar One of the unsigned char to be multiplied
|
||||
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_8u_x2_multiply_8u_generic(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points)
|
||||
{
|
||||
unsigned char* cPtr = cChar;
|
||||
@ -115,13 +126,6 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_generic(unsigned char* cChar,
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input unsigned char values and stores their results in the third unsigned char
|
||||
\param cChar The unsigned char where the results will be stored
|
||||
\param aChar One of the unsigned char to be multiplied
|
||||
\param bChar One of the unsigned char to be multiplied
|
||||
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
|
||||
*/
|
||||
static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 16;
|
||||
@ -137,21 +141,21 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, c
|
||||
y = _mm_load_si128((__m128i*)b);
|
||||
|
||||
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
|
||||
x1 = _mm_srli_si128 (x, 1);
|
||||
x1 = _mm_and_si128 (x1, mult1);
|
||||
x2 = _mm_and_si128 (x, mult1);
|
||||
x1 = _mm_srli_si128(x, 1);
|
||||
x1 = _mm_and_si128(x1, mult1);
|
||||
x2 = _mm_and_si128(x, mult1);
|
||||
|
||||
y1 = _mm_srli_si128 (y, 1);
|
||||
y1 = _mm_and_si128 (y1, mult1);
|
||||
y2 = _mm_and_si128 (y, mult1);
|
||||
y1 = _mm_srli_si128(y, 1);
|
||||
y1 = _mm_and_si128(y1, mult1);
|
||||
y2 = _mm_and_si128(y, mult1);
|
||||
|
||||
x1_mult_y1 = _mm_mullo_epi16 (x1, y1);
|
||||
x2_mult_y2 = _mm_mullo_epi16 (x2, y2);
|
||||
x1_mult_y1 = _mm_mullo_epi16(x1, y1);
|
||||
x2_mult_y2 = _mm_mullo_epi16(x2, y2);
|
||||
|
||||
tmp = _mm_and_si128 (x1_mult_y1, mult1);
|
||||
tmp1 = _mm_slli_si128 (tmp, 1);
|
||||
tmp2 = _mm_and_si128 (x2_mult_y2, mult1);
|
||||
totalc = _mm_or_si128 (tmp1, tmp2);
|
||||
tmp = _mm_and_si128(x1_mult_y1, mult1);
|
||||
tmp1 = _mm_slli_si128(tmp, 1);
|
||||
tmp2 = _mm_and_si128(x2_mult_y2, mult1);
|
||||
totalc = _mm_or_si128(tmp1, tmp2);
|
||||
|
||||
_mm_store_si128((__m128i*)c, totalc);
|
||||
|
||||
@ -169,13 +173,7 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, c
|
||||
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Multiplies the two input unsigned char values and stores their results in the third unsigned char
|
||||
\param cChar The unsigned char where the results will be stored
|
||||
\param aChar One of the unsigned char to be multiplied
|
||||
\param bChar One of the unsigned char to be multiplied
|
||||
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
|
||||
*/
|
||||
|
||||
extern void volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl(unsigned char* cVector, const unsigned char* aVector, const unsigned char* bVector, unsigned int num_points);
|
||||
static inline void volk_gnsssdr_8u_x2_multiply_8u_u_orc(unsigned char* cVector, const unsigned char* aVector, const unsigned char* bVector, unsigned int num_points)
|
||||
{
|
||||
|
@ -106,7 +106,7 @@ macro(check_arch arch_name)
|
||||
set(flags ${ARGN})
|
||||
set(have_${arch_name} TRUE)
|
||||
foreach(flag ${flags})
|
||||
if ( (${COMPILER_NAME} STREQUAL "MSVC") AND (${flag} STREQUAL "/arch:SSE2" OR ${flag} STREQUAL "/arch:SSE" ))
|
||||
if (MSVC AND (${flag} STREQUAL "/arch:SSE2" OR ${flag} STREQUAL "/arch:SSE" ))
|
||||
# SSE/SSE2 is supported in MSVC since VS 2005 but flag not available when compiling 64-bit so do not check
|
||||
else()
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
@ -1,183 +0,0 @@
|
||||
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/* %ecx */
|
||||
#define bit_SSE3 (1 << 0)
|
||||
#define bit_PCLMUL (1 << 1)
|
||||
#define bit_SSSE3 (1 << 9)
|
||||
#define bit_FMA (1 << 12)
|
||||
#define bit_CMPXCHG16B (1 << 13)
|
||||
#define bit_SSE4_1 (1 << 19)
|
||||
#define bit_SSE4_2 (1 << 20)
|
||||
#define bit_MOVBE (1 << 22)
|
||||
#define bit_POPCNT (1 << 23)
|
||||
#define bit_AES (1 << 25)
|
||||
#define bit_XSAVE (1 << 26)
|
||||
#define bit_OSXSAVE (1 << 27)
|
||||
#define bit_AVX (1 << 28)
|
||||
#define bit_F16C (1 << 29)
|
||||
#define bit_RDRND (1 << 30)
|
||||
|
||||
/* %edx */
|
||||
#define bit_CMPXCHG8B (1 << 8)
|
||||
#define bit_CMOV (1 << 15)
|
||||
#define bit_MMX (1 << 23)
|
||||
#define bit_FXSAVE (1 << 24)
|
||||
#define bit_SSE (1 << 25)
|
||||
#define bit_SSE2 (1 << 26)
|
||||
|
||||
/* Extended Features */
|
||||
/* %ecx */
|
||||
#define bit_LAHF_LM (1 << 0)
|
||||
#define bit_ABM (1 << 5)
|
||||
#define bit_SSE4a (1 << 6)
|
||||
#define bit_XOP (1 << 11)
|
||||
#define bit_LWP (1 << 15)
|
||||
#define bit_FMA4 (1 << 16)
|
||||
#define bit_TBM (1 << 21)
|
||||
|
||||
/* %edx */
|
||||
#define bit_MMXEXT (1 << 22)
|
||||
#define bit_LM (1 << 29)
|
||||
#define bit_3DNOWP (1 << 30)
|
||||
#define bit_3DNOW (1 << 31)
|
||||
|
||||
/* Extended Features (%eax == 7) */
|
||||
#define bit_FSGSBASE (1 << 0)
|
||||
#define bit_BMI (1 << 3)
|
||||
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
/* %ebx may be the PIC register. */
|
||||
#if __GNUC__ >= 3
|
||||
#define __cpuid(level, a, b, c, d) \
|
||||
__asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
|
||||
"cpuid\n\t" \
|
||||
"xchg{l}\t{%%}ebx, %1\n\t" \
|
||||
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level))
|
||||
|
||||
#define __cpuid_count(level, count, a, b, c, d) \
|
||||
__asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
|
||||
"cpuid\n\t" \
|
||||
"xchg{l}\t{%%}ebx, %1\n\t" \
|
||||
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level), "2" (count))
|
||||
#else
|
||||
/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
|
||||
nor alternatives in i386 code. */
|
||||
#define __cpuid(level, a, b, c, d) \
|
||||
__asm__ ("xchgl\t%%ebx, %1\n\t" \
|
||||
"cpuid\n\t" \
|
||||
"xchgl\t%%ebx, %1\n\t" \
|
||||
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level))
|
||||
|
||||
#define __cpuid_count(level, count, a, b, c, d) \
|
||||
__asm__ ("xchgl\t%%ebx, %1\n\t" \
|
||||
"cpuid\n\t" \
|
||||
"xchgl\t%%ebx, %1\n\t" \
|
||||
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level), "2" (count))
|
||||
#endif
|
||||
#else
|
||||
#define __cpuid(level, a, b, c, d) \
|
||||
__asm__ ("cpuid\n\t" \
|
||||
: "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level))
|
||||
|
||||
#define __cpuid_count(level, count, a, b, c, d) \
|
||||
__asm__ ("cpuid\n\t" \
|
||||
: "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
|
||||
: "0" (level), "2" (count))
|
||||
#endif
|
||||
|
||||
/* Return highest supported input value for cpuid instruction. ext can
|
||||
be either 0x0 or 0x80000000 to return highest supported value for
|
||||
basic or extended cpuid information. Function returns 0 if cpuid
|
||||
is not supported or whatever cpuid returns in eax register. If sig
|
||||
pointer is non-null, then first four bytes of the signature
|
||||
(as found in ebx register) are returned in location pointed by sig. */
|
||||
|
||||
static __inline unsigned int
|
||||
__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
|
||||
{
|
||||
unsigned int __eax, __ebx, __ecx, __edx;
|
||||
|
||||
#ifndef __x86_64__
|
||||
/* See if we can use cpuid. On AMD64 we always can. */
|
||||
#if __GNUC__ >= 3
|
||||
__asm__ ("pushf{l|d}\n\t"
|
||||
"pushf{l|d}\n\t"
|
||||
"pop{l}\t%0\n\t"
|
||||
"mov{l}\t{%0, %1|%1, %0}\n\t"
|
||||
"xor{l}\t{%2, %0|%0, %2}\n\t"
|
||||
"push{l}\t%0\n\t"
|
||||
"popf{l|d}\n\t"
|
||||
"pushf{l|d}\n\t"
|
||||
"pop{l}\t%0\n\t"
|
||||
"popf{l|d}\n\t"
|
||||
: "=&r" (__eax), "=&r" (__ebx)
|
||||
: "i" (0x00200000));
|
||||
#else
|
||||
/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
|
||||
nor alternatives in i386 code. */
|
||||
__asm__ ("pushfl\n\t"
|
||||
"pushfl\n\t"
|
||||
"popl\t%0\n\t"
|
||||
"movl\t%0, %1\n\t"
|
||||
"xorl\t%2, %0\n\t"
|
||||
"pushl\t%0\n\t"
|
||||
"popfl\n\t"
|
||||
"pushfl\n\t"
|
||||
"popl\t%0\n\t"
|
||||
"popfl\n\t"
|
||||
: "=&r" (__eax), "=&r" (__ebx)
|
||||
: "i" (0x00200000));
|
||||
#endif
|
||||
|
||||
if (!((__eax ^ __ebx) & 0x00200000))
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
/* Host supports cpuid. Return highest supported cpuid input value. */
|
||||
__cpuid (__ext, __eax, __ebx, __ecx, __edx);
|
||||
|
||||
if (__sig)
|
||||
*__sig = __ebx;
|
||||
|
||||
return __eax;
|
||||
}
|
||||
|
||||
/* Return cpuid data for requested cpuid level, as found in returned
|
||||
eax, ebx, ecx and edx registers. The function checks if cpuid is
|
||||
supported and returns 1 for valid cpuid information or 0 for
|
||||
unsupported cpuid level. All pointers are required to be non-null. */
|
||||
|
||||
static __inline int
|
||||
__get_cpuid (unsigned int __level,
|
||||
unsigned int *__eax, unsigned int *__ebx,
|
||||
unsigned int *__ecx, unsigned int *__edx)
|
||||
{
|
||||
unsigned int __ext = __level & 0x80000000;
|
||||
|
||||
if (__get_cpuid_max (__ext, 0) < __level)
|
||||
return 0;
|
||||
|
||||
__cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
|
||||
return 1;
|
||||
}
|
@ -33,6 +33,7 @@ struct VOLK_CPU volk_gnsssdr_cpu;
|
||||
#if defined(__GNUC__)
|
||||
#include <cpuid.h>
|
||||
#define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3)
|
||||
#define cpuid_x86_count(op, count, regs) __cpuid_count(op, count, *((unsigned int*)regs), *((unsigned int*)regs+1), *((unsigned int*)regs+2), *((unsigned int*)regs+3))
|
||||
|
||||
/* Return Intel AVX extended CPU capabilities register.
|
||||
* This function will bomb on non-AVX-capable machines, so
|
||||
@ -68,8 +69,8 @@ struct VOLK_CPU volk_gnsssdr_cpu;
|
||||
|
||||
static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int count, unsigned int reg, unsigned int bit) {
|
||||
#if defined(VOLK_CPU_x86)
|
||||
unsigned int regs[4];
|
||||
__cpuid_count(level, count, regs[0], regs[1], regs[2], regs[3]);
|
||||
unsigned int regs[4] = {0};
|
||||
cpuid_x86_count(level, count, regs);
|
||||
return regs[reg] >> bit & 0x01;
|
||||
#else
|
||||
return 0;
|
||||
|
@ -8,7 +8,7 @@ LV_CXXFLAGS=@LV_CXXFLAGS@
|
||||
Name: volk_gnsssdr
|
||||
Description: VOLK_GNSSSDR: Vector Optimized Library of Kernels specific for GNSS-SDR
|
||||
Requires:
|
||||
Version: @VERSION@
|
||||
Version: @LIBVER@
|
||||
Libs: -L${libdir} -lvolk_gnsssdr
|
||||
Cflags: -I${includedir} ${LV_CXXFLAGS}
|
||||
|
||||
|
@ -38,6 +38,7 @@
|
||||
#include <iostream>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <gnuradio/io_signature.h>
|
||||
#include <pmt/pmt.h>
|
||||
#include <glog/logging.h>
|
||||
#include "control_message_factory.h"
|
||||
#include "gnss_synchro.h"
|
||||
@ -78,6 +79,9 @@ gps_l1_ca_telemetry_decoder_cc::gps_l1_ca_telemetry_decoder_cc(
|
||||
gr::block("gps_navigation_cc", gr::io_signature::make(1, 1, sizeof(Gnss_Synchro)),
|
||||
gr::io_signature::make(1, 1, sizeof(Gnss_Synchro)))
|
||||
{
|
||||
// create asynchronous message ports
|
||||
this->message_port_register_out(pmt::mp("preamble_index"));
|
||||
|
||||
// initialize internal vars
|
||||
d_queue = queue;
|
||||
d_dump = dump;
|
||||
@ -178,6 +182,8 @@ int gps_l1_ca_telemetry_decoder_cc::general_work (int noutput_items, gr_vector_i
|
||||
const Gnss_Synchro **in = (const Gnss_Synchro **) &input_items[0]; //Get the input samples pointer
|
||||
|
||||
// TODO Optimize me!
|
||||
if (in[0][d_samples_per_bit*8 - 1].symbol_integration_enabled==false)
|
||||
{
|
||||
//******* preamble correlation ********
|
||||
for (unsigned int i = 0; i < d_samples_per_bit*8; i++)
|
||||
{
|
||||
@ -190,20 +196,37 @@ int gps_l1_ca_telemetry_decoder_cc::general_work (int noutput_items, gr_vector_i
|
||||
corr_value += d_preambles_symbols[i];
|
||||
}
|
||||
}
|
||||
}else{
|
||||
//******* preamble correlation ********
|
||||
for (unsigned int i = 0; i < d_samples_per_bit*8; i++)
|
||||
{
|
||||
if (in[0][i].Flag_valid_symbol_output==true)
|
||||
{
|
||||
if (in[0][i].Prompt_I < 0) // symbols clipping
|
||||
{
|
||||
corr_value -= d_preambles_symbols[i]*d_samples_per_bit;
|
||||
}
|
||||
else
|
||||
{
|
||||
corr_value += d_preambles_symbols[i]*d_samples_per_bit;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
d_flag_preamble = false;
|
||||
|
||||
//******* frame sync ******************
|
||||
if (abs(corr_value) >= 160)
|
||||
if (abs(corr_value) == 160)
|
||||
{
|
||||
//TODO: Rewrite with state machine
|
||||
if (d_stat == 0)
|
||||
{
|
||||
d_GPS_FSM.Event_gps_word_preamble();
|
||||
d_preamble_index = d_sample_counter;//record the preamble sample stamp
|
||||
LOG(INFO) << "Preamble detection for SAT " << this->d_satellite;
|
||||
DLOG(INFO) << "Preamble detection for SAT " << this->d_satellite;
|
||||
d_symbol_accumulator = 0; //sync the symbol to bits integrator
|
||||
d_symbol_accumulator_counter = 0;
|
||||
d_frame_bit_index = 8;
|
||||
d_frame_bit_index = 7;
|
||||
d_stat = 1; // enter into frame pre-detection status
|
||||
}
|
||||
else if (d_stat == 1) //check 6 seconds of preamble separation
|
||||
@ -215,20 +238,24 @@ int gps_l1_ca_telemetry_decoder_cc::general_work (int noutput_items, gr_vector_i
|
||||
d_flag_preamble = true;
|
||||
d_preamble_index = d_sample_counter; //record the preamble sample stamp (t_P)
|
||||
d_preamble_time_seconds = in[0][0].Tracking_timestamp_secs;// - d_preamble_duration_seconds; //record the PRN start sample index associated to the preamble
|
||||
|
||||
d_frame_bit_index = 7;
|
||||
if (!d_flag_frame_sync)
|
||||
{
|
||||
//send asynchronous message to tracking to inform of frame sync and extend correlation time
|
||||
pmt::pmt_t value = pmt::from_long(d_preamble_index-1);
|
||||
this->message_port_pub(pmt::mp("preamble_index"),value);
|
||||
|
||||
d_flag_frame_sync = true;
|
||||
if (corr_value < 0)
|
||||
{
|
||||
flag_PLL_180_deg_phase_locked = true; //PLL is locked to opposite phase!
|
||||
LOG(INFO) << " PLL in opposite phase for Sat "<< this->d_satellite.get_PRN();
|
||||
DLOG(INFO) << " PLL in opposite phase for Sat "<< this->d_satellite.get_PRN();
|
||||
}
|
||||
else
|
||||
{
|
||||
flag_PLL_180_deg_phase_locked = false;
|
||||
}
|
||||
LOG(INFO) << " Frame sync SAT " << this->d_satellite << " with preamble start at " << d_preamble_time_seconds << " [s]";
|
||||
DLOG(INFO) << " Frame sync SAT " << this->d_satellite << " with preamble start at " << d_preamble_time_seconds << " [s]";
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -240,7 +267,7 @@ int gps_l1_ca_telemetry_decoder_cc::general_work (int noutput_items, gr_vector_i
|
||||
preamble_diff = d_sample_counter - d_preamble_index;
|
||||
if (preamble_diff > 6001)
|
||||
{
|
||||
LOG(INFO) << "Lost of frame sync SAT " << this->d_satellite << " preamble_diff= " << preamble_diff;
|
||||
DLOG(INFO) << "Lost of frame sync SAT " << this->d_satellite << " preamble_diff= " << preamble_diff;
|
||||
d_stat = 0; //lost of frame sync
|
||||
d_flag_frame_sync = false;
|
||||
flag_TOW_set = false;
|
||||
@ -249,16 +276,29 @@ int gps_l1_ca_telemetry_decoder_cc::general_work (int noutput_items, gr_vector_i
|
||||
}
|
||||
|
||||
//******* SYMBOL TO BIT *******
|
||||
d_symbol_accumulator += in[0][d_samples_per_bit*8 - 1].Prompt_I; // accumulate the input value in d_symbol_accumulator
|
||||
d_symbol_accumulator_counter++;
|
||||
if (d_symbol_accumulator_counter == 20)
|
||||
if (in[0][d_samples_per_bit*8 - 1].Flag_valid_symbol_output==true)
|
||||
{
|
||||
if (in[0][d_samples_per_bit*8 - 1].symbol_integration_enabled==true)
|
||||
{
|
||||
// extended correlation to bit period is enabled in tracking!
|
||||
// 1 symbol = 1 bit
|
||||
d_symbol_accumulator = in[0][d_samples_per_bit*8 - 1].Prompt_I; // accumulate the input value in d_symbol_accumulator
|
||||
d_symbol_accumulator_counter=20;
|
||||
}else{
|
||||
// 20 symbols = 1 bit: do symbols integration in telemetry decoder
|
||||
d_symbol_accumulator += in[0][d_samples_per_bit*8 - 1].Prompt_I; // accumulate the input value in d_symbol_accumulator
|
||||
d_symbol_accumulator_counter++;
|
||||
}
|
||||
}
|
||||
|
||||
if (d_symbol_accumulator_counter == 20 )
|
||||
{
|
||||
if (d_symbol_accumulator > 0)
|
||||
{ //symbol to bit
|
||||
d_GPS_frame_4bytes += 1; //insert the telemetry bit in LSB
|
||||
}
|
||||
d_symbol_accumulator = 0;
|
||||
d_symbol_accumulator_counter = 0;
|
||||
if (d_symbol_accumulator > 0)
|
||||
{ //symbol to bit
|
||||
d_GPS_frame_4bytes += 1; //insert the telemetry bit in LSB
|
||||
}
|
||||
d_symbol_accumulator = 0;
|
||||
d_symbol_accumulator_counter = 0;
|
||||
//******* bits to words ******
|
||||
d_frame_bit_index++;
|
||||
if (d_frame_bit_index == 30)
|
||||
@ -302,6 +342,8 @@ int gps_l1_ca_telemetry_decoder_cc::general_work (int noutput_items, gr_vector_i
|
||||
{
|
||||
d_GPS_frame_4bytes <<= 1; //shift 1 bit left the telemetry word
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
// output the frame
|
||||
consume_each(1); //one by one
|
||||
|
@ -34,7 +34,9 @@
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <boost/bind.hpp>
|
||||
#include <gnuradio/io_signature.h>
|
||||
#include <pmt/pmt.h>
|
||||
#include <volk/volk.h>
|
||||
#include <glog/logging.h>
|
||||
#include "gps_sdr_signal_processing.h"
|
||||
@ -82,6 +84,17 @@ void gps_l1_ca_dll_pll_c_aid_tracking_cc::forecast (int noutput_items,
|
||||
}
|
||||
}
|
||||
|
||||
/*!
 * \brief Handler for messages received on the "preamble_index" input port.
 *
 * Every message is logged (debug builds only, via DLOG). The first message that
 * arrives while the 20 ms integration extension is disabled stores the integer
 * carried by \p msg in d_preamble_index, enables the extension and clears the
 * preamble-synchronized flag. Messages received while the extension is already
 * enabled leave the stored index untouched.
 *
 * \param msg PMT value convertible with pmt::to_long().
 */
void gps_l1_ca_dll_pll_c_aid_tracking_cc::msg_handler_preamble_index(pmt::pmt_t msg)
{
    DLOG(INFO) << "Extended correlation for Tracking CH " << d_channel << ": Satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN) << std::endl;

    // Extension already armed: keep the preamble index that was latched first.
    if (d_enable_20ms_integration)
        {
            return;
        }

    d_preamble_index = pmt::to_long(msg);
    d_enable_20ms_integration = true;
    d_preamble_synchronized = false;
}
|
||||
|
||||
|
||||
gps_l1_ca_dll_pll_c_aid_tracking_cc::gps_l1_ca_dll_pll_c_aid_tracking_cc(
|
||||
@ -97,6 +110,13 @@ gps_l1_ca_dll_pll_c_aid_tracking_cc::gps_l1_ca_dll_pll_c_aid_tracking_cc(
|
||||
gr::block("gps_l1_ca_dll_pll_c_aid_tracking_cc", gr::io_signature::make(1, 1, sizeof(gr_complex)),
|
||||
gr::io_signature::make(1, 1, sizeof(Gnss_Synchro)))
|
||||
{
|
||||
// create asynchronous message ports
|
||||
this->message_port_register_in(pmt::mp("preamble_index"));
|
||||
|
||||
this->set_msg_handler(pmt::mp("preamble_index"),
|
||||
boost::bind(&gps_l1_ca_dll_pll_c_aid_tracking_cc::msg_handler_preamble_index, this, _1));
|
||||
|
||||
|
||||
// initialize internal vars
|
||||
d_queue = queue;
|
||||
d_dump = dump;
|
||||
@ -107,8 +127,10 @@ gps_l1_ca_dll_pll_c_aid_tracking_cc::gps_l1_ca_dll_pll_c_aid_tracking_cc(
|
||||
d_correlation_length_samples = static_cast<int>(d_vector_length);
|
||||
|
||||
// Initialize tracking ==========================================
|
||||
d_code_loop_filter.set_DLL_BW(dll_bw_hz);
|
||||
d_carrier_loop_filter.set_params(10.0, pll_bw_hz,2);
|
||||
d_pll_bw_hz=pll_bw_hz;
|
||||
d_dll_bw_hz=dll_bw_hz;
|
||||
d_code_loop_filter.set_DLL_BW(d_dll_bw_hz);
|
||||
d_carrier_loop_filter.set_params(10.0, d_pll_bw_hz,2);
|
||||
|
||||
//--- DLL variables --------------------------------------------------------
|
||||
d_early_late_spc_chips = early_late_space_chips; // Define early-late offset (in chips)
|
||||
@ -141,7 +163,9 @@ gps_l1_ca_dll_pll_c_aid_tracking_cc::gps_l1_ca_dll_pll_c_aid_tracking_cc(
|
||||
d_rem_carrier_phase_rad = 0.0;
|
||||
|
||||
// sample synchronization
|
||||
d_sample_counter = 0;
|
||||
d_sample_counter = 0; //(from trk to tlm)
|
||||
// symbol synchronization (from tlm to trk)
|
||||
d_symbol_counter =0;
|
||||
//d_sample_counter_seconds = 0;
|
||||
d_acq_sample_stamp = 0;
|
||||
|
||||
@ -175,6 +199,8 @@ gps_l1_ca_dll_pll_c_aid_tracking_cc::gps_l1_ca_dll_pll_c_aid_tracking_cc(
|
||||
d_rem_code_phase_chips = 0.0;
|
||||
d_code_phase_step_chips = 0.0;
|
||||
d_carrier_phase_step_rad = 0.0;
|
||||
d_enable_20ms_integration=false;
|
||||
d_preamble_synchronized=false;
|
||||
//set_min_output_buffer((long int)300);
|
||||
}
|
||||
|
||||
@ -258,7 +284,8 @@ void gps_l1_ca_dll_pll_c_aid_tracking_cc::start_tracking()
|
||||
// enable tracking
|
||||
d_pull_in = true;
|
||||
d_enable_tracking = true;
|
||||
|
||||
d_enable_20ms_integration=false;
|
||||
d_preamble_synchronized=false;
|
||||
LOG(INFO) << "PULL-IN Doppler [Hz]=" << d_carrier_doppler_hz
|
||||
<< " Code Phase correction [samples]=" << delay_correction_samples
|
||||
<< " PULL-IN Code Phase [samples]=" << d_acq_code_phase_samples;
|
||||
@ -315,6 +342,7 @@ int gps_l1_ca_dll_pll_c_aid_tracking_cc::general_work (int noutput_items, gr_vec
|
||||
current_synchro_data = *d_acquisition_gnss_synchro;
|
||||
*out[0] = current_synchro_data;
|
||||
consume_each(samples_offset); //shift input to perform alignment with local replica
|
||||
d_symbol_counter++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -326,115 +354,203 @@ int gps_l1_ca_dll_pll_c_aid_tracking_cc::general_work (int noutput_items, gr_vec
|
||||
multicorrelator_cpu.set_input_output_vectors(d_correlator_outs,in);
|
||||
multicorrelator_cpu.Carrier_wipeoff_multicorrelator_resampler(d_rem_carrier_phase_rad, d_carrier_phase_step_rad, d_rem_code_phase_chips, d_code_phase_step_chips, d_correlation_length_samples);
|
||||
|
||||
// UPDATE INTEGRATION TIME
|
||||
CURRENT_INTEGRATION_TIME_S = static_cast<double>(d_correlation_length_samples) / static_cast<double>(d_fs_in);
|
||||
// ####### 20ms coherent integration extension (experimental)
|
||||
// keep the last 40 symbols (2 bits to detect transitions)
|
||||
d_E_history.push_back(d_correlator_outs[0]); // save early output
|
||||
d_P_history.push_back(d_correlator_outs[1]); // save prompt output
|
||||
d_L_history.push_back(d_correlator_outs[2]); // save late output
|
||||
|
||||
// ################## PLL ##########################################################
|
||||
// Update PLL discriminator [rads/Ti -> Secs/Ti]
|
||||
carr_phase_error_secs_Ti = pll_cloop_two_quadrant_atan(d_correlator_outs[1]) / GPS_TWO_PI; //prompt output
|
||||
// Carrier discriminator filter
|
||||
// NOTICE: The carrier loop filter includes the Carrier Doppler accumulator, as described in Kaplan
|
||||
//d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_phase_error_filt_secs_ti/INTEGRATION_TIME;
|
||||
// Input [s/Ti] -> output [Hz]
|
||||
d_carrier_doppler_hz = d_carrier_loop_filter.get_carrier_error(0.0, carr_phase_error_secs_Ti, CURRENT_INTEGRATION_TIME_S);
|
||||
// PLL to DLL assistance [Secs/Ti]
|
||||
d_pll_to_dll_assist_secs_Ti = (d_carrier_doppler_hz * CURRENT_INTEGRATION_TIME_S) / GPS_L1_FREQ_HZ;
|
||||
// code Doppler frequency update
|
||||
d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ + ((d_carrier_doppler_hz * GPS_L1_CA_CODE_RATE_HZ) / GPS_L1_FREQ_HZ);
|
||||
if (d_P_history.size()>GPS_CA_TELEMETRY_SYMBOLS_PER_BIT)
|
||||
{
|
||||
d_E_history.pop_front();
|
||||
d_P_history.pop_front();
|
||||
d_L_history.pop_front();
|
||||
}
|
||||
|
||||
// ################## DLL ##########################################################
|
||||
// DLL discriminator
|
||||
code_error_chips_Ti = dll_nc_e_minus_l_normalized(d_correlator_outs[0], d_correlator_outs[2]); //[chips/Ti] //early and late
|
||||
// Code discriminator filter
|
||||
code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips_Ti); //input [chips/Ti] -> output [chips/second]
|
||||
code_error_filt_secs_Ti = code_error_filt_chips*CURRENT_INTEGRATION_TIME_S/d_code_freq_chips; // [s/Ti]
|
||||
// DLL code error estimation [s/Ti]
|
||||
// TODO: PLL carrier aid to DLL is disabled. Re-enable it and measure performance
|
||||
dll_code_error_secs_Ti = - code_error_filt_secs_Ti + d_pll_to_dll_assist_secs_Ti;
|
||||
bool enable_dll_pll;
|
||||
if (d_enable_20ms_integration==true)
|
||||
{
|
||||
long int symbol_diff=d_symbol_counter-d_preamble_index;
|
||||
if (symbol_diff % GPS_CA_TELEMETRY_SYMBOLS_PER_BIT == 0)
|
||||
{
|
||||
// compute coherent integration and enable tracking loop
|
||||
// perform coherent integration using correlator output history
|
||||
//gr_complex d_correlator_outs_2[3];
|
||||
//std::cout<<"##### RESET COHERENT INTEGRATION ####"<<std::endl;
|
||||
d_correlator_outs[0]=gr_complex(0.0,0.0);
|
||||
d_correlator_outs[1]=gr_complex(0.0,0.0);
|
||||
d_correlator_outs[2]=gr_complex(0.0,0.0);
|
||||
for (int n=0;n<GPS_CA_TELEMETRY_SYMBOLS_PER_BIT;n++)
|
||||
{
|
||||
d_correlator_outs[0]+=d_E_history.at(n);
|
||||
d_correlator_outs[1]+=d_P_history.at(n);
|
||||
d_correlator_outs[2]+=d_L_history.at(n);
|
||||
}
|
||||
|
||||
// ################## CARRIER AND CODE NCO BUFFER ALIGNMENT #######################
|
||||
// keep alignment parameters for the next input buffer
|
||||
double T_chip_seconds;
|
||||
double T_prn_seconds;
|
||||
double T_prn_samples;
|
||||
double K_blk_samples;
|
||||
// Compute the next buffer length based in the new period of the PRN sequence and the code phase error estimation
|
||||
T_chip_seconds = 1 / d_code_freq_chips;
|
||||
T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
|
||||
T_prn_samples = T_prn_seconds * static_cast<double>(d_fs_in);
|
||||
K_blk_samples = T_prn_samples + d_rem_code_phase_samples - dll_code_error_secs_Ti * static_cast<double>(d_fs_in);
|
||||
if (d_preamble_synchronized==false)
|
||||
{
|
||||
d_preamble_synchronized=true;
|
||||
}
|
||||
current_synchro_data.symbol_integration_enabled=true;
|
||||
// UPDATE INTEGRATION TIME
|
||||
CURRENT_INTEGRATION_TIME_S = static_cast<double>(GPS_CA_TELEMETRY_SYMBOLS_PER_BIT)*GPS_L1_CA_CODE_PERIOD;
|
||||
d_code_loop_filter.set_DLL_BW(d_dll_bw_hz);
|
||||
d_carrier_loop_filter.set_params(10.0, d_pll_bw_hz/5,2);
|
||||
enable_dll_pll=true;
|
||||
|
||||
d_correlation_length_samples = round(K_blk_samples); //round to a discrete samples
|
||||
old_d_rem_code_phase_samples=d_rem_code_phase_samples;
|
||||
d_rem_code_phase_samples = K_blk_samples - static_cast<double>(d_correlation_length_samples); //rounding error < 1 sample
|
||||
}else{
|
||||
current_synchro_data.symbol_integration_enabled=false;
|
||||
if(d_preamble_synchronized==true)
|
||||
{
|
||||
// continue extended coherent correlation
|
||||
d_correlation_length_samples=d_correlation_length_samples-d_rem_code_phase_integer_samples;
|
||||
d_rem_code_phase_integer_samples=0;
|
||||
d_rem_carrier_phase_rad = fmod(d_rem_carrier_phase_rad + d_carrier_phase_step_rad * d_correlation_length_samples, GPS_TWO_PI);
|
||||
d_rem_code_phase_chips = fmod(d_rem_code_phase_chips + d_code_phase_step_chips*d_correlation_length_samples,GPS_L1_CA_CODE_LENGTH_CHIPS);
|
||||
// disable tracking loop and inform telemetry decoder
|
||||
enable_dll_pll=false;
|
||||
}else{
|
||||
// perform basic (1ms) correlation
|
||||
// UPDATE INTEGRATION TIME
|
||||
CURRENT_INTEGRATION_TIME_S = static_cast<double>(d_correlation_length_samples) / static_cast<double>(d_fs_in);
|
||||
enable_dll_pll=true;
|
||||
}
|
||||
}
|
||||
}else{
|
||||
current_synchro_data.symbol_integration_enabled=false;
|
||||
// UPDATE INTEGRATION TIME
|
||||
CURRENT_INTEGRATION_TIME_S = static_cast<double>(d_correlation_length_samples) / static_cast<double>(d_fs_in);
|
||||
enable_dll_pll=true;
|
||||
}
|
||||
|
||||
// UPDATE REMNANT CARRIER PHASE
|
||||
CORRECTED_INTEGRATION_TIME_S=(static_cast<double>(d_correlation_length_samples)/static_cast<double>(d_fs_in));
|
||||
//remnant carrier phase [rad]
|
||||
d_rem_carrier_phase_rad = fmod(d_rem_carrier_phase_rad + GPS_TWO_PI * d_carrier_doppler_hz * CORRECTED_INTEGRATION_TIME_S, GPS_TWO_PI);
|
||||
// UPDATE CARRIER PHASE ACCUMULATOR
|
||||
//carrier phase accumulator prior to update the PLL estimators (accumulated carrier in this loop depends on the old estimations!)
|
||||
d_acc_carrier_phase_cycles -= d_carrier_doppler_hz * CORRECTED_INTEGRATION_TIME_S;
|
||||
// ###### end 20ms correlation extension
|
||||
|
||||
//################### PLL COMMANDS #################################################
|
||||
//carrier phase step (NCO phase increment per sample) [rads/sample]
|
||||
d_carrier_phase_step_rad = GPS_TWO_PI * d_carrier_doppler_hz / static_cast<double>(d_fs_in);
|
||||
if (enable_dll_pll==true)
|
||||
{
|
||||
// ################## PLL ##########################################################
|
||||
// Update PLL discriminator [rads/Ti -> Secs/Ti]
|
||||
carr_phase_error_secs_Ti = pll_cloop_two_quadrant_atan(d_correlator_outs[1]) / GPS_TWO_PI; //prompt output
|
||||
// Carrier discriminator filter
|
||||
// NOTICE: The carrier loop filter includes the Carrier Doppler accumulator, as described in Kaplan
|
||||
//d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_phase_error_filt_secs_ti/INTEGRATION_TIME;
|
||||
// Input [s/Ti] -> output [Hz]
|
||||
d_carrier_doppler_hz = d_carrier_loop_filter.get_carrier_error(0.0, carr_phase_error_secs_Ti, CURRENT_INTEGRATION_TIME_S);
|
||||
// PLL to DLL assistance [Secs/Ti]
|
||||
d_pll_to_dll_assist_secs_Ti = (d_carrier_doppler_hz * CURRENT_INTEGRATION_TIME_S) / GPS_L1_FREQ_HZ;
|
||||
// code Doppler frequency update
|
||||
d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ + ((d_carrier_doppler_hz * GPS_L1_CA_CODE_RATE_HZ) / GPS_L1_FREQ_HZ);
|
||||
|
||||
//################### DLL COMMANDS #################################################
|
||||
//code phase step (Code resampler phase increment per sample) [chips/sample]
|
||||
d_code_phase_step_chips = d_code_freq_chips / static_cast<double>(d_fs_in);
|
||||
//remnant code phase [chips]
|
||||
d_rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / static_cast<double>(d_fs_in));
|
||||
// ################## DLL ##########################################################
|
||||
// DLL discriminator
|
||||
code_error_chips_Ti = dll_nc_e_minus_l_normalized(d_correlator_outs[0], d_correlator_outs[2]); //[chips/Ti] //early and late
|
||||
// Code discriminator filter
|
||||
code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips_Ti); //input [chips/Ti] -> output [chips/second]
|
||||
code_error_filt_secs_Ti = code_error_filt_chips*CURRENT_INTEGRATION_TIME_S/d_code_freq_chips; // [s/Ti]
|
||||
// DLL code error estimation [s/Ti]
|
||||
dll_code_error_secs_Ti = - code_error_filt_secs_Ti + d_pll_to_dll_assist_secs_Ti;
|
||||
|
||||
// ####### CN0 ESTIMATION AND LOCK DETECTORS #######################################
|
||||
if (d_cn0_estimation_counter < CN0_ESTIMATION_SAMPLES)
|
||||
{
|
||||
// fill buffer with prompt correlator output values
|
||||
d_Prompt_buffer[d_cn0_estimation_counter] = d_correlator_outs[1]; //prompt
|
||||
d_cn0_estimation_counter++;
|
||||
}
|
||||
else
|
||||
{
|
||||
d_cn0_estimation_counter = 0;
|
||||
// Code lock indicator
|
||||
d_CN0_SNV_dB_Hz = cn0_svn_estimator(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES, d_fs_in, GPS_L1_CA_CODE_LENGTH_CHIPS);
|
||||
// Carrier lock indicator
|
||||
d_carrier_lock_test = carrier_lock_detector(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES);
|
||||
// Loss of lock detection
|
||||
if (d_carrier_lock_test < d_carrier_lock_threshold or d_CN0_SNV_dB_Hz < MINIMUM_VALID_CN0)
|
||||
{
|
||||
d_carrier_lock_fail_counter++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (d_carrier_lock_fail_counter > 0) d_carrier_lock_fail_counter--;
|
||||
}
|
||||
if (d_carrier_lock_fail_counter > MAXIMUM_LOCK_FAIL_COUNTER)
|
||||
{
|
||||
std::cout << "Loss of lock in channel " << d_channel << "!" << std::endl;
|
||||
LOG(INFO) << "Loss of lock in channel " << d_channel << "!";
|
||||
std::unique_ptr<ControlMessageFactory> cmf(new ControlMessageFactory());
|
||||
if (d_queue != gr::msg_queue::sptr())
|
||||
{
|
||||
d_queue->handle(cmf->GetQueueMessage(d_channel, 2));
|
||||
}
|
||||
d_carrier_lock_fail_counter = 0;
|
||||
d_enable_tracking = false; // TODO: check if disabling tracking is consistent with the channel state machine
|
||||
}
|
||||
}
|
||||
// ################## CARRIER AND CODE NCO BUFFER ALIGNMENT #######################
|
||||
|
||||
// ########### Output the tracking data to navigation and PVT ##########
|
||||
current_synchro_data.Prompt_I = static_cast<double>((d_correlator_outs[1]).real());
|
||||
current_synchro_data.Prompt_Q = static_cast<double>((d_correlator_outs[1]).imag());
|
||||
// Tracking_timestamp_secs is aligned with the CURRENT PRN start sample (Hybridization OK!)
|
||||
current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + old_d_rem_code_phase_samples) / static_cast<double>(d_fs_in);
|
||||
// This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0
|
||||
current_synchro_data.Code_phase_secs = 0;
|
||||
current_synchro_data.Carrier_phase_rads = GPS_TWO_PI * d_acc_carrier_phase_cycles;
|
||||
current_synchro_data.Carrier_Doppler_hz = d_carrier_doppler_hz;
|
||||
current_synchro_data.CN0_dB_hz = d_CN0_SNV_dB_Hz;
|
||||
current_synchro_data.Flag_valid_pseudorange = false;
|
||||
*out[0] = current_synchro_data;
|
||||
// keep alignment parameters for the next input buffer
|
||||
double T_chip_seconds;
|
||||
double T_prn_seconds;
|
||||
double T_prn_samples;
|
||||
double K_prn_samples;
|
||||
// Compute the next buffer length based in the new period of the PRN sequence and the code phase error estimation
|
||||
T_chip_seconds = 1 / d_code_freq_chips;
|
||||
T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
|
||||
T_prn_samples = T_prn_seconds * static_cast<double>(d_fs_in);
|
||||
K_prn_samples = round(T_prn_samples);
|
||||
double K_T_prn_error_samples=K_prn_samples-T_prn_samples;
|
||||
|
||||
old_d_rem_code_phase_samples=d_rem_code_phase_samples;
|
||||
d_rem_code_phase_samples= d_rem_code_phase_samples - K_T_prn_error_samples -dll_code_error_secs_Ti * static_cast<double>(d_fs_in);
|
||||
d_rem_code_phase_integer_samples=round(d_rem_code_phase_samples);
|
||||
d_correlation_length_samples = K_prn_samples + d_rem_code_phase_integer_samples; //round to a discrete samples
|
||||
d_rem_code_phase_samples=d_rem_code_phase_samples-d_rem_code_phase_integer_samples;
|
||||
|
||||
// UPDATE REMNANT CARRIER PHASE
|
||||
CORRECTED_INTEGRATION_TIME_S=(static_cast<double>(d_correlation_length_samples)/static_cast<double>(d_fs_in));
|
||||
//remnant carrier phase [rad]
|
||||
d_rem_carrier_phase_rad = fmod(d_rem_carrier_phase_rad + GPS_TWO_PI * d_carrier_doppler_hz * CORRECTED_INTEGRATION_TIME_S, GPS_TWO_PI);
|
||||
// UPDATE CARRIER PHASE ACCUMULATOR
|
||||
//carrier phase accumulator prior to update the PLL estimators (accumulated carrier in this loop depends on the old estimations!)
|
||||
d_acc_carrier_phase_cycles -= d_carrier_doppler_hz * CORRECTED_INTEGRATION_TIME_S;
|
||||
|
||||
//################### PLL COMMANDS #################################################
|
||||
//carrier phase step (NCO phase increment per sample) [rads/sample]
|
||||
d_carrier_phase_step_rad = GPS_TWO_PI * d_carrier_doppler_hz / static_cast<double>(d_fs_in);
|
||||
|
||||
//################### DLL COMMANDS #################################################
|
||||
//code phase step (Code resampler phase increment per sample) [chips/sample]
|
||||
d_code_phase_step_chips = d_code_freq_chips / static_cast<double>(d_fs_in);
|
||||
//remnant code phase [chips]
|
||||
d_rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / static_cast<double>(d_fs_in));
|
||||
|
||||
// ####### CN0 ESTIMATION AND LOCK DETECTORS #######################################
|
||||
if (d_cn0_estimation_counter < CN0_ESTIMATION_SAMPLES)
|
||||
{
|
||||
// fill buffer with prompt correlator output values
|
||||
d_Prompt_buffer[d_cn0_estimation_counter] = d_correlator_outs[1]; //prompt
|
||||
d_cn0_estimation_counter++;
|
||||
}
|
||||
else
|
||||
{
|
||||
d_cn0_estimation_counter = 0;
|
||||
// Code lock indicator
|
||||
d_CN0_SNV_dB_Hz = cn0_svn_estimator(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES, d_fs_in, GPS_L1_CA_CODE_LENGTH_CHIPS);
|
||||
// Carrier lock indicator
|
||||
d_carrier_lock_test = carrier_lock_detector(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES);
|
||||
// Loss of lock detection
|
||||
if (d_carrier_lock_test < d_carrier_lock_threshold or d_CN0_SNV_dB_Hz < MINIMUM_VALID_CN0)
|
||||
{
|
||||
d_carrier_lock_fail_counter++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (d_carrier_lock_fail_counter > 0) d_carrier_lock_fail_counter--;
|
||||
}
|
||||
if (d_carrier_lock_fail_counter > MAXIMUM_LOCK_FAIL_COUNTER)
|
||||
{
|
||||
std::cout << "Loss of lock in channel " << d_channel << "!" << std::endl;
|
||||
LOG(INFO) << "Loss of lock in channel " << d_channel << "!";
|
||||
std::unique_ptr<ControlMessageFactory> cmf(new ControlMessageFactory());
|
||||
if (d_queue != gr::msg_queue::sptr())
|
||||
{
|
||||
d_queue->handle(cmf->GetQueueMessage(d_channel, 2));
|
||||
}
|
||||
d_carrier_lock_fail_counter = 0;
|
||||
d_enable_tracking = false; // TODO: check if disabling tracking is consistent with the channel state machine
|
||||
}
|
||||
}
|
||||
// ########### Output the tracking data to navigation and PVT ##########
|
||||
current_synchro_data.Prompt_I = static_cast<double>((d_correlator_outs[1]).real());
|
||||
current_synchro_data.Prompt_Q = static_cast<double>((d_correlator_outs[1]).imag());
|
||||
// Tracking_timestamp_secs is aligned with the CURRENT PRN start sample (Hybridization OK!)
|
||||
current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + old_d_rem_code_phase_samples) / static_cast<double>(d_fs_in);
|
||||
// This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0
|
||||
current_synchro_data.Code_phase_secs = 0;
|
||||
current_synchro_data.Carrier_phase_rads = GPS_TWO_PI * d_acc_carrier_phase_cycles;
|
||||
current_synchro_data.Carrier_Doppler_hz = d_carrier_doppler_hz;
|
||||
current_synchro_data.CN0_dB_hz = d_CN0_SNV_dB_Hz;
|
||||
current_synchro_data.Flag_valid_pseudorange = false;
|
||||
current_synchro_data.Flag_valid_symbol_output = true;
|
||||
*out[0] = current_synchro_data;
|
||||
}else{
|
||||
//todo: fill synchronization data to produce output while coherent integration is running
|
||||
current_synchro_data.Flag_valid_symbol_output = false;
|
||||
current_synchro_data.Prompt_I = static_cast<double>((d_correlator_outs[1]).real());
|
||||
current_synchro_data.Prompt_Q = static_cast<double>((d_correlator_outs[1]).imag());
|
||||
// Tracking_timestamp_secs is aligned with the CURRENT PRN start sample (Hybridization OK!)
|
||||
current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + d_rem_code_phase_samples) / static_cast<double>(d_fs_in);
|
||||
// This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0
|
||||
current_synchro_data.Code_phase_secs = 0;
|
||||
current_synchro_data.Carrier_phase_rads = GPS_TWO_PI * d_acc_carrier_phase_cycles; // todo: project the acc carrier phase
|
||||
current_synchro_data.Carrier_Doppler_hz = d_carrier_doppler_hz;// todo: project the carrier doppler
|
||||
current_synchro_data.CN0_dB_hz = d_CN0_SNV_dB_Hz;
|
||||
current_synchro_data.Flag_valid_pseudorange = false;
|
||||
*out[0] = current_synchro_data;
|
||||
}
|
||||
|
||||
// ########## DEBUG OUTPUT
|
||||
/*!
|
||||
@ -553,6 +669,7 @@ int gps_l1_ca_dll_pll_c_aid_tracking_cc::general_work (int noutput_items, gr_vec
|
||||
{
|
||||
LOG(WARNING) << "noutput_items = 0";
|
||||
}
|
||||
d_symbol_counter++;
|
||||
return 1; //output tracking result ALWAYS even in the case of d_enable_tracking==false
|
||||
}
|
||||
|
||||
|
@ -39,9 +39,11 @@
|
||||
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <deque>
|
||||
#include <string>
|
||||
#include <gnuradio/block.h>
|
||||
#include <gnuradio/msg_queue.h>
|
||||
#include <pmt/pmt.h>
|
||||
#include "concurrent_queue.h"
|
||||
#include "gnss_synchro.h"
|
||||
#include "tracking_2nd_DLL_filter.h"
|
||||
@ -130,6 +132,7 @@ private:
|
||||
double d_rem_code_phase_samples;
|
||||
double d_rem_code_phase_chips;
|
||||
double d_rem_carrier_phase_rad;
|
||||
int d_rem_code_phase_integer_samples;
|
||||
|
||||
// PLL and DLL filter library
|
||||
Tracking_2nd_DLL_filter d_code_loop_filter;
|
||||
@ -140,6 +143,8 @@ private:
|
||||
double d_acq_carrier_doppler_hz;
|
||||
|
||||
// tracking vars
|
||||
float d_dll_bw_hz;
|
||||
float d_pll_bw_hz;
|
||||
double d_code_freq_chips;
|
||||
double d_code_phase_step_chips;
|
||||
double d_carrier_doppler_hz;
|
||||
@ -148,6 +153,17 @@ private:
|
||||
double d_code_phase_samples;
|
||||
double d_pll_to_dll_assist_secs_Ti;
|
||||
|
||||
// symbol history to detect bit transition
|
||||
std::deque<gr_complex> d_E_history;
|
||||
std::deque<gr_complex> d_P_history;
|
||||
std::deque<gr_complex> d_L_history;
|
||||
long int d_preamble_index;
|
||||
long int d_symbol_counter;
|
||||
bool d_enable_20ms_integration;
|
||||
bool d_preamble_synchronized;
|
||||
int d_correlation_symbol_counter;
|
||||
void msg_handler_preamble_index(pmt::pmt_t msg);
|
||||
|
||||
//Integration period in samples
|
||||
int d_correlation_length_samples;
|
||||
|
||||
|
@ -390,12 +390,12 @@ void GNSSFlowgraph::wait()
|
||||
*/
|
||||
void GNSSFlowgraph::apply_action(unsigned int who, unsigned int what)
|
||||
{
|
||||
DLOG(INFO) << "received " << what << " from " << who;
|
||||
DLOG(INFO) << "received " << what << " from " << who;
|
||||
|
||||
switch (what)
|
||||
{
|
||||
case 0:
|
||||
LOG(INFO) << "Channel " << who << " ACQ FAILED satellite " << channels_.at(who)->get_signal().get_satellite() << ", Signal " << channels_.at(who)->get_signal().get_signal_str();
|
||||
DLOG(INFO) << "Channel " << who << " ACQ FAILED satellite " << channels_.at(who)->get_signal().get_satellite() << ", Signal " << channels_.at(who)->get_signal().get_signal_str();
|
||||
available_GNSS_signals_.push_back(channels_.at(who)->get_signal());
|
||||
|
||||
//TODO: Optimize the channel and signal matching!
|
||||
@ -403,9 +403,11 @@ void GNSSFlowgraph::apply_action(unsigned int who, unsigned int what)
|
||||
{
|
||||
available_GNSS_signals_.push_back(available_GNSS_signals_.front());
|
||||
available_GNSS_signals_.pop_front();
|
||||
std::cout << "loop"<<std::endl;
|
||||
}
|
||||
channels_.at(who)->set_signal(available_GNSS_signals_.front());
|
||||
available_GNSS_signals_.pop_front();
|
||||
|
||||
//todo: This is a provisional bug fix to avoid random channel state machine deadlock caused by an incorrect sequence of events
|
||||
// Correct sequence: start_acquisition() is triggered after the negative acquisition driven by the process_channel_messages() thread inside channel class
|
||||
// Incorrect sequence: due to thread concurrency, some times start_acquisition is triggered BEFORE the last negative_acquisition notification, thus producing a deadlock
|
||||
@ -417,7 +419,7 @@ void GNSSFlowgraph::apply_action(unsigned int who, unsigned int what)
|
||||
// TODO: Tracking messages
|
||||
|
||||
case 1:
|
||||
LOG(INFO) << "Channel " << who << " ACQ SUCCESS satellite " << channels_.at(who)->get_signal().get_satellite();
|
||||
DLOG(INFO) << "Channel " << who << " ACQ SUCCESS satellite " << channels_.at(who)->get_signal().get_satellite();
|
||||
channels_state_[who] = 2;
|
||||
acq_channels_count_--;
|
||||
if (!available_GNSS_signals_.empty() && acq_channels_count_ < max_acq_channels_)
|
||||
@ -445,7 +447,7 @@ void GNSSFlowgraph::apply_action(unsigned int who, unsigned int what)
|
||||
break;
|
||||
|
||||
case 2:
|
||||
LOG(INFO) << "Channel " << who << " TRK FAILED satellite " << channels_.at(who)->get_signal().get_satellite();
|
||||
DLOG(INFO) << "Channel " << who << " TRK FAILED satellite " << channels_.at(who)->get_signal().get_satellite();
|
||||
if (acq_channels_count_ < max_acq_channels_)
|
||||
{
|
||||
channels_state_[who] = 1;
|
||||
@ -468,7 +470,7 @@ void GNSSFlowgraph::apply_action(unsigned int who, unsigned int what)
|
||||
default:
|
||||
break;
|
||||
}
|
||||
DLOG(INFO) << "Number of available signals: " << available_GNSS_signals_.size();
|
||||
DLOG(INFO) << "Number of available signals: " << available_GNSS_signals_.size();
|
||||
}
|
||||
|
||||
|
||||
|
@ -75,6 +75,7 @@ const int GPS_L1_CA_HISTORY_DEEP = 100;
|
||||
// GPS L1 C/A telemetry framing constants.
#define GPS_PREAMBLE {1, 0, 0, 0, 1, 0, 1, 1}      // preamble bit pattern (GPS_CA_PREAMBLE_LENGTH_BITS entries)
const int GPS_CA_PREAMBLE_LENGTH_BITS = 8;          //!< Number of bits in GPS_PREAMBLE
const int GPS_CA_TELEMETRY_RATE_BITS_SECOND = 50;   //!< NAV message bit rate [bits/s]
const int GPS_CA_TELEMETRY_SYMBOLS_PER_BIT = 20;    //!< Telemetry symbols per NAV message bit
// Derived from the two constants above so the symbols-per-bit factor is defined in one place only.
const int GPS_CA_TELEMETRY_RATE_SYMBOLS_SECOND = GPS_CA_TELEMETRY_RATE_BITS_SECOND * GPS_CA_TELEMETRY_SYMBOLS_PER_BIT; //!< NAV message symbol rate [symbols/s]
const int GPS_WORD_LENGTH = 4;                      //!< CRC + GPS WORD (-2 -1 0 ... 29) Bits = 4 bytes
const int GPS_SUBFRAME_LENGTH = 40;                 //!< GPS_WORD_LENGTH x 10 = 40 bytes
|
||||
|
@ -60,6 +60,8 @@ public:
|
||||
double Code_phase_secs; //!< Set by Tracking processing block
|
||||
double Tracking_timestamp_secs; //!< Set by Tracking processing block
|
||||
bool Flag_valid_tracking;
|
||||
bool Flag_valid_symbol_output;
|
||||
bool symbol_integration_enabled; //!< Set by Tracking processing block
|
||||
|
||||
//Telemetry Decoder
|
||||
double Prn_timestamp_ms; //!< Set by Telemetry Decoder processing block
|
||||
|
Loading…
Reference in New Issue
Block a user