1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2024-12-14 20:20:35 +00:00

Merge branch 'next' of https://github.com/gnss-sdr/gnss-sdr into next

This commit is contained in:
Carles Fernandez 2016-03-15 09:35:59 +01:00
commit 0507618736
48 changed files with 1879 additions and 1385 deletions

View File

@ -19,7 +19,7 @@ ControlThread.wait_for_flowgraph=false
SignalSource.implementation=Nsr_File_Signal_Source
;#filename: path to file with the captured GNSS signal samples to be processed
SignalSource.filename=/datalogger/signals/ifen/E1L1_FE0_Band0.stream
SignalSource.filename=/media/javier/SISTEMA/signals/ifen/E1L1_FE0_Band0.stream
;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
SignalSource.item_type=byte
@ -150,7 +150,7 @@ Resampler.implementation=Pass_Through
;#count: Number of available GPS satellite channels.
Channels_1C.count=8
;#count: Number of available Galileo satellite channels.
Channels_1B.count=8
Channels_1B.count=0
;#in_acquisition: Number of channels simultaneously acquiring for the whole receiver
Channels.in_acquisition=1
@ -193,14 +193,15 @@ Acquisition_1C.if=0
Acquisition_1C.sampled_ms=1
;#implementation: Acquisition algorithm selection for this channel: [GPS_L1_CA_PCPS_Acquisition] or [Galileo_E1_PCPS_Ambiguous_Acquisition]
Acquisition_1C.implementation=GPS_L1_CA_PCPS_Acquisition
Acquisition_1C.use_CFAR_algorithm=false;
;#threshold: Acquisition threshold
Acquisition_1C.threshold=0.0075
Acquisition_1C.threshold=40
;#pfa: Acquisition false alarm probability. This option overrides the threshold option. Only use with implementations: [GPS_L1_CA_PCPS_Acquisition] or [Galileo_E1_PCPS_Ambiguous_Acquisition]
;Acquisition_1C.pfa=0.01
;#doppler_max: Maximum expected Doppler shift [Hz]
Acquisition_1C.doppler_max=10000
;#doppler_max: Doppler step in the grid search [Hz]
Acquisition_1C.doppler_step=500
Acquisition_1C.doppler_step=250
;######### GALILEO ACQUISITION CONFIG ############
@ -229,7 +230,7 @@ Acquisition_1B.doppler_step=125
;######### TRACKING GPS CONFIG ############
;#implementation: Selected tracking algorithm: [GPS_L1_CA_DLL_PLL_Tracking] or [GPS_L1_CA_DLL_FLL_PLL_Tracking] or [GPS_L1_CA_TCP_CONNECTOR_Tracking] or [Galileo_E1_DLL_PLL_VEML_Tracking]
Tracking_1C.implementation=GPS_L1_CA_DLL_PLL_Tracking
Tracking_1C.implementation=GPS_L1_CA_DLL_PLL_C_Aid_Tracking
;#item_type: Type and resolution for each of the signal samples. Use only [gr_complex] in this version.
Tracking_1C.item_type=gr_complex
@ -237,19 +238,19 @@ Tracking_1C.item_type=gr_complex
Tracking_1C.if=0
;#dump: Enable or disable the Tracking internal binary data file logging [true] or [false]
Tracking_1C.dump=false
Tracking_1C.dump=true
;#dump_filename: Log path and filename. Notice that the tracking channel will add "x.dat" where x is the channel number.
Tracking_1C.dump_filename=../data/epl_tracking_ch_
;#pll_bw_hz: PLL loop filter bandwidth [Hz]
Tracking_1C.pll_bw_hz=45.0;
Tracking_1C.pll_bw_hz=40;
;#dll_bw_hz: DLL loop filter bandwidth [Hz]
Tracking_1C.dll_bw_hz=2.0;
Tracking_1C.dll_bw_hz=2.5;
;#fll_bw_hz: FLL loop filter bandwidth [Hz]
Tracking_1C.fll_bw_hz=10.0;
Tracking_1C.fll_bw_hz=2.0;
;#order: PLL/DLL loop filter order [2] or [3]
Tracking_1C.order=3;

View File

@ -634,6 +634,7 @@ EXCLUDE = @top_srcdir@/docs/html \
@top_srcdir@/cmake \
@top_srcdir@/data \
@top_srcdir@/src/core/libs/supl \
@top_srcdir@/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr \
# The EXCLUDE_SYMLINKS tag can be used select whether or not files or

View File

@ -125,6 +125,10 @@ void Channel::connect(gr::top_block_sptr top_block)
DLOG(INFO) << "pass_through_ -> tracking";
top_block->connect(trk_->get_right_block(), 0, nav_->get_left_block(), 0);
DLOG(INFO) << "tracking -> telemetry_decoder";
top_block->msg_connect(nav_->get_left_block(),pmt::mp("preamble_index"),trk_->get_right_block(),pmt::mp("preamble_index"));
DLOG(INFO) << "MSG FEEDBACK CHANNEL telemetry_decoder -> tracking";
connected_ = true;
}

View File

@ -42,6 +42,6 @@ From now on, GNSS-SDR (and any other program of your own that makes use of VOLK_
___
VOLK_GNSSSDR was originally created by Andres Cecilia Luque in the framework of the [Summer Of Code In Space (SOCIS 2014)](http://sophia.estec.esa.int/socis2014/?q=about "SOCIS 2014 webpage") program organized by the European Space Agency, and evolved since then by other authors (see each file header for main authorship). This software is released under the GNU General Public License version 3, see the file COPYING.
VOLK_GNSSSDR was originally created by Andres Cecilia Luque in the framework of the [Summer Of Code In Space (SOCIS 2014)](http://sophia.estec.esa.int/socis2014/?q=about "SOCIS 2014 webpage") program organized by the European Space Agency, and then evolved and maintained by Carles Fernandez-Prades and Javier Arribas. This software is released under the GNU General Public License version 3, see the file COPYING.
This project is managed by [Centre Tecnologic de Telecomunicacions de Catalunya](http://www.cttc.es "CTTC webpage").

View File

@ -112,7 +112,7 @@ int main(int argc, char *argv[]) {
/** --help option */
if ( vm.count("help") ) {
std::cout << "The VOLK profiler." << std::endl
std::cout << "The VOLK_GNSSSDR profiler." << std::endl
<< desc << std::endl;
return 0;
}

View File

@ -166,7 +166,7 @@ function(VOLK_ADD_TEST test_name)
#functionality will be removed in the future. Leave it here for
#now until someone can figure out how to do this in Windows.
foreach(target ${test_name} ${VOLK_TEST_TARGET_DEPS})
get_target_property(location ${target} LOCATION)
get_target_property(location "${target}" LOCATION)
if(location)
get_filename_component(path ${location} PATH)
string(REGEX REPLACE "\\$\\(.*\\)" ${CMAKE_BUILD_TYPE} path ${path})

View File

@ -0,0 +1,92 @@
/*! \page extending_volk Extending VOLK
There are two primary routes for extending VOLK for your own use. The
preferred route is by writing kernels and proto-kernels as part of this
repository and sending patches upstream. The alternative is creating your
own VOLK module, as it is the case of VOLK_GNSSSDR ;-). There is a good reason
for that: to provide GNSS-SDR users with adequate protokernels as soon as possible,
without needing to upgrade to the latest VOLK version to enjoy the benefits.
Notwithstanding, some of VOLK_GNSSSDR can be integrated into VOLK in the future,
if other users find them useful.
## Modifying this repository
### Adding kernels
Adding kernels refers to introducing a new function to the VOLK_GNSSSDR API that is
presumably a useful math function/operation. The first step is to create
the file in volk_gnsssdr/kernels/volk_gnsssdr. Follow the naming scheme provided in the
VOLK_GNSSSDR terms and techniques page. First create the generic protokernel.
The generic protokernel should be written in plain C using explicitly sized
types from stdint.h or volk_gnsssdr_complex.h when appropriate. volk_gnsssdr_complex.h
includes explicitly sized complex types for floats and ints. The name of
the generic kernel should be volk_gnsssdr_signature_from_file_generic. If multiple
versions of the generic kernel exist then a description can be appended to
generic_, but it is not required to use alignment flags in the generic
protokernel name. It is required to surround the entire generic function
with preprocessor ifdef fences on the symbol LV_HAVE_GENERIC.
Finally, add the kernel to the list of test cases in volk_gnsssdr/lib/kernel_tests.h.
Many kernels should be able to use the default test parameters, but if yours
requires a lower tolerance, specific vector length, or other test parameters
just create a new instance of volk_gnsssdr_test_params_t for your kernel.
### Adding protokernels
The primary purpose of VOLK_GNSSSDR is to have multiple implementations of an operation
tuned for a specific CPU architecture. Ideally there is at least one
protokernel of each kernel for every architecture that VOLK_GNSSSDR supports.
The pattern for protokernel naming is volk_gnsssdr_kernel_signature_architecture_nick.
The architecture should be one of the supported VOLK_GNSSSDR architectures. The nick is
an optional name to distinguish between multiple implementations for a
particular architecture.
Architecture specific protokernels can be written in one of three ways.
The first approach should always be to use compiler intrinsic functions.
The second and third approaches are using either in-line assembly or
assembly with .S files. Both methods of writing assembly exist in VOLK_GNSSSDR and
should yield equivalent performance; which method you might choose is a
matter of opinion. Regardless of the actual method the public function should
be declared in the kernel header surrounded by ifdef fences on the symbol that
fits the architecture implementation.
#### Compiler Intrinsics
Compiler intrinsics should be treated as functions that map to a specific
assembly instruction. Most VOLK_GNSSSDR kernels take the form of a loop that iterates
through a vector. Form a loop that iterates on a number of items that is natural
for the architecture and then use compiler intrinsics to do the math for your
operation or algorithm. Include the appropriate header inside the ifdef fences,
but before your protokernel declaration.
#### In-line Assembly
In-line assembly uses a compiler macro to include verbatim assembly with C code.
The process of in-line assembly protokernels is very similar to protokernels
based on intrinsics.
#### Assembly with .S files
To write pure assembly protokernels, first declare the function name in the
kernel header file the same way as any other protokernel, but include the extern
keyword. Second, create a file (one for each protokernel) in
volk_gnsssdr/kernels/volk_gnsssdr/asm/$arch. Disassemble another protokernel and copy the
disassembled code in to this file to bootstrap a working implementation. Often
the disassembled code can be hand-tuned to improve performance.
## VOLK Modules
VOLK has a concept of modules. Each module is an independent VOLK tree. Modules
can be managed with the volk_modtool application. At a high level the module is
a clone of all of the VOLK machinery without kernels. volk_modtool also makes it
easy to copy kernels to a module.
Kernels and protokernels are added to your own VOLK module the same way they are
added to this repository, which was described in the previous section.
VOLK_GNSSSDR is a VOLK Module.
*/

View File

@ -0,0 +1,24 @@
/*! \page kernels Kernels
\li \subpage volk_gnsssdr_32fc_convert_16ic
\li \subpage volk_gnsssdr_32fc_convert_8ic
\li \subpage volk_gnsssdr_16ic_convert_32fc
\li \subpage volk_gnsssdr_16ic_resampler_16ic
\li \subpage volk_gnsssdr_16ic_xn_resampler_16ic_xn
\li \subpage volk_gnsssdr_16ic_s32fc_x2_rotator_16ic
\li \subpage volk_gnsssdr_16ic_x2_multiply_16ic
\li \subpage volk_gnsssdr_16ic_x2_dot_prod_16ic
\li \subpage volk_gnsssdr_16ic_x2_dot_prod_16ic_xn
\li \subpage volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn
\li \subpage volk_gnsssdr_8ic_conjugate_8ic
\li \subpage volk_gnsssdr_8ic_magnitude_squared_8i
\li \subpage volk_gnsssdr_8ic_x2_dot_prod_8ic
\li \subpage volk_gnsssdr_8ic_x2_multiply_8ic
\li \subpage volk_gnsssdr_8ic_s8ic_multiply_8ic
\li \subpage volk_gnsssdr_8i_accumulator_s8i
\li \subpage volk_gnsssdr_8i_index_max_16u
\li \subpage volk_gnsssdr_8i_max_s8i
\li \subpage volk_gnsssdr_8i_x2_add_8i
\li \subpage volk_gnsssdr_64f_accumulator_64f
*/

View File

@ -0,0 +1,19 @@
/*! \mainpage VOLK_GNSSSDR
Welcome to VOLK_GNSSSDR!
VOLK_GNSSSDR is the Vector-Optimized Library of Kernels for GNSS-SDR.
It is a library that contains kernels of hand-written SIMD code for different
mathematical operations. Since each SIMD architecture can be very different
and no compiler has yet come along to handle vectorization properly or highly
efficiently, VOLK_GNSSSDR approaches the problem differently.
For each architecture or platform that a developer wishes to vectorize for, a
new proto-kernel is added to VOLK_GNSSSDR. At runtime, VOLK_GNSSSDR will select the correct
proto-kernel. In this way, the users of VOLK_GNSSSDR call a kernel for performing the
operation that is platform/architecture agnostic. This allows us to write
portable SIMD code.
VOLK_GNSSSDR is a module generated from the original VOLK library http://libvolk.org
*/

View File

@ -0,0 +1,121 @@
/*! \page concepts_terms_and_techniques Concepts, Terms, and Techniques
This page is primarily a list of definitions and brief overview of successful
techniques previously used to develop VOLK_GNSSSDR protokernels.
## Definitions and Concepts
### SIMD
SIMD stands for Single Instruction Multiple Data. Leveraging SIMD instructions
is the primary optimization in VOLK_GNSSSDR.
### Architecture
A VOLK_GNSSSDR architecture is normally called an Instruction Set Architecture (ISA).
The architectures we target in VOLK_GNSSSDR usually have SIMD instructions.
### Vector
A vector in VOLK_GNSSSDR is the same as a C array. It sometimes, but not always
coincides with the mathematical definition of a vector.
### Kernel
The 'kernel' part of the VOLK_GNSSSDR name comes from the high performance computing
use of the word. In this context it is the inner loop of a vector operation.
Since we don't use the word vector in the math sense a vector operation is an
operation that is performed on a C array.
### Protokernel
A protokernel is an implementation of a kernel. Every kernel has a 'generic'
protokernel that is implemented in C. Other protokernels are optimized for a
particular architecture.
## Techniques
### New Kernels
Add new kernels to the list in lib/kernel_tests.h. This adds the kernel to
VOLK_GNSSSDR's QA tool as well as the volk profiler. Many kernels are able to
share test parameters, but new kernels might need new ones.
If the VOLK_GNSSSDR kernel does not 'fit' the the standard set of function parameters
expected by the volk_profile structure, you need to create a VOLK_GNSSSDR puppet
function to help the profiler call the kernel. This is essentially due to the
function run_volk_gnsssdr_tests which has a limited number of function prototypes that
it can test.
### Protokernels
Adding new proto-kernels (implementations of VOLK_GNSSSDR kernels for specific
architectures) is relatively easy. In the relevant <kernel>.h file in
the volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr<input-fingerprint_function-name_output-fingerprint>.h
file, add a new #ifdef/#endif block for the LV_HAVE_<arch> corresponding
to the <arch> you a working on (e.g. SSE, AVX, NEON, etc.).
For example, for volk_gnsssdr_16ic_x2_multiply_16ic_neon:
\code
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
lv_16sc_t *a_ptr = (lv_16sc_t*) in_a;
lv_16sc_t *b_ptr = (lv_16sc_t*) in_b;
unsigned int quarter_points = num_points / 4;
int16x4x2_t a_val, b_val, c_val;
int16x4x2_t tmp_real, tmp_imag;
unsigned int number = 0;
for(; number < quarter_points; ++number)
{
a_val = vld2_s16((int16_t*)a_ptr);
b_val = vld2_s16((int16_t*)b_ptr);
tmp_real.val[0] = vmul_s16(a_val.val[0], b_val.val[0]);
tmp_real.val[1] = vmul_s16(a_val.val[1], b_val.val[1]);
tmp_imag.val[0] = vmul_s16(a_val.val[0], b_val.val[1]);
tmp_imag.val[1] = vmul_s16(a_val.val[1], b_val.val[0]);
c_val.val[0] = vsub_s16(tmp_real.val[0], tmp_real.val[1]);
c_val.val[1] = vadd_s16(tmp_imag.val[0], tmp_imag.val[1]);
vst2_s16((int16_t*)out, c_val);
a_ptr += 4;
b_ptr += 4;
out += 4;
}
for(number = quarter_points * 4; number < num_points; number++)
{
*out++ = (*a_ptr++) * (*b_ptr++);
}
}
#endif /* LV_HAVE_NEON */
\endcode
### Allocating Memory
SIMD code can be very sensitive to the alignment of the vectors, which is
generally something like a 16-byte or 32-byte alignment requirement. The
VOLK_GNSSSDR dispatcher functions, which is what we will normally call as users of
VOLK_GNSSSDR, makes sure that the correct aligned or unaligned version is called
depending on the state of the vectors passed to it. However, things typically
work faster and more efficiently when the vectors are aligned. As such, VOLK_GNSSSDR
has memory allocate and free methods to provide us with properly aligned
vectors. We can also ask VOLK_GNSSSDR to give us the current machine's alignment
requirement, which makes our job even easier when porting code.
To get the machine's alignment, simply call the size_t volk_gnsssdr_get_alignment().
Allocate memory using void* volk_gnsssdr_malloc(size_t size, size_t alignment).
Make sure that any memory allocated by VOLK_GNSSSDR is also freed by VOLK_GNSSSDR with volk_gnsssdr_free(void *p).
*/

View File

@ -0,0 +1,19 @@
/*! \page using_volk_gnsssdr Using VOLK_GNSSSDR
Using VOLK_GNSSSDR in your code requires proper linking and including the correct headers.
VOLK_GNSSSDR currently supports both C and C++ bindings.
VOLK_GNSSSDR provides both a pkgconfig and CMake module to help configuration and
linking. The pkfconfig file is installed to
$install_prefix/lib/pkgconfig/volk_gnsssdr.pc. The CMake configuration module is in
$install_prefix/lib/cmake/volk_gnsssdr/VolkConfig.cmake.
The header in the VOLK_GNSSSDR include directory (includedir in pkgconfig,
VOLK_GNSSSDR_INCLUDE_DIRS in cmake module) contains the header volk_gnsssdr/volk_gnsssdr.h defines all
of the symbols exposed by VOLK_GNSSSDR. Alternatively individual kernel headers are in
the same location.
In most cases it is sufficient to call the dispatcher for the kernel you are using.
*/

View File

@ -91,8 +91,12 @@
#include <inttypes.h>
#ifdef LV_HAVE_SSE
#ifdef _WIN32
#include <intrin.h>
#else
#include <x86intrin.h>
#endif
#endif
union bit128{
uint8_t i8[16];

View File

@ -1,6 +1,6 @@
/*!
* \file volk_gnsssdr_32fc_convert_16ic.h
* \brief Volk protokernel: converts 16 bit integer complex complex values to 32 bits float complex values
* \file volk_gnsssdr_16ic_convert_32fc.h
* \brief VOLK_GNSSSDR kernel: converts 16 bit integer complex complex values to 32 bits float complex values.
* \authors <ul>
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
* </ul>
@ -30,6 +30,28 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_16ic_convert_32fc
*
* \b Overview
*
* Converts a complex vector of 16-bits integer each component
* into a complex vector of 32-bits float each component.
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_16ic_convert_32fc(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
* \endcode
*
* \b Inputs
* \li inputVector: The complex 16-bit integer input data buffer.
* \li num_points: The number of data values to be converted.
*
* \b Outputs
* \li outputVector: pointer to a vector holding the converted vector.
*
*/
#ifndef INCLUDED_volk_gnsssdr_16ic_convert_32fc_H
#define INCLUDED_volk_gnsssdr_16ic_convert_32fc_H
@ -38,12 +60,6 @@
#ifdef LV_HAVE_GENERIC
/*!
\brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component.
\param[out] outputVector The complex 32-bit float output data buffer
\param[in] inputVector The complex 16-bit integer input data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{
for(unsigned int i = 0; i < num_points; i++)
@ -53,15 +69,10 @@ static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVecto
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component.
\param[out] outputVector The complex 32-bit float output data buffer
\param[in] inputVector The complex 16-bit integer input data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 2;
@ -89,15 +100,10 @@ static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component.
\param[out] outputVector The complex 32-bit float output data buffer
\param[in] inputVector The complex 16-bit integer input data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 2;
@ -128,12 +134,6 @@ static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component.
\param[out] outputVector The complex 32-bit float output data buffer
\param[in] inputVector The complex 16-bit integer input data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16ic_convert_32fc_neon(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 2;

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_16ic_resampler_16ic.h
* \brief Volk protokernel: resample a 16 bits complex vector
* \brief VOLK_GNSSSDR kernel: resamples a 16 bits complex vector.
* \authors <ul>
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
* </ul>
*
* Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
* VOLK_GNSSSDR kernel that multiplies two 16 bits vectors (8 bits the real part
* and 8 bits the imaginary part) and accumulates them
*
* -------------------------------------------------------------------------
@ -33,6 +33,30 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_16ic_resampler_16ic
*
* \b Overview
*
* Resamples a complex vector (16-bit integer each component).
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_16ic_resampler_16ic(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)
* \endcode
*
* \b Inputs
* \li local_code: One of the vectors to be multiplied.
* \li rem_code_phase_chips: Remnant code phase [chips]
* \li code_phase_step_chips: Phase increment per sample [chips/sample]
* \li code_length_chips: Code length in chips.
* \li num_points: The number of data values to be in the resampled vector.
*
* \b Outputs
* \li result: Pointer to the resampled vector.
*
*/
#ifndef INCLUDED_volk_gnsssdr_16ic_resampler_16ic_H
#define INCLUDED_volk_gnsssdr_16ic_resampler_16ic_H
@ -48,15 +72,6 @@
// return (r > 0.0) ? (r + 0.5) : (r - 0.5);
//}
/*!
\brief Resamples a complex vector (16-bit integer each component)
\param[out] result The vector where the result will be stored
\param[in] local_code One of the vectors to be multiplied
\param[in] rem_code_phase_chips Remnant code phase [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_resampler_16ic_generic(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)
{
int local_code_chip_index;
@ -77,15 +92,6 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_generic(lv_16sc_t* result, c
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Resamples a complex vector (16-bit integer each component)
\param[out] result The vector where the result will be stored
\param[in] local_code One of the vectors to be multiplied
\param[in] rem_code_phase_chips Remnant code phase [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
{
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
@ -165,18 +171,10 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Resamples a complex vector (16-bit integer each component)
\param[out] result The vector where the result will be stored
\param[in] local_code One of the vectors to be multiplied
\param[in] rem_code_phase_chips Remnant code phase [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
{
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
@ -255,18 +253,10 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Resamples a complex vector (16-bit integer each component)
\param[out] result The vector where the result will be stored
\param[in] local_code One of the vectors to be multiplied
\param[in] rem_code_phase_chips Remnant code phase [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
{
unsigned int number;
@ -308,7 +298,6 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons
__attribute__((aligned(16))) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
float32x4_t _4constant_float = vld1q_f32(init_4constant_float);
for(number = 0; number < quarterPoints; number++)
{
_code_phase_out = vmulq_f32(_code_phase_step_chips, _4output_index); //compute the code phase point with the phase step

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_16ic_resamplerpuppet_16ic.h
* \brief Volk puppet for the 16-bit complex vector resampler kernel
* \brief VOLK_GNSSSDR puppet for the 16-bit complex vector resampler kernel.
* \authors <ul>
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
* </ul>
*
* Volk puppet for integrating the resampler into volk's test system
* VOLK_GNSSSDR puppet for integrating the resampler into the test system
*
* -------------------------------------------------------------------------
*

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_16ic_resamplerxnpuppet_16ic.h
* \brief Volk puppet for the multiple 16-bit complex vector resampler kernel
* \brief VOLK_GNSSSDR puppet for the multiple 16-bit complex vector resampler kernel.
* \authors <ul>
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
* </ul>
*
* Volk puppet for integrating the multiple resampler into volk's test system
* VOLK_GNSSSDR puppet for integrating the multiple resampler into the test system
*
* -------------------------------------------------------------------------
*
@ -68,6 +68,7 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_generic(lv_16sc_t* r
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_SSE2
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
{
@ -94,8 +95,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse2(lv_16sc_t* re
#endif
#ifdef LV_HAVE_SSE2
#ifdef LV_HAVE_SSE2
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
{
float code_phase_step_chips = 0.1;
@ -121,8 +122,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse2(lv_16sc_t* re
#endif
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEON
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
{
float code_phase_step_chips = 0.1;

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_16ic_rotatorpuppet_16ic.h
* \brief Volk puppet for the 16-bit complex rotator kernel
* \brief VOLK_GNSSSDR puppet for the 16-bit complex rotator kernel.
* \authors <ul>
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
* </ul>
*
* Volk puppet for integrating the resampler into volk's test system
* VOLK_GNSSSDR puppet for integrating the rotator into the test system
*
* -------------------------------------------------------------------------
*
@ -42,7 +42,6 @@
#ifdef LV_HAVE_GENERIC
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_generic(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
@ -59,7 +58,6 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_generic(lv_16sc_t* outVe
#ifdef LV_HAVE_SSE3
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_a_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
@ -74,8 +72,8 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_a_sse3(lv_16sc_t* outVec
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE3
#ifdef LV_HAVE_SSE3
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_u_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
@ -90,8 +88,8 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_u_sse3(lv_16sc_t* outVec
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEON
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!

View File

@ -1,11 +1,11 @@
/*!
* \file volk_16ic_s32fc_x2_rotator_16ic.h
* \brief Volk protokernel: rotates a 16 bits complex vector
* \file volk_gnsssdr_16ic_s32fc_x2_rotator_16ic.h
* \brief VOLK_GNSSSDR kernel: rotates a 16 bits complex vector.
* \authors <ul>
* <li> Carles Fernandez-Prades, 2015 cfernandez at cttc.es
* </ul>
*
* Volk protokernel that rotates a 16-bit complex vector
* VOLK_GNSSSDR kernel that rotates a 16-bit complex vector
*
* -------------------------------------------------------------------------
*
@ -32,6 +32,29 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_16ic_s32fc_x2_rotator_16ic
*
* \b Overview
*
* Rotates a complex vector (16-bit integer samples each component).
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li inVector: Vector to be rotated.
* \li phase_inc: Phase increment in each sample = lv_cmake(cos(phase_step_rad), sin(phase_step_rad))
* \li phase: Initial phase = lv_cmake(cos(initial_phase_rad), sin(initial_phase_rad))
* \li num_points: Number of complex values to be rotated and stored into \p outVector
*
* \b Outputs
* \li phase: Final phase.
* \li outVector: The resampled vector.
*
*/
#ifndef INCLUDED_volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_H
#define INCLUDED_volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_H
@ -42,14 +65,6 @@
#ifdef LV_HAVE_GENERIC
/*!
\brief Rotates a complex vector (16-bit integer samples each component)
\param[out] outVector Rotated vector
\param[in] inVector Vector to be rotated
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{
unsigned int i = 0;
@ -70,14 +85,6 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* ou
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Rotates a complex vector (16-bit integer samples each component)
\param[out] outVector Rotated vector
\param[in] inVector Vector to be rotated
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;
@ -103,7 +110,6 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
for(unsigned int number = 0; number < sse_iters; number++)
{
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
@ -150,7 +156,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
_out += 4;
}
_mm_store_ps((float*)two_phase_acc, two_phase_acc_reg);
_mm_storeu_ps((float*)two_phase_acc, two_phase_acc_reg);
(*phase) = two_phase_acc[0];
for (unsigned int i = sse_iters * 4; i < num_points; ++i)
@ -164,17 +170,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Rotates a complex vector (16-bit integer samples each component)
\param[out] outVector Rotated vector
\param[in] inVector Vector to be rotated
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;
@ -200,7 +199,6 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
for(unsigned int number = 0; number < sse_iters; number++)
{
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
@ -221,6 +219,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
//next two samples
_in += 2;
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
@ -261,17 +260,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Rotates a complex vector (16-bit integer samples each component)
\param[out] outVector Rotated vector
\param[in] inVector Vector to be rotated
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{
unsigned int i = 0;

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_16ic_x2_dot_prod_16ic.h
* \brief Volk protokernel: multiplies two 16 bits vectors and accumulates them
* \brief VOLK_GNSSSDR kernel: multiplies two 16 bits vectors and accumulates them.
* \authors <ul>
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
* </ul>
*
* Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
* VOLK_GNSSSDR kernel that multiplies two 16 bits vectors (8 bits the real part
* and 8 bits the imaginary part) and accumulates them
*
* -------------------------------------------------------------------------
@ -33,6 +33,29 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_16ic_x2_dot_prod_16ic
*
* \b Overview
*
* Multiplies two input complex vectors (16-bit integer each component) and accumulates them,
* storing the result. Results are saturated so never go beyond the limits of the data type.
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_16ic_x2_dot_prod_16ic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li in_a: One of the vectors to be multiplied and accumulated.
* \li in_b: The other vector to be multiplied and accumulated.
* \li num_points: Number of complex values to be multiplied together, accumulated and stored into \p result
*
* \b Outputs
* \li result: Value of the accumulated result.
*
*/
#ifndef INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_H
#define INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_H
@ -43,13 +66,6 @@
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
\param[out] result Value of the accumulated result
\param[in] in_a One of the vectors to be multiplied and accumulated
\param[in] in_b One of the vectors to be multiplied and accumulated
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
result[0] = lv_cmake((int16_t)0, (int16_t)0);
@ -66,13 +82,6 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result,
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
\param[out] result Value of the accumulated result
\param[in] in_a One of the vectors to be multiplied and accumulated
\param[in] in_b One of the vectors to be multiplied and accumulated
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0);
@ -85,7 +94,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
if (sse_iters > 0)
{
__m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, realcacc, imagcacc, result;
__m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc, result;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
realcacc = _mm_setzero_si128();
@ -100,7 +109,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
//imaginery part -> reinterpret_cast<cv T*>(a)[2*i + 1]
// a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
a = _mm_load_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
__builtin_prefetch(_in_a + 8);
b = _mm_load_si128((__m128i*)_in_b);
__builtin_prefetch(_in_b + 8);
c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
@ -149,13 +160,6 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
\param[out] result Value of the accumulated result
\param[in] in_a One of the vectors to be multiplied and accumulated
\param[in] in_b One of the vectors to be multiplied and accumulated
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0);
@ -169,7 +173,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
if (sse_iters > 0)
{
__m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, realcacc, imagcacc, result;
__m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc, result;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
realcacc = _mm_setzero_si128();
@ -184,7 +188,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
//imaginery part -> reinterpret_cast<cv T*>(a)[2*i + 1]
// a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
a = _mm_loadu_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
__builtin_prefetch(_in_a + 8);
b = _mm_loadu_si128((__m128i*)_in_b);
__builtin_prefetch(_in_b + 8);
c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
@ -210,7 +216,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
result = _mm_or_si128(realcacc, imagcacc);
_mm_storeu_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector
_mm_storeu_si128((__m128i*)dotProductVector, result); // Store the results back into the dot product vector
for (i = 0; i < 4; ++i)
{
@ -232,13 +238,6 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
\param[out] result Value of the accumulated result
\param[in] in_a One of the vectors to be multiplied and accumulated
\param[in] in_b One of the vectors to be multiplied and accumulated
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
unsigned int quarter_points = num_points / 4;
@ -295,4 +294,56 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const
#endif /* LV_HAVE_NEON */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon_vma(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
unsigned int quarter_points = num_points / 4;
unsigned int number;
lv_16sc_t* a_ptr = (lv_16sc_t*) in_a;
lv_16sc_t* b_ptr = (lv_16sc_t*) in_b;
// for 2-lane vectors, 1st lane holds the real part,
// 2nd lane holds the imaginary part
int16x4x2_t a_val, b_val, accumulator;
int16x4x2_t tmp;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4];
accumulator.val[0] = vdup_n_s16(0);
accumulator.val[1] = vdup_n_s16(0);
for(number = 0; number < quarter_points; ++number)
{
a_val = vld2_s16((int16_t*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i
b_val = vld2_s16((int16_t*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i
__builtin_prefetch(a_ptr + 8);
__builtin_prefetch(b_ptr + 8);
tmp.val[0] = vmul_s16(a_val.val[0], b_val.val[0]);
tmp.val[1] = vmul_s16(a_val.val[1], b_val.val[0]);
// use multiply accumulate/subtract to get result
tmp.val[0] = vmls_s16(tmp.val[0], a_val.val[1], b_val.val[1]);
tmp.val[1] = vmla_s16(tmp.val[1], a_val.val[0], b_val.val[1]);
accumulator.val[0] = vadd_s16(accumulator.val[0], tmp.val[0]);
accumulator.val[1] = vadd_s16(accumulator.val[1], tmp.val[1]);
a_ptr += 4;
b_ptr += 4;
}
vst2_s16((int16_t*)accum_result, accumulator);
*out = accum_result[0] + accum_result[1] + accum_result[2] + accum_result[3];
// tail case
for(number = quarter_points * 4; number < num_points; ++number)
{
*out += (*a_ptr++) * (*b_ptr++);
}
}
#endif /* LV_HAVE_NEON */
#endif /*INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_H*/

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
* \brief Volk protokernel: multiplies N 16 bits vectors by a common vector and accumulates the results in N 16 bits short complex outputs.
* \brief VOLK_GNSSSDR kernel: multiplies N 16 bits vectors by a common vector and accumulates the results in N 16 bits short complex outputs.
* \authors <ul>
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
* </ul>
*
* Volk protokernel that multiplies N 16 bits vectors by a common vector and accumulates the results in N 16 bits short complex outputs.
* VOLK_GNSSSDR kernel that multiplies N 16 bits vectors by a common vector and accumulates the results in N 16 bits short complex outputs.
* It is optimized to perform the N tap correlation process in GNSS receivers.
*
* -------------------------------------------------------------------------
@ -33,6 +33,30 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_16ic_x2_dot_prod_16ic_xn
*
* \b Overview
*
* Multiplies a reference complex vector by an arbitrary number of other complex vectors, accumulates the results and stores them in the output vector.
* This function can be used as a multiple correlator.
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li in_common: Pointer to one of the vectors to be multiplied and accumulated (reference vector)
* \li in_a: Pointer to an array of pointers to other vectors to be multiplied by \p in_common and accumulated.
* \li num_a_vectors: Number of vectors to be multiplied by the reference vector \p in_common and accumulated.
* \li num_points: Number of complex values to be multiplied together, accumulated and stored into \p result
*
* \b Outputs
* \li result: Vector of \p num_a_vectors components with vector \p in_common multiplied by the vectors in \p in_a and accumulated.
*
*/
#ifndef INCLUDED_volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_H
#define INCLUDED_volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_H
@ -41,14 +65,7 @@
#include <volk_gnsssdr/saturation_arithmetic.h>
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
@ -70,14 +87,6 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* resu
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
lv_16sc_t dotProduct = lv_cmake(0,0);
@ -160,22 +169,15 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* resul
_out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)),
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
}
}
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
lv_16sc_t dotProduct = lv_cmake(0,0);
@ -258,22 +260,15 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* resul
_out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)),
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
}
}
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
lv_16sc_t dotProduct = lv_cmake(0,0);
@ -354,9 +349,85 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result,
_out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)),
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
}
}
}
}
#endif /* LV_HAVE_NEON */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
lv_16sc_t dotProduct = lv_cmake(0,0);
const unsigned int neon_iters = num_points / 4;
const lv_16sc_t** _in_a = in_a;
const lv_16sc_t* _in_common = in_common;
lv_16sc_t* _out = result;
if (neon_iters > 0)
{
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
int16x4x2_t a_val, b_val, tmp;
int16x4x2_t* accumulator;
accumulator = (int16x4x2_t*)malloc(num_a_vectors * sizeof(int16x4x2_t));
for(int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
accumulator[n_vec].val[0] = vdup_n_s16(0);
accumulator[n_vec].val[1] = vdup_n_s16(0);
}
for(unsigned int number = 0; number < neon_iters; number++)
{
b_val = vld2_s16((int16_t*)_in_common); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
__builtin_prefetch(_in_common + 8);
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
a_val = vld2_s16((int16_t*)&(_in_a[n_vec][number*4]));
tmp.val[0] = vmul_s16(a_val.val[0], b_val.val[0]);
tmp.val[1] = vmul_s16(a_val.val[1], b_val.val[0]);
// use multiply accumulate/subtract to get result
tmp.val[0] = vmls_s16(tmp.val[0], a_val.val[1], b_val.val[1]);
tmp.val[1] = vmla_s16(tmp.val[1], a_val.val[0], b_val.val[1]);
accumulator[n_vec].val[0] = vadd_s16(accumulator[n_vec].val[0], tmp.val[0]);
accumulator[n_vec].val[1] = vadd_s16(accumulator[n_vec].val[1], tmp.val[1]);
}
_in_common += 4;
}
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
vst2_s16((int16_t*)dotProductVector, accumulator[n_vec]); // Store the results back into the dot product vector
dotProduct = lv_cmake(0,0);
for (int i = 0; i < 4; ++i)
{
dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])),
sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i])));
}
_out[n_vec] = dotProduct;
}
free(accumulator);
}
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
for(unsigned int n = neon_iters * 4; n < num_points; n++)
{
lv_16sc_t tmp = in_common[n] * in_a[n_vec][n];
_out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)),
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
}
}
}
#endif /* LV_HAVE_NEON */

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h
* \brief Volk puppet for the multiple 16-bit complex dot product kernel
* \brief VOLK_GNSSSDR puppet for the multiple 16-bit complex dot product kernel.
* \authors <ul>
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
* </ul>
*
* Volk puppet for integrating the resampler into volk's test system
* VOLK_GNSSSDR puppet for integrating the resampler into the test system
*
* -------------------------------------------------------------------------
*
@ -61,7 +61,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_generic(lv_16sc_t*
volk_gnsssdr_free(in_a);
}
#endif // Generic
#endif /* Generic */
#ifdef LV_HAVE_SSE2
static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_a_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
@ -83,12 +83,10 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_a_sse2(lv_16sc_t* r
volk_gnsssdr_free(in_a);
}
#endif // SSE2
#endif /* SSE2 */
#define WORKAROUND 1
#ifdef WORKAROUND
#ifdef LV_HAVE_SSE2
#if LV_HAVE_SSE2 && LV_HAVE_64
static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_u_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
@ -108,8 +106,9 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_u_sse2(lv_16sc_t* r
}
volk_gnsssdr_free(in_a);
}
#endif
#endif // SSE2
#endif /* LV_HAVE_SSE2 && LV_HAVE_64 */
#ifdef LV_HAVE_NEON
@ -134,6 +133,29 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon(lv_16sc_t* res
#endif // NEON
#ifdef LV_HAVE_NEON
static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon_vma(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
int num_a_vectors = 3;
lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
for(unsigned int n = 0; n < num_a_vectors; n++)
{
in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t)*num_points, volk_gnsssdr_get_alignment());
memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t)*num_points);
}
volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points);
for(unsigned int n = 0; n < num_a_vectors; n++)
{
volk_gnsssdr_free(in_a[n]);
}
volk_gnsssdr_free(in_a);
}
#endif // NEON
#endif // INCLUDED_volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_H

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_16ic_x2_dot_prod_16ic.h
* \brief Volk protokernel: multiplies two 16 bits vectors and accumulates them
* \file volk_gnsssdr_16ic_x2_multiply_16ic.h
* \brief VOLK_GNSSSDR kernel: multiplies two 16 bits vectors and accumulates them.
* \authors <ul>
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
* </ul>
*
* Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
* VOLK_GNSSSDR kernel that multiplies two 16 bits vectors (8 bits the real part
* and 8 bits the imaginary part) and accumulates them
*
* -------------------------------------------------------------------------
@ -33,6 +33,28 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_16ic_x2_multiply_16ic
*
* \b Overview
*
* Multiplies two input complex vectors, point-by-point, storing the result in the third vector.
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_16ic_x2_multiply_16ic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li in_a: One of the vectors to be multiplied.
* \li in_b: The other vector to be multiplied.
* \li num_points: The number of complex data points.
*
* \b Outputs
* \li result: The vector where the result will be stored.
*
*/
#ifndef INCLUDED_volk_gnsssdr_16ic_x2_multiply_16ic_H
#define INCLUDED_volk_gnsssdr_16ic_x2_multiply_16ic_H
@ -40,13 +62,7 @@
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param[out] result The vector where the result will be stored
\param[in] in_a One of the vectors to be multiplied
\param[in] in_b One of the vectors to be multiplied
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
for (unsigned int n = 0; n < num_points; n++)
@ -62,13 +78,6 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result,
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param[out] result The vector where the result will be stored
\param[in] in_a One of the vectors to be multiplied
\param[in] in_b One of the vectors to be multiplied
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;
@ -118,16 +127,10 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param[out] result The vector where the result will be stored
\param[in] in_a One of the vectors to be multiplied
\param[in] in_b One of the vectors to be multiplied
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;
@ -177,16 +180,10 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, con
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param[out] result The vector where the result will be stored
\param[in] in_a One of the vectors to be multiplied
\param[in] in_b One of the vectors to be multiplied
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
lv_16sc_t *a_ptr = (lv_16sc_t*) in_a;

View File

@ -1,12 +1,12 @@
/*!
* \file volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h
* \brief Volk protokernel: multiplies N 16 bits vectors by a common vector
* \file volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h
* \brief VOLK_GNSSSDR kernel: multiplies N 16 bits vectors by a common vector
* phase rotated and accumulates the results in N 16 bits short complex outputs.
* \authors <ul>
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
* </ul>
*
* Volk protokernel that multiplies N 16 bits vectors by a common vector, which is
* VOLK_GNSSSDR kernel that multiplies N 16 bits vectors by a common vector, which is
* phase-rotated by phase offset and phase increment, and accumulates the results
* in N 16 bits short complex outputs.
* It is optimized to perform the N tap correlation process in GNSS receivers.
@ -36,8 +36,36 @@
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_16ic_xn_rotator_dot_prod_16ic_xn_H
#define INCLUDED_volk_gnsssdr_16ic_xn_rotator_dot_prod_16ic_xn_H
/*!
* \page volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn
*
* \b Overview
*
* Rotates and multiplies the reference complex vector with an arbitrary number of other complex vectors,
* accumulates the results and stores them in the output vector.
* This function can be used for Doppler wipe-off and multiple correlator.
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn((lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li in_common: Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector).
* \li phase_inc: Phase increment = lv_cmake(cos(phase_step_rad), sin(phase_step_rad))
* \li phase: Initial phase = lv_cmake(cos(initial_phase_rad), sin(initial_phase_rad))
* \li in_a: Pointer to an array of pointers to multiple vectors to be multiplied and accumulated.
* \li num_a_vectors: Number of vectors to be multiplied by the reference vector and accumulated.
* \li num_points: Number of complex values to be multiplied together, accumulated and stored into \p result.
*
* \b Outputs
* \li phase: Final phase.
* \li result: Vector of \p num_a_vectors components with the multiple vectors of \p in_a rotated, multiplied by \p in_common and accumulated.
*
*/
#ifndef INCLUDED_volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_H
#define INCLUDED_volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_H
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
@ -46,16 +74,7 @@
//#include <stdio.h>
#ifdef LV_HAVE_GENERIC
/*!
\brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
\param[in] in_common Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector)
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
lv_16sc_t tmp16;
@ -85,16 +104,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
\param[in] in_common Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector)
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
lv_16sc_t dotProduct = lv_cmake(0,0);
@ -140,7 +149,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
// Phase rotation on operand in_common starts here:
//printf("generic phase %i: %f,%f\n", n*4,lv_creal(*phase),lv_cimag(*phase));
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in_common + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
@ -248,19 +256,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
\param[in] in_common Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector)
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
lv_16sc_t dotProduct = lv_cmake(0,0);
@ -304,6 +303,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
for(unsigned int number = 0; number < sse_iters; number++)
{
// Phase rotation on operand in_common starts here:
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in_common + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
@ -378,7 +378,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
a = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]);
_mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector
_mm_storeu_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector
dotProduct = lv_cmake(0,0);
for (int i = 0; i < 4; ++i)
{
@ -390,9 +390,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
free(realcacc);
free(imagcacc);
_mm_store_ps((float*)two_phase_acc, two_phase_acc_reg);
_mm_storeu_ps((float*)two_phase_acc, two_phase_acc_reg);
(*phase) = two_phase_acc[0];
for(unsigned int n = sse_iters * 4; n < num_points; n++)
{
tmp16 = in_common[n];
@ -413,16 +414,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Rotates and multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] result Array of num_a_vectors components with the multiple versions of in_a multiplied and accumulated The vector where the accumulated result will be stored
\param[in] in_common Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector)
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
const unsigned int neon_iters = num_points / 4;
@ -583,4 +574,160 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
#endif /* LV_HAVE_NEON */
#endif /*INCLUDED_volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_H*/
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
const unsigned int neon_iters = num_points / 4;
const lv_16sc_t** _in_a = in_a;
const lv_16sc_t* _in_common = in_common;
lv_16sc_t* _out = result;
lv_16sc_t tmp16_, tmp;
lv_32fc_t tmp32_;
if (neon_iters > 0)
{
lv_16sc_t dotProduct = lv_cmake(0,0);
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
lv_32fc_t phase2 = (lv_32fc_t)(*phase) * phase_inc;
lv_32fc_t phase3 = phase2 * phase_inc;
lv_32fc_t phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
float32x4_t _phase_real = vld1q_f32(__phase_real);
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
int16x4x2_t a_val, b_val;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
float32x4_t half = vdupq_n_f32(0.5f);
int16x4x2_t tmp16;
int32x4x2_t tmp32i;
float32x4x2_t tmp32f, tmp32_real, tmp32_imag;
float32x4_t sign, PlusHalf, Round;
int16x4x2_t* accumulator;
accumulator = (int16x4x2_t*)calloc(num_a_vectors, sizeof(int16x4x2_t));
for(int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
accumulator[n_vec].val[0] = vdup_n_s16(0);
accumulator[n_vec].val[1] = vdup_n_s16(0);
}
for(unsigned int number = 0; number < neon_iters; number++)
{
/* load 4 complex numbers (int 16 bits each component) */
tmp16 = vld2_s16((int16_t*)_in_common);
__builtin_prefetch(_in_common + 8);
_in_common += 4;
/* promote them to int 32 bits */
tmp32i.val[0] = vmovl_s16(tmp16.val[0]);
tmp32i.val[1] = vmovl_s16(tmp16.val[1]);
/* promote them to float 32 bits */
tmp32f.val[0] = vcvtq_f32_s32(tmp32i.val[0]);
tmp32f.val[1] = vcvtq_f32_s32(tmp32i.val[1]);
/* complex multiplication of four complex samples (float 32 bits each component) */
tmp32_real.val[0] = vmulq_f32(tmp32f.val[0], _phase_real);
tmp32_real.val[1] = vmulq_f32(tmp32f.val[1], _phase_imag);
tmp32_imag.val[0] = vmulq_f32(tmp32f.val[0], _phase_imag);
tmp32_imag.val[1] = vmulq_f32(tmp32f.val[1], _phase_real);
tmp32f.val[0] = vsubq_f32(tmp32_real.val[0], tmp32_real.val[1]);
tmp32f.val[1] = vaddq_f32(tmp32_imag.val[0], tmp32_imag.val[1]);
/* downcast results to int32 */
/* in __aarch64__ we can do that with vcvtaq_s32_f32(ret1); vcvtaq_s32_f32(ret2); */
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[0]), 31)));
PlusHalf = vaddq_f32(tmp32f.val[0], half);
Round = vsubq_f32(PlusHalf, sign);
tmp32i.val[0] = vcvtq_s32_f32(Round);
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[1]), 31)));
PlusHalf = vaddq_f32(tmp32f.val[1], half);
Round = vsubq_f32(PlusHalf, sign);
tmp32i.val[1] = vcvtq_s32_f32(Round);
/* downcast results to int16 */
tmp16.val[0] = vqmovn_s32(tmp32i.val[0]);
tmp16.val[1] = vqmovn_s32(tmp32i.val[1]);
/* compute next four phases */
tmp32_real.val[0] = vmulq_f32(_phase_real, _phase4_real);
tmp32_real.val[1] = vmulq_f32(_phase_imag, _phase4_imag);
tmp32_imag.val[0] = vmulq_f32(_phase_real, _phase4_imag);
tmp32_imag.val[1] = vmulq_f32(_phase_imag, _phase4_real);
_phase_real = vsubq_f32(tmp32_real.val[0], tmp32_real.val[1]);
_phase_imag = vaddq_f32(tmp32_imag.val[0], tmp32_imag.val[1]);
vst1q_f32((float32_t*)__phase_real, _phase_real);
vst1q_f32((float32_t*)__phase_imag, _phase_imag);
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
a_val = vld2_s16((int16_t*)&(_in_a[n_vec][number*4]));
b_val.val[0] = vmul_s16(a_val.val[0], tmp16.val[0]);
b_val.val[1] = vmul_s16(a_val.val[1], tmp16.val[0]);
// use multiply accumulate/subtract to get result
b_val.val[0] = vmls_s16(b_val.val[0], a_val.val[1], tmp16.val[1]);
b_val.val[1] = vmla_s16(b_val.val[1], a_val.val[0], tmp16.val[1]);
accumulator[n_vec].val[0] = vqadd_s16(accumulator[n_vec].val[0], b_val.val[0]);
accumulator[n_vec].val[1] = vqadd_s16(accumulator[n_vec].val[1], b_val.val[1]);
}
}
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
vst2_s16((int16_t*)dotProductVector, accumulator[n_vec]); // Store the results back into the dot product vector
dotProduct = lv_cmake(0,0);
for (int i = 0; i < 4; ++i)
{
dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])),
sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i])));
}
_out[n_vec] = dotProduct;
}
free(accumulator);
vst1q_f32((float32_t*)__phase_real, _phase_real);
vst1q_f32((float32_t*)__phase_imag, _phase_imag);
(*phase) = lv_cmake((float32_t)__phase_real[0], (float32_t)__phase_imag[0]);
}
for (unsigned int n = neon_iters * 4; n < num_points; n++)
{
tmp16_ = in_common[n]; //printf("neon phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase));
tmp32_ = lv_cmake((float32_t)lv_creal(tmp16_), (float32_t)lv_cimag(tmp16_)) * (*phase);
tmp16_ = lv_cmake((int16_t)rintf(lv_creal(tmp32_)), (int16_t)rintf(lv_cimag(tmp32_)));
(*phase) *= phase_inc;
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
tmp = tmp16_ * in_a[n_vec][n];
_out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
}
}
}
#endif /* LV_HAVE_NEON */
#endif /*INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_H*/

View File

@ -1,6 +1,6 @@
/*!
* \file volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h
* \brief Volk puppet for the multiple 16-bit complex dot product kernel
* \file volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic.h
* \brief Volk puppet for the multiple 16-bit complex dot product kernel.
* \authors <ul>
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
* </ul>
@ -69,6 +69,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_generic(lv_
#endif // Generic
#ifdef LV_HAVE_SSE3
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
@ -99,8 +100,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3(lv_1
#endif // SSE3
#ifdef LV_HAVE_SSE3
#ifdef LV_HAVE_SSE3
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
@ -130,8 +131,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_sse3(lv_1
#endif // SSE3
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEON
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
@ -161,6 +162,37 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon(lv_16s
#endif // NEON
#ifdef LV_HAVE_NEON
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon_vma(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
float rem_carrier_phase_in_rad = 0.345;
float phase_step_rad = 0.1;
lv_32fc_t phase[1];
phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
lv_32fc_t phase_inc[1];
phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
int num_a_vectors = 3;
lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
for(unsigned int n = 0; n < num_a_vectors; n++)
{
in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points);
}
volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points);
for(unsigned int n = 0; n < num_a_vectors; n++)
{
volk_gnsssdr_free(in_a[n]);
}
volk_gnsssdr_free(in_a);
}
#endif // NEON
#endif // INCLUDED_volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_H

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_16ic_xn_resampler_16ic_xn.h
* \brief Volk protokernel: Resamples N 16 bits integer short complex vectors using zero hold resample algorithm.
* \brief VOLK_GNSSSDR kernel: Resamples N 16 bits integer short complex vectors using zero hold resample algorithm.
* \authors <ul>
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
* </ul>
*
* Volk protokernel that esamples N 16 bits integer short complex vectors using zero hold resample algorithm.
* VOLK_GNSSSDR kernel that esamples N 16 bits integer short complex vectors using zero hold resample algorithm.
* It is optimized to resample a sigle GNSS local code signal replica into N vectors fractional-resampled and fractional-delayed
* (i.e. it creates the Early, Prompt, and Late code replicas)
*
@ -34,6 +34,31 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_16ic_xn_resampler_16ic_xn
*
* \b Overview
*
* Resamples a complex vector (16-bit integer each component), providing \p num_out_vectors outputs.
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_16ic_xn_resampler_16ic_xn(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips, float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
* \endcode
*
* \b Inputs
* \li local_code: One of the vectors to be multiplied.
* \li rem_code_phase_chips: Remnant code phase [chips].
* \li code_phase_step_chips: Phase increment per sample [chips/sample].
* \li code_length_chips: Code length in chips.
* \li num_out_vectors Number of output vectors.
* \li num_output_samples: The number of data values to be in the resampled vector.
*
* \b Outputs
* \li result: Pointer to a vector of pointers where the results will be stored.
*
*/
#ifndef INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_H
#define INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_H
@ -49,16 +74,7 @@
// return (r > 0.0) ? (r + 0.5) : (r - 0.5);
//}
/*!
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
\param[out] result Pointer to the vector where the results will be stored
\param[in] local_code One of the vectors to be multiplied
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_out_vectors Number of output vectors
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips, float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
{
int local_code_chip_index;
@ -84,16 +100,6 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** re
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
\param[out] result Pointer to the vector where the results will be stored
\param[in] local_code One of the vectors to be multiplied
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_out_vectors Number of output vectors
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
{
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
@ -125,7 +131,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res
__m128i negative_indexes, overflow_indexes,_code_phase_out_int, _code_phase_out_int_neg,_code_phase_out_int_over;
__m128i zero=_mm_setzero_si128();
__m128i zero = _mm_setzero_si128();
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__m128 _4output_index = _mm_load_ps(init_idx_float);
@ -148,11 +154,11 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res
_code_phase_out_with_offset = _mm_add_ps(_code_phase_out, _rem_code_phase); //add the phase offset
_code_phase_out_int = _mm_cvtps_epi32(_code_phase_out_with_offset); //convert to integer
negative_indexes = _mm_cmplt_epi32 (_code_phase_out_int, zero); //test for negative values
negative_indexes = _mm_cmplt_epi32(_code_phase_out_int, zero); //test for negative values
_code_phase_out_int_neg = _mm_add_epi32(_code_phase_out_int, _code_length_chips); //the negative values branch
_code_phase_out_int_neg = _mm_xor_si128(_code_phase_out_int, _mm_and_si128( negative_indexes,_mm_xor_si128( _code_phase_out_int_neg, _code_phase_out_int )));
_code_phase_out_int_neg = _mm_xor_si128(_code_phase_out_int, _mm_and_si128( negative_indexes, _mm_xor_si128( _code_phase_out_int_neg, _code_phase_out_int )));
overflow_indexes = _mm_cmpgt_epi32 (_code_phase_out_int_neg, _code_length_chips_minus1); //test for overflow values
overflow_indexes = _mm_cmpgt_epi32(_code_phase_out_int_neg, _code_length_chips_minus1); //test for overflow values
_code_phase_out_int_over = _mm_sub_epi32(_code_phase_out_int_neg, _code_length_chips); //the negative values branch
_code_phase_out_int_over = _mm_xor_si128(_code_phase_out_int_neg, _mm_and_si128( overflow_indexes, _mm_xor_si128( _code_phase_out_int_over, _code_phase_out_int_neg )));
@ -183,19 +189,10 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
\param[out] result Pointer to the vector where the results will be stored
\param[in] local_code One of the vectors to be multiplied
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_out_vectors Number of output vectors
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
{
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
@ -227,7 +224,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res
__m128i negative_indexes, overflow_indexes,_code_phase_out_int, _code_phase_out_int_neg,_code_phase_out_int_over;
__m128i zero=_mm_setzero_si128();
__m128i zero = _mm_setzero_si128();
__attribute__((aligned(16))) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__m128 _4output_index = _mm_loadu_ps(init_idx_float);
@ -250,11 +247,11 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res
_code_phase_out_with_offset = _mm_add_ps(_code_phase_out, _rem_code_phase); //add the phase offset
_code_phase_out_int = _mm_cvtps_epi32(_code_phase_out_with_offset); //convert to integer
negative_indexes = _mm_cmplt_epi32 (_code_phase_out_int, zero); //test for negative values
negative_indexes = _mm_cmplt_epi32(_code_phase_out_int, zero); //test for negative values
_code_phase_out_int_neg = _mm_add_epi32(_code_phase_out_int, _code_length_chips); //the negative values branch
_code_phase_out_int_neg = _mm_xor_si128(_code_phase_out_int, _mm_and_si128( negative_indexes,_mm_xor_si128( _code_phase_out_int_neg, _code_phase_out_int )));
_code_phase_out_int_neg = _mm_xor_si128(_code_phase_out_int, _mm_and_si128( negative_indexes, _mm_xor_si128( _code_phase_out_int_neg, _code_phase_out_int )));
overflow_indexes = _mm_cmpgt_epi32 (_code_phase_out_int_neg, _code_length_chips_minus1); //test for overflow values
overflow_indexes = _mm_cmpgt_epi32(_code_phase_out_int_neg, _code_length_chips_minus1); //test for overflow values
_code_phase_out_int_over = _mm_sub_epi32(_code_phase_out_int_neg, _code_length_chips); //the negative values branch
_code_phase_out_int_over = _mm_xor_si128(_code_phase_out_int_neg, _mm_and_si128( overflow_indexes, _mm_xor_si128( _code_phase_out_int_over, _code_phase_out_int_neg )));
@ -286,19 +283,10 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
\param[out] result Pointer to the vector where the results will be stored
\param[in] local_code One of the vectors to be multiplied
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_out_vectors Number of output vectors
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
{
unsigned int number;

View File

@ -1,6 +1,6 @@
/*!
* \file volk_gnsssdr_32fc_convert_16ic.h
* \brief Volk protokernel: converts float32 complex values to 16 integer complex values taking care of overflow
* \brief VOLK_GNSSSDR kernel: converts float32 complex values to 16 integer complex values taking care of overflow.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
@ -30,6 +30,29 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_32fc_convert_16ic
*
* \b Overview
*
* Converts a complex vector of 32-bits float each component into
* a complex vector of 16-bits integer each component.
* Values are saturated to the limit values of the output data type.
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_32fc_convert_16ic(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li inputVector: The complex 32-bit float input data buffer.
* \li num_points: The number of data values to be converted.
*
* \b Outputs
* \li outputVector: The complex 16-bit integer output data buffer.
*
*/
#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_H
#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_H
@ -40,12 +63,6 @@
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 16-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;
@ -91,15 +108,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_SSE
#include <xmmintrin.h> // __m64, __m128 ??
/*!
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 16-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;
@ -149,12 +161,6 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector,
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 16-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;
@ -200,15 +206,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 16-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points/4;
@ -254,15 +255,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector,
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 16-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
const unsigned int neon_iters = num_points / 4;
@ -318,14 +314,9 @@ static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector,
#endif /* LV_HAVE_NEON */
#ifdef LV_HAVE_GENERIC
/*!
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 16-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
float* inputVectorPtr = (float*)inputVector;

View File

@ -1,6 +1,6 @@
/*!
* \file volk_gnsssdr_32fc_convert_8ic.h
* \brief Volk protokernel: converts float32 complex values to 8 integer complex values taking care of overflow
* \brief VOLK_GNSSSDR kernel: converts float32 complex values to 8 integer complex values taking care of overflow.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
@ -30,6 +30,29 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_32fc_convert_8ic
*
* \b Overview
*
* Converts a complex vector of 32-bits float each component into
* a complex vector of 8-bits integer each component.
* Values are saturated to the limit values of the output data type.
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_32fc_convert_8ic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li inputVector: The complex 32-bit float input data buffer.
* \li num_points: The number of data values to be converted.
*
* \b Outputs
* \li outputVector: The complex 8-bit integer output data buffer.
*
*/
#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_H
#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_H
@ -42,12 +65,6 @@
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 8-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
unsigned i = 0;
@ -103,14 +120,9 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector,
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 8-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
float* inputVectorPtr = (float*)inputVector;
@ -133,12 +145,6 @@ static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector,
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 8-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
@ -197,12 +203,6 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector,
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 8-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
const unsigned int neon_iters = num_points / 8;

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_64f_accumulator_64f.h
* \brief Volk protokernel: 64 bits (double) scalar accumulator
* \brief VOLK_GNSSSDR kernel: 64 bits (double) scalar accumulator.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that implements an accumulator of char values
* VOLK_GNSSSDR kernel that implements an accumulator of double (64-bit float) values.
*
* -------------------------------------------------------------------------
*
@ -32,6 +32,27 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_64f_accumulator_64f
*
* \b Overview
*
* Accumulates the values in the input buffer.
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_64f_accumulator_64f(double* result, const double* inputBuffer, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li inputBuffer: The buffer of data to be accumulated.
* \li num_points: The number of values in \p inputBuffer to be accumulated.
*
* \b Outputs
* \li result: The accumulated result.
*
*/
#ifndef INCLUDED_volk_gnsssdr_64f_accumulator_64f_u_H
#define INCLUDED_volk_gnsssdr_64f_accumulator_64f_u_H
@ -39,96 +60,91 @@
#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_64f_accumulator_64f_u_avx(double* result,const double* inputBuffer, unsigned int num_points){
static inline void volk_gnsssdr_64f_accumulator_64f_u_avx(double* result, const double* inputBuffer, unsigned int num_points)
{
double returnValue = 0;
const unsigned int sse_iters = num_points / 4;
const double* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(32) double tempBuffer[4];
__m256d accumulator = _mm256_setzero_pd();
__m256d aVal = _mm256_setzero_pd();
for(unsigned int number = 0; number < sse_iters; number++)
{
aVal = _mm256_loadu_pd(aPtr);
accumulator = _mm256_add_pd(accumulator, aVal);
aPtr += 4;
}
_mm256_storeu_pd((double*)tempBuffer,accumulator);
for(unsigned int i = 0; i<4; ++i){
returnValue += tempBuffer[i];
}
for(unsigned int i = 0; i<(num_points % 4); ++i){
returnValue += (*aPtr++);
}
{
aVal = _mm256_loadu_pd(aPtr);
accumulator = _mm256_add_pd(accumulator, aVal);
aPtr += 4;
}
_mm256_storeu_pd((double*)tempBuffer, accumulator);
for(unsigned int i = 0; i < 4; ++i)
{
returnValue += tempBuffer[i];
}
for(unsigned int i = 0; i < (num_points % 4); ++i)
{
returnValue += (*aPtr++);
}
*result = returnValue;
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_64f_accumulator_64f_u_sse3(double* result,const double* inputBuffer, unsigned int num_points){
static inline void volk_gnsssdr_64f_accumulator_64f_u_sse3(double* result,const double* inputBuffer, unsigned int num_points)
{
double returnValue = 0;
const unsigned int sse_iters = num_points / 2;
const double* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(16) double tempBuffer[2];
__m128d accumulator = _mm_setzero_pd();
__m128d aVal = _mm_setzero_pd();
for(unsigned int number = 0; number < sse_iters; number++)
{
aVal = _mm_loadu_pd(aPtr);
accumulator = _mm_add_pd(accumulator, aVal);
aPtr += 2;
}
_mm_storeu_pd((double*)tempBuffer,accumulator);
for(unsigned int i = 0; i<2; ++i){
returnValue += tempBuffer[i];
}
for(unsigned int i = 0; i<(num_points % 2); ++i){
returnValue += (*aPtr++);
}
{
aVal = _mm_loadu_pd(aPtr);
accumulator = _mm_add_pd(accumulator, aVal);
aPtr += 2;
}
_mm_storeu_pd((double*)tempBuffer, accumulator);
for(unsigned int i = 0; i < 2; ++i)
{
returnValue += tempBuffer[i];
}
for(unsigned int i = 0; i < (num_points % 2); ++i)
{
returnValue += (*aPtr++);
}
*result = returnValue;
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_64f_accumulator_64f_generic(double* result,const double* inputBuffer, unsigned int num_points){
static inline void volk_gnsssdr_64f_accumulator_64f_generic(double* result,const double* inputBuffer, unsigned int num_points)
{
const double* aPtr = inputBuffer;
double returnValue = 0;
for(unsigned int number = 0;number < num_points; number++){
returnValue += (*aPtr++);
}
for(unsigned int number = 0;number < num_points; number++)
{
returnValue += (*aPtr++);
}
*result = returnValue;
}
#endif /* LV_HAVE_GENERIC */
@ -136,78 +152,75 @@ static inline void volk_gnsssdr_64f_accumulator_64f_generic(double* result,const
#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_64f_accumulator_64f_a_avx(double* result,const double* inputBuffer, unsigned int num_points){
static inline void volk_gnsssdr_64f_accumulator_64f_a_avx(double* result,const double* inputBuffer, unsigned int num_points)
{
double returnValue = 0;
const unsigned int sse_iters = num_points / 4;
const double* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(32) double tempBuffer[4];
__m256d accumulator = _mm256_setzero_pd();
__m256d aVal = _mm256_setzero_pd();
for(unsigned int number = 0; number < sse_iters; number++)
{
aVal = _mm256_load_pd(aPtr);
accumulator = _mm256_add_pd(accumulator, aVal);
aPtr += 4;
}
_mm256_store_pd((double*)tempBuffer,accumulator);
for(unsigned int i = 0; i<4; ++i){
returnValue += tempBuffer[i];
}
for(unsigned int i = 0; i<(num_points % 4); ++i){
returnValue += (*aPtr++);
}
{
aVal = _mm256_load_pd(aPtr);
accumulator = _mm256_add_pd(accumulator, aVal);
aPtr += 4;
}
_mm256_store_pd((double*)tempBuffer, accumulator);
for(unsigned int i = 0; i < 4; ++i)
{
returnValue += tempBuffer[i];
}
for(unsigned int i = 0; i < (num_points % 4); ++i)
{
returnValue += (*aPtr++);
}
*result = returnValue;
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_64f_accumulator_64f_a_sse3(double* result,const double* inputBuffer, unsigned int num_points){
static inline void volk_gnsssdr_64f_accumulator_64f_a_sse3(double* result,const double* inputBuffer, unsigned int num_points)
{
double returnValue = 0;
const unsigned int sse_iters = num_points / 2;
const double* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(16) double tempBuffer[2];
__m128d accumulator = _mm_setzero_pd();
__m128d aVal = _mm_setzero_pd();
for(unsigned int number = 0; number < sse_iters; number++)
{
aVal = _mm_load_pd(aPtr);
accumulator = _mm_add_pd(accumulator, aVal);
aPtr += 2;
}
_mm_store_pd((double*)tempBuffer,accumulator);
for(unsigned int i = 0; i<2; ++i){
returnValue += tempBuffer[i];
}
for(unsigned int i = 0; i<(num_points % 2); ++i){
returnValue += (*aPtr++);
}
{
aVal = _mm_load_pd(aPtr);
accumulator = _mm_add_pd(accumulator, aVal);
aPtr += 2;
}
_mm_store_pd((double*)tempBuffer, accumulator);
for(unsigned int i = 0; i < 2; ++i)
{
returnValue += tempBuffer[i];
}
for(unsigned int i = 0; i < (num_points % 2); ++i)
{
returnValue += (*aPtr++);
}
*result = returnValue;
}
#endif /* LV_HAVE_SSE3 */

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_8i_accumulator_s8i.h
* \brief Volk protokernel: 8 bits (char) scalar accumulator
* \brief VOLK_GNSSSDR kernel: 8 bits (char) scalar accumulator.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that implements an accumulator of char values
* VOLK_GNSSSDR kernel that implements an accumulator of char values
*
* -------------------------------------------------------------------------
*
@ -32,6 +32,27 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_8i_accumulator_s8i
*
* \b Overview
*
* Accumulates the values in the input buffer.
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_8i_accumulator_s8i(char* result, const char* inputBuffer, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li inputBuffer: The buffer of data to be accumulated.
* \li num_points: The number of values in \p inputBuffer to be accumulated.
*
* \b Outputs
* \li result: The accumulated result.
*
*/
#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_H
#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_H
@ -40,12 +61,7 @@
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_8i_accumulator_s8i_u_sse3(char* result, const char* inputBuffer, unsigned int num_points)
{
char returnValue = 0;
@ -63,14 +79,14 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_u_sse3(char* result, const ch
accumulator = _mm_add_epi8(accumulator, aVal);
aPtr += 16;
}
_mm_storeu_si128((__m128i*)tempBuffer,accumulator);
_mm_storeu_si128((__m128i*)tempBuffer, accumulator);
for(unsigned int i = 0; i<16; ++i)
for(unsigned int i = 0; i < 16; ++i)
{
returnValue += tempBuffer[i];
}
for(unsigned int i = 0; i<(num_points % 16); ++i)
for(unsigned int i = 0; i < (num_points % 16); ++i)
{
returnValue += (*aPtr++);
}
@ -79,13 +95,9 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_u_sse3(char* result, const ch
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_8i_accumulator_s8i_generic(char* result, const char* inputBuffer, unsigned int num_points)
{
const char* aPtr = inputBuffer;
@ -99,14 +111,10 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_generic(char* result, const c
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const char* inputBuffer, unsigned int num_points)
{
char returnValue = 0;
@ -126,11 +134,12 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const ch
}
_mm_store_si128((__m128i*)tempBuffer,accumulator);
for(unsigned int i = 0; i<16; ++i){
for(unsigned int i = 0; i < 16; ++i)
{
returnValue += tempBuffer[i];
}
}
for(unsigned int i = 0; i<(num_points % 16); ++i)
for(unsigned int i = 0; i < (num_points % 16); ++i)
{
returnValue += (*aPtr++);
}
@ -139,13 +148,9 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const ch
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_ORC
/*!
\brief Accumulates the values in the input buffer
\param result The accumulated result
\param inputBuffer The buffer of data to be accumulated
\param num_points The number of values in inputBuffer to be accumulated
*/
extern void volk_gnsssdr_8i_accumulator_s8i_a_orc_impl(short* result, const char* inputBuffer, unsigned int num_points);
static inline void volk_gnsssdr_8i_accumulator_s8i_u_orc(char* result, const char* inputBuffer, unsigned int num_points)

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_8i_index_max_16u.h
* \brief Volk protokernel: calculates the index of the maximum value in a group of 8 bits (char) scalars
* \brief VOLK_GNSSSDR kernel: calculates the index of the maximum value in a group of 8 bits (char) scalars.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that returns the index of the maximum value of a group of 8 bits (char) scalars
* VOLK_GNSSSDR kernel that returns the index of the maximum value of a group of 8 bits (char) scalars
*
* -------------------------------------------------------------------------
*
@ -32,6 +32,27 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_8i_index_max_16u
*
* \b Overview
*
* Returns the index of the max value in \p src0
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_8i_index_max_16u(unsigned int* target, const char* src0, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li src0: The buffer of data to be analyzed.
* \li num_points: The number of values in \p src0 to be analyzed.
*
* \b Outputs
* \li target: The index of the maximum value in \p src0
*
*/
#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_H
#define INCLUDED_volk_gnsssdr_8i_index_max_16u_H
@ -39,12 +60,7 @@
#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)
@ -107,14 +123,10 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, con
#endif /*LV_HAVE_AVX*/
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)
@ -168,14 +180,10 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target,
#endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_SSE2
#include<emmintrin.h>
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)
@ -235,13 +243,9 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, co
#endif /*LV_HAVE_SSE2*/
#ifdef LV_HAVE_GENERIC
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)
@ -266,12 +270,7 @@ static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, c
#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)
@ -334,14 +333,10 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, con
#endif /*LV_HAVE_AVX*/
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)
@ -395,14 +390,10 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target,
#endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Returns the index of the max value in src0
\param target The index of the max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)
@ -463,5 +454,4 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, co
#endif /*LV_HAVE_SSE2*/
#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_H*/

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_8i_max_s8i.h
* \brief Volk protokernel: calculates the maximum value in a group of 8 bits (char) scalars
* \brief VOLK_GNSSSDR kernel: calculates the maximum value in a group of 8 bits (char) scalars.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that returns the maximum value of a group of 8 bits (char) scalars
* VOLK_GNSSSDR kernel that returns the maximum value of a group of 8 bits (char) scalars
*
* -------------------------------------------------------------------------
*
@ -32,6 +32,27 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_8i_max_s8i
*
* \b Overview
*
* Returns the max value in \p src0
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_8i_max_s8i(char* target, const char* src0, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li src0: The buffer of data to be analyzed.
* \li num_points: The number of values in \p src0 to be analyzed.
*
* \b Outputs
* \li target: The max value in \p src0
*
*/
#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_H
#define INCLUDED_volk_gnsssdr_8i_max_s8i_H
@ -39,12 +60,7 @@
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Returns the max value in src0
\param target The max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_max_s8i_u_sse4_1(char* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)
@ -89,14 +105,10 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse4_1(char* target, const char* sr
#endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_SSE2
#include<emmintrin.h>
/*!
\brief Returns the max value in src0
\param target The max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_max_s8i_u_sse2(char* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)
@ -152,13 +164,9 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse2(char* target, const char* src0
#endif /*LV_HAVE_SSE2*/
#ifdef LV_HAVE_GENERIC
/*!
\brief Returns the max value in src0
\param target The max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_max_s8i_generic(char* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)
@ -179,16 +187,9 @@ static inline void volk_gnsssdr_8i_max_s8i_generic(char* target, const char* src
#endif /*LV_HAVE_GENERIC*/
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Returns the max value in src0
\param target The max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_max_s8i_a_sse4_1(char* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)
@ -233,14 +234,10 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse4_1(char* target, const char* sr
#endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Returns the max value in src0
\param target The max value in src0
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_max_s8i_a_sse2(char* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_8i_x2_add_8i.h
* \brief Volk protokernel: adds pairs of 8 bits (char) scalars
* \brief VOLK_GNSSSDR kernel: adds pairs of 8 bits (char) scalars.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that adds pairs of 8 bits (char) scalars
* VOLK_GNSSSDR kernel that adds pairs of 8 bits (char) scalars
*
* -------------------------------------------------------------------------
*
@ -32,26 +32,42 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_8i_x2_add_8i
*
* \b Overview
*
* Adds the two input vectors and store the results in the third vector.
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_8i_x2_add_8i(char* cVector, const char* aVector, const char* bVector, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li aVector: One of the vectors of to be added.
* \li bVector: The other vector to be added.
* \li num_points: Number of values in \p aVector and \p bVector to be added together and stored into \p cVector
*
* \b Outputs
* \li cVector: The vector where the result will be stored.
*
*/
#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_H
#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_H
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Adds the two input vectors and store their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be added
\param bVector One of the vectors to be added
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 16;
char* cPtr = cVector;
const char* aPtr = aVector;
const char* bPtr= bVector;
const char* bPtr = bVector;
__m128i aVal, bVal, cVal;
@ -62,7 +78,7 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* a
cVal = _mm_add_epi8(aVal, bVal);
_mm_storeu_si128((__m128i*)cPtr,cVal); // Store the results back into the C container
_mm_storeu_si128((__m128i*)cPtr, cVal); // Store the results back into the C container
aPtr += 16;
bPtr += 16;
@ -76,19 +92,14 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_sse2(char* cVector, const char* a
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Adds the two input vectors and store their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be added
\param bVector One of the vectors to be added
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_8i_x2_add_8i_generic(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
{
char* cPtr = cVector;
const char* aPtr = aVector;
const char* bPtr= bVector;
const char* bPtr = bVector;
unsigned int number = 0;
for(number = 0; number < num_points; number++)
@ -101,20 +112,14 @@ static inline void volk_gnsssdr_8i_x2_add_8i_generic(char* cVector, const char*
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Adds the two input vectors and store their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be added
\param bVector One of the vectors to be added
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_8i_x2_add_8i_a_sse2(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 16;
char* cPtr = cVector;
const char* aPtr = aVector;
const char* bPtr= bVector;
const char* bPtr = bVector;
__m128i aVal, bVal, cVal;
@ -141,13 +146,7 @@ static inline void volk_gnsssdr_8i_x2_add_8i_a_sse2(char* cVector, const char* a
#ifdef LV_HAVE_ORC
/*!
\brief Adds the two input vectors and store their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be added
\param bVector One of the vectors to be added
\param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
extern void volk_gnsssdr_8i_x2_add_8i_a_orc_impl(char* cVector, const char* aVector, const char* bVector, unsigned int num_points);
static inline void volk_gnsssdr_8i_x2_add_8i_u_orc(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
{

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_8ic_conjugate_8ic.h
* \brief Volk protokernel: calculates the conjugate of a 16 bits vector
* \brief VOLK_GNSSSDR kernel: calculates the conjugate of a 16 bits vector.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that calculates the conjugate of a
* VOLK_GNSSSDR kernel that calculates the conjugate of a
* 16 bits vector (8 bits the real part and 8 bits the imaginary part)
*
* -------------------------------------------------------------------------
@ -33,6 +33,27 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_8ic_conjugate_8ic
*
* \b Overview
*
* Takes the conjugate of a complex unsigned char vector.
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_8ic_conjugate_8ic(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li aVector: Vector of complex items to be conjugated
* \li num_points: The number of complex data points.
*
* \b Outputs
* \li cVector: The vector where the result will be stored
*
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_H
#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_H
@ -40,12 +61,7 @@
#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_avx(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 16;
@ -81,14 +97,10 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_avx(lv_8sc_t* cVector, const
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSSE3
#include <tmmintrin.h>
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_ssse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
@ -116,14 +128,10 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_ssse3(lv_8sc_t* cVector, con
}
#endif /* LV_HAVE_SSSE3 */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
@ -153,13 +161,9 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_sse3(lv_8sc_t* cVector, cons
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
{
lv_8sc_t* cPtr = cVector;
@ -176,12 +180,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_generic(lv_8sc_t* cVector, con
#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_avx(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 16;
@ -217,14 +216,10 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_avx(lv_8sc_t* cVector, const
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_SSSE3
#include <tmmintrin.h>
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_ssse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
@ -252,14 +247,10 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_ssse3(lv_8sc_t* cVector, con
}
#endif /* LV_HAVE_SSSE3 */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
@ -291,12 +282,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, cons
#ifdef LV_HAVE_ORC
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
extern void volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points);
static inline void volk_gnsssdr_8ic_conjugate_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
{
@ -307,12 +293,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_orc(lv_8sc_t* cVector, const
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Takes the conjugate of an unsigned char vector.
\param cVector The vector where the results will be stored
\param aVector Vector to be conjugated
\param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_neon(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
@ -335,7 +316,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_neon(lv_8sc_t* cVector, const
{
*c++ = lv_conj(*a++);
}
}
#endif /* LV_HAVE_NEON */

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_8ic_magnitude_squared_8i.h
* \brief Volk protokernel: calculates the magnitude squared of a 16 bits vector
* \brief VOLK_GNSSSDR kernel: calculates the magnitude squared of a 16 bits vector.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that calculates the magnitude squared of a
* VOLK_GNSSSDR kernel that calculates the magnitude squared of a
* 16 bits vector (8 bits the real part and 8 bits the imaginary part)
* result = (real*real) + (imag*imag)
*
@ -34,18 +34,34 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_8ic_magnitude_squared_8i
*
* \b Overview
*
* Calculates the magnitude squared of the complex data items in \p complexVector and stores the results in \p magnitudeVector
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_8ic_magnitude_squared_8i(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li complexVector: The vector containing the complex input values
* \li num_points: The number of complex data points.
*
* \b Outputs
* \li magnitudeVector: The vector containing the real output values
*
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_H
#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_H
#ifdef LV_HAVE_SSSE3
#include <tmmintrin.h>
/*!
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse3(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 16;
@ -99,12 +115,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse3(char* magnitudeV
//#ifdef LV_HAVE_SSE
//#include <xmmintrin.h>
///*!
// \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
// \param complexVector The vector containing the complex input values
// \param magnitudeVector The vector containing the real output values
// \param num_points The number of complex values in complexVector to be calculated and stored into cVector
// */
//
//static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
// unsigned int number = 0;
// const unsigned int quarterPoints = num_points / 4;
@ -144,12 +155,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_sse3(char* magnitudeV
//#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_generic(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points)
{
const char* complexVectorPtr = (char*)complexVector;
@ -167,12 +173,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_generic(char* magnitude
#ifdef LV_HAVE_SSSE3
#include <tmmintrin.h>
/*!
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 16;
@ -226,12 +227,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeV
//#ifdef LV_HAVE_SSE
//#include <xmmintrin.h>
///*!
// \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
// \param complexVector The vector containing the complex input values
// \param magnitudeVector The vector containing the real output values
// \param num_points The number of complex values in complexVector to be calculated and stored into cVector
// */
//
//static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
// unsigned int number = 0;
// const unsigned int quarterPoints = num_points / 4;
@ -272,12 +268,7 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeV
#ifdef LV_HAVE_ORC
/*!
\brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector
\param complexVector The vector containing the complex input values
\param magnitudeVector The vector containing the real output values
\param num_points The number of complex values in complexVector to be calculated and stored into cVector
*/
extern void volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points);
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_orc(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points)
{

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_8ic_s8ic_multiply_8ic.h
* \brief Volk protokernel: multiplies a group of 16 bits vectors by one constant vector
* \brief VOLK_GNSSSDR kernel: multiplies a group of 16 bits vectors by one constant vector.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that multiplies a group of 16 bits vectors
* VOLK_GNSSSDR kernel that multiplies a group of 16 bits vectors
* (8 bits the real part and 8 bits the imaginary part) by one constant vector
*
* -------------------------------------------------------------------------
@ -33,6 +33,28 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_8ic_s8ic_multiply_8ic
*
* \b Overview
*
* Multiplies the input vector by a scalar and stores the results in the third vector
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_8ic_s8ic_multiply_8ic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li aVector: The vector to be multiplied.
* \li scalar The complex scalar to multiply \p aVector
* \li num_points: The number of complex values in \p aVector to be multiplied by \p scalar and stored into \p cVector.
*
* \b Outputs
* \li cVector: The vector where the results will be stored
*
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_H
#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_H
@ -40,13 +62,7 @@
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Multiplies the input vector by a scalar and stores the results in the third vector
\param cVector The vector where the results will be stored
\param aVector The vector to be multiplied
\param scalar The complex scalar to multiply aVector
\param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector
*/
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
{
unsigned int number = 0;
@ -59,31 +75,31 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector,
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
y = _mm_set1_epi16 (*(short*)&scalar);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
y = _mm_set1_epi16(*(short*)&scalar);
imagy = _mm_srli_si128(y, 1);
imagy = _mm_and_si128(imagy, mult1);
realy = _mm_and_si128(y, mult1);
for(; number < sse_iters; number++)
{
x = _mm_lddqu_si128((__m128i*)a);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagx = _mm_srli_si128(x, 1);
imagx = _mm_and_si128(imagx, mult1);
realx = _mm_and_si128(x, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realx_mult_realy = _mm_mullo_epi16(realx, realy);
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
realc = _mm_and_si128 (realc, mult1);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
imagc = _mm_and_si128 (imagc, mult1);
imagc = _mm_slli_si128 (imagc, 1);
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
realc = _mm_and_si128(realc, mult1);
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
imagc = _mm_and_si128(imagc, mult1);
imagc = _mm_slli_si128(imagc, 1);
totalc = _mm_or_si128 (realc, imagc);
totalc = _mm_or_si128(realc, imagc);
_mm_storeu_si128((__m128i*)c, totalc);
@ -99,14 +115,9 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector,
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the input vector by a scalar and stores the results in the third vector
\param cVector The vector where the results will be stored
\param aVector The vector to be multiplied
\param scalar The complex scalar to multiply aVector
\param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector
*/
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
{
/*lv_8sc_t* cPtr = cVector;
@ -144,13 +155,7 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_generic(lv_8sc_t* cVector,
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Multiplies the input vector by a scalar and stores the results in the third vector
\param cVector The vector where the results will be stored
\param aVector The vector to be multiplied
\param scalar The complex scalar to multiply aVector
\param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector
*/
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
{
unsigned int number = 0;
@ -163,31 +168,31 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector,
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
y = _mm_set1_epi16 (*(short*)&scalar);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
y = _mm_set1_epi16(*(short*)&scalar);
imagy = _mm_srli_si128(y, 1);
imagy = _mm_and_si128(imagy, mult1);
realy = _mm_and_si128(y, mult1);
for(; number < sse_iters; number++)
{
x = _mm_load_si128((__m128i*)a);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagx = _mm_srli_si128(x, 1);
imagx = _mm_and_si128(imagx, mult1);
realx = _mm_and_si128(x, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realx_mult_realy = _mm_mullo_epi16(realx, realy);
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
realc = _mm_and_si128 (realc, mult1);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
imagc = _mm_and_si128 (imagc, mult1);
imagc = _mm_slli_si128 (imagc, 1);
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
realc = _mm_and_si128(realc, mult1);
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
imagc = _mm_and_si128(imagc, mult1);
imagc = _mm_slli_si128(imagc, 1);
totalc = _mm_or_si128 (realc, imagc);
totalc = _mm_or_si128(realc, imagc);
_mm_store_si128((__m128i*)c, totalc);
@ -205,13 +210,7 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector,
#ifdef LV_HAVE_ORC
/*!
\brief Multiplies the input vector by a scalar and stores the results in the third vector
\param cVector The vector where the results will be stored
\param aVector The vector to be multiplied
\param scalar The complex scalar to multiply aVector
\param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector
*/
extern void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const char scalarreal, const char scalarimag, unsigned int num_points);
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
{

View File

@ -1,12 +1,12 @@
/*!
* \file volk_gnsssdr_8ic_x2_dot_prod_8ic.h
* \brief Volk protokernel: multiplies two 16 bits vectors and accumulates them
* \brief VOLK_GNSSSDR kernel: multiplies two 16 bits vectors and accumulates them.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
* and 8 bits the imaginary part) and accumulates them
* VOLK_GNSSSDR kernel that multiplies two 16 bits vectors (8 bits the real part
* and 8 bits the imaginary part) and accumulates them.
*
* -------------------------------------------------------------------------
*
@ -33,6 +33,29 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_8ic_x2_dot_prod_8ic
*
* \b Overview
*
* Multiplies two input complex vectors (8-bit integer each component) and accumulates them,
* storing the result.
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_16ic_x2_dot_prod_16ic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li in_a: One of the vectors to be multiplied and accumulated
* \li in_b: The other vector to be multiplied and accumulated
* \li num_points: The Number of complex values to be multiplied together, accumulated and stored into \p result
*
* \b Outputs
* \li result: Value of the accumulated result
*
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_H
#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_H
@ -42,26 +65,19 @@
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param[out] result Value of the accumulated result
\param[in] input One of the vectors to be multiplied
\param[in] taps One of the vectors to be multiplied
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
{
/*lv_8sc_t* cPtr = result;
const lv_8sc_t* aPtr = input;
const lv_8sc_t* bPtr = taps;
const lv_8sc_t* aPtr = in_a;
const lv_8sc_t* bPtr = in_b;
for(int number = 0; number < num_points; number++){
*cPtr += (*aPtr++) * (*bPtr++);
}*/
char * res = (char*) result;
char * in = (char*) input;
char * tp = (char*) taps;
char * in = (char*) in_a;
char * tp = (char*) in_b;
unsigned int n_2_ccomplex_blocks = num_points/2;
unsigned int isodd = num_points & 1;
@ -86,29 +102,23 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, co
// Cleanup if we had an odd number of points
for(i = 0; i < isodd; ++i)
{
*result += input[num_points - 1] * taps[num_points - 1];
*result += in_a[num_points - 1] * in_b[num_points - 1];
}
}
#endif /*LV_HAVE_GENERIC*/
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param[out] result Value of the accumulated result
\param[in] input One of the vectors to be multiplied
\param[in] taps One of the vectors to be multiplied
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
{
lv_8sc_t dotProduct;
memset(&dotProduct, 0x0, 2*sizeof(char));
const lv_8sc_t* a = input;
const lv_8sc_t* b = taps;
const lv_8sc_t* a = in_a;
const lv_8sc_t* b = in_b;
const unsigned int sse_iters = num_points/8;
@ -125,40 +135,40 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con
x = _mm_loadu_si128((__m128i*)a);
y = _mm_loadu_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagx = _mm_srli_si128(x, 1);
imagx = _mm_and_si128(imagx, mult1);
realx = _mm_and_si128(x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
imagy = _mm_srli_si128(y, 1);
imagy = _mm_and_si128(imagy, mult1);
realy = _mm_and_si128(y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realx_mult_realy = _mm_mullo_epi16(realx, realy);
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
realcacc = _mm_add_epi16 (realcacc, realc);
imagcacc = _mm_add_epi16 (imagcacc, imagc);
realcacc = _mm_add_epi16(realcacc, realc);
imagcacc = _mm_add_epi16(imagcacc, imagc);
a += 8;
b += 8;
}
realcacc = _mm_and_si128 (realcacc, mult1);
imagcacc = _mm_and_si128 (imagcacc, mult1);
imagcacc = _mm_slli_si128 (imagcacc, 1);
realcacc = _mm_and_si128(realcacc, mult1);
imagcacc = _mm_and_si128(imagcacc, mult1);
imagcacc = _mm_slli_si128(imagcacc, 1);
totalc = _mm_or_si128 (realcacc, imagcacc);
totalc = _mm_or_si128(realcacc, imagcacc);
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
_mm_storeu_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
_mm_storeu_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
for (int i = 0; i<8; ++i)
for (int i = 0; i < 8; ++i)
{
dotProduct += dotProductVector[i];
}
@ -174,23 +184,17 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con
#endif /*LV_HAVE_SSE2*/
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param[out] result Value of the accumulated result
\param[in] input One of the vectors to be multiplied
\param[in] taps One of the vectors to be multiplied
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
{
lv_8sc_t dotProduct;
memset(&dotProduct, 0x0, 2*sizeof(char));
const lv_8sc_t* a = input;
const lv_8sc_t* b = taps;
const lv_8sc_t* a = in_a;
const lv_8sc_t* b = in_b;
const unsigned int sse_iters = num_points/8;
@ -207,24 +211,24 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c
x = _mm_lddqu_si128((__m128i*)a);
y = _mm_lddqu_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagx = _mm_srli_si128(x, 1);
imagx = _mm_and_si128(imagx, mult1);
realx = _mm_and_si128(x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
imagy = _mm_srli_si128(y, 1);
imagy = _mm_and_si128(imagy, mult1);
realy = _mm_and_si128(y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realx_mult_realy = _mm_mullo_epi16(realx, realy);
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
realcacc = _mm_add_epi16 (realcacc, realc);
imagcacc = _mm_add_epi16 (imagcacc, imagc);
realcacc = _mm_add_epi16(realcacc, realc);
imagcacc = _mm_add_epi16(imagcacc, imagc);
a += 8;
b += 8;
@ -236,9 +240,9 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
_mm_storeu_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
_mm_storeu_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
for (unsigned int i = 0; i<8; ++i)
for (unsigned int i = 0; i < 8; ++i)
{
dotProduct += dotProductVector[i];
}
@ -258,20 +262,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param[out] result Value of the accumulated result
\param[in] input One of the vectors to be multiplied
\param[in] taps One of the vectors to be multiplied
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
{
lv_8sc_t dotProduct;
memset(&dotProduct, 0x0, 2*sizeof(char));
const lv_8sc_t* a = input;
const lv_8sc_t* b = taps;
const lv_8sc_t* a = in_a;
const lv_8sc_t* b = in_b;
const unsigned int sse_iters = num_points/8;
@ -288,40 +285,40 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, con
x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagx = _mm_srli_si128(x, 1);
imagx = _mm_and_si128(imagx, mult1);
realx = _mm_and_si128(x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
imagy = _mm_srli_si128(y, 1);
imagy = _mm_and_si128(imagy, mult1);
realy = _mm_and_si128(y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realx_mult_realy = _mm_mullo_epi16(realx, realy);
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
realcacc = _mm_add_epi16 (realcacc, realc);
imagcacc = _mm_add_epi16 (imagcacc, imagc);
realcacc = _mm_add_epi16(realcacc, realc);
imagcacc = _mm_add_epi16(imagcacc, imagc);
a += 8;
b += 8;
}
realcacc = _mm_and_si128 (realcacc, mult1);
imagcacc = _mm_and_si128 (imagcacc, mult1);
imagcacc = _mm_slli_si128 (imagcacc, 1);
realcacc = _mm_and_si128(realcacc, mult1);
imagcacc = _mm_and_si128(imagcacc, mult1);
imagcacc = _mm_slli_si128(imagcacc, 1);
totalc = _mm_or_si128 (realcacc, imagcacc);
totalc = _mm_or_si128(realcacc, imagcacc);
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
_mm_store_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
_mm_store_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
for (unsigned int i = 0; i<8; ++i)
for (unsigned int i = 0; i < 8; ++i)
{
dotProduct += dotProductVector[i];
}
@ -340,24 +337,17 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, con
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param[out] result Value of the accumulated result
\param[in] input One of the vectors to be multiplied
\param[in] taps One of the vectors to be multiplied
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
{
lv_8sc_t dotProduct;
memset(&dotProduct, 0x0, 2*sizeof(char));
const lv_8sc_t* a = input;
const lv_8sc_t* b = taps;
const lv_8sc_t* a = in_a;
const lv_8sc_t* b = in_b;
const unsigned int sse_iters = num_points/8;
const unsigned int sse_iters = num_points / 8;
if (sse_iters>0)
if (sse_iters > 0)
{
__m128i x, y, mult1, realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, realc, imagc, totalc, realcacc, imagcacc;
@ -370,24 +360,24 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c
x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagx = _mm_srli_si128(x, 1);
imagx = _mm_and_si128(imagx, mult1);
realx = _mm_and_si128(x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
imagy = _mm_srli_si128(y, 1);
imagy = _mm_and_si128(imagy, mult1);
realy = _mm_and_si128(y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realx_mult_realy = _mm_mullo_epi16(realx, realy);
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
realcacc = _mm_add_epi16 (realcacc, realc);
imagcacc = _mm_add_epi16 (imagcacc, imagc);
realcacc = _mm_add_epi16(realcacc, realc);
imagcacc = _mm_add_epi16(imagcacc, imagc);
a += 8;
b += 8;
@ -399,9 +389,9 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
_mm_store_si128((__m128i*)dotProductVector,totalc); // Store the results back into the dot product vector
_mm_store_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
for (unsigned int i = 0; i<8; ++i)
for (unsigned int i = 0; i < 8; ++i)
{
dotProduct += dotProductVector[i];
}
@ -417,17 +407,11 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c
#endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_ORC
/*!
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param[out] result Value of the accumulated result
\param[in] input One of the vectors to be multiplied
\param[in] taps One of the vectors to be multiplied
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/
extern void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(short* resRealShort, short* resImagShort, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points);
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
extern void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(short* resRealShort, short* resImagShort, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points);
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
{
short resReal = 0;
char* resRealChar = (char*)&resReal;
@ -437,7 +421,7 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, cons
char* resImagChar = (char*)&resImag;
resImagChar++;
volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(&resReal, &resImag, input, taps, num_points);
volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(&resReal, &resImag, in_a, in_b, num_points);
*result = lv_cmake(*resRealChar, *resImagChar);
}
@ -447,21 +431,14 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, cons
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param[out] result Value of the accumulated result
\param[in] input One of the vectors to be multiplied
\param[in] taps One of the vectors to be multiplied
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
{
lv_8sc_t dotProduct;
dotProduct = lv_cmake(0,0);
*result = lv_cmake(0,0);
const lv_8sc_t* a = input;
const lv_8sc_t* b = taps;
const lv_8sc_t* a = in_a;
const lv_8sc_t* b = in_b;
// for 2-lane vectors, 1st lane holds the real part,
// 2nd lane holds the imaginary part
int8x8x2_t a_val, b_val, c_val, accumulator, tmp_real, tmp_imag;

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_8ic_x2_multiply_8ic.h
* \brief Volk protokernel: multiplies two 16 bits vectors
* \brief VOLK_GNSSSDR kernel: multiplies two 16 bits vectors.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that multiplies two 16 bits vectors (8 bits the real part
* VOLK_GNSSSDR kernel that multiplies two 16 bits vectors (8 bits the real part
* and 8 bits the imaginary part)
*
* -------------------------------------------------------------------------
@ -33,6 +33,28 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_8ic_x2_multiply_8ic
*
* \b Overview
*
* Multiplies two input complex vectors, point-by-point, storing the result in the third vector
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_8ic_x2_multiply_8ic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li aVector: One of the vectors to be multiplied
* \li bVector: The other vector to be multiplied
* \li num_points: The number of complex data points.
*
* \b Outputs
* \li cVector: The vector where the result will be stored
*
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_H
#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_H
@ -41,13 +63,6 @@
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
\param[out] cVector The vector where the results will be stored
\param[in] aVector One of the vectors to be multiplied
\param[in] bVector One of the vectors to be multiplied
\param{in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
@ -99,16 +114,10 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
\param[out] cVector The vector where the results will be stored
\param[in] aVector One of the vectors to be multiplied
\param[in] bVector One of the vectors to be multiplied
\param{in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
@ -160,15 +169,9 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector,
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
\param[out] cVector The vector where the results will be stored
\param[in] aVector One of the vectors to be multiplied
\param[in] bVector One of the vectors to be multiplied
\param{in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{
lv_8sc_t* cPtr = cVector;
@ -186,13 +189,6 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, c
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
\param[out] cVector The vector where the results will be stored
\param[in] aVector One of the vectors to be multiplied
\param[in] bVector One of the vectors to be multiplied
\param{in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
@ -244,16 +240,10 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, co
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
\param[out] cVector The vector where the results will be stored
\param[in] aVector One of the vectors to be multiplied
\param[in] bVector One of the vectors to be multiplied
\param{in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 8;
@ -310,13 +300,6 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector,
extern void volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points);
/*!
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
\param[out] cVector The vector where the results will be stored
\param[in] aVector One of the vectors to be multiplied
\param[in] bVector One of the vectors to be multiplied
\param{in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{
volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(cVector, aVector, bVector, num_points);

View File

@ -1,11 +1,11 @@
/*!
* \file volk_gnsssdr_8u_x2_multiply_8u.h
* \brief Volk protokernel: multiplies unsigned char values
* \brief VOLK_GNSSSDR kernel: multiplies unsigned char values.
* \authors <ul>
* <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* Volk protokernel that multiplies unsigned char values (8 bits data)
* VOLK_GNSSSDR kernel that multiplies unsigned char values (8 bits data)
*
* -------------------------------------------------------------------------
*
@ -32,19 +32,36 @@
* -------------------------------------------------------------------------
*/
/*!
* \page volk_gnsssdr_8u_x2_multiply_8u
*
* \b Overview
*
* Multiplies two input vectors of unsigned char, point-by-point, storing the result in the third vector
*
* <b>Dispatcher Prototype</b>
* \code
* void volk_gnsssdr_8u_x2_multiply_8u(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points);
* \endcode
*
* \b Inputs
* \li aChar: One of the vectors to be multiplied
* \li bChar: The other vector to be multiplied
* \li num_points: The number of complex data points.
*
* \b Outputs
* \li cChar: The vector where the result will be stored
*
*/
#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_H
#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_H
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Multiplies the two input unsigned char values and stores their results in the third unsigned char
\param cChar The unsigned char where the results will be stored
\param aChar One of the unsigned char to be multiplied
\param bChar One of the unsigned char to be multiplied
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
*/
static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 16;
@ -60,21 +77,21 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, c
y = _mm_lddqu_si128((__m128i*)b);
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
x1 = _mm_srli_si128 (x, 1);
x1 = _mm_and_si128 (x1, mult1);
x2 = _mm_and_si128 (x, mult1);
x1 = _mm_srli_si128(x, 1);
x1 = _mm_and_si128(x1, mult1);
x2 = _mm_and_si128(x, mult1);
y1 = _mm_srli_si128 (y, 1);
y1 = _mm_and_si128 (y1, mult1);
y2 = _mm_and_si128 (y, mult1);
y1 = _mm_srli_si128(y, 1);
y1 = _mm_and_si128(y1, mult1);
y2 = _mm_and_si128(y, mult1);
x1_mult_y1 = _mm_mullo_epi16 (x1, y1);
x2_mult_y2 = _mm_mullo_epi16 (x2, y2);
x1_mult_y1 = _mm_mullo_epi16(x1, y1);
x2_mult_y2 = _mm_mullo_epi16(x2, y2);
tmp = _mm_and_si128 (x1_mult_y1, mult1);
tmp1 = _mm_slli_si128 (tmp, 1);
tmp2 = _mm_and_si128 (x2_mult_y2, mult1);
totalc = _mm_or_si128 (tmp1, tmp2);
tmp = _mm_and_si128(x1_mult_y1, mult1);
tmp1 = _mm_slli_si128(tmp, 1);
tmp2 = _mm_and_si128(x2_mult_y2, mult1);
totalc = _mm_or_si128(tmp1, tmp2);
_mm_storeu_si128((__m128i*)c, totalc);
@ -91,13 +108,7 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_u_sse3(unsigned char* cChar, c
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input unsigned char values and stores their results in the third unisgned char
\param cChar The unsigned char where the results will be stored
\param aChar One of the unsigned char to be multiplied
\param bChar One of the unsigned char to be multiplied
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
*/
static inline void volk_gnsssdr_8u_x2_multiply_8u_generic(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points)
{
unsigned char* cPtr = cChar;
@ -115,13 +126,6 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_generic(unsigned char* cChar,
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Multiplies the two input unsigned char values and stores their results in the third unisgned char
\param cChar The unsigned char where the results will be stored
\param aChar One of the unsigned char to be multiplied
\param bChar One of the unsigned char to be multiplied
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
*/
static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 16;
@ -137,21 +141,21 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, c
y = _mm_load_si128((__m128i*)b);
mult1 = _mm_set_epi8(0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255);
x1 = _mm_srli_si128 (x, 1);
x1 = _mm_and_si128 (x1, mult1);
x2 = _mm_and_si128 (x, mult1);
x1 = _mm_srli_si128(x, 1);
x1 = _mm_and_si128(x1, mult1);
x2 = _mm_and_si128(x, mult1);
y1 = _mm_srli_si128 (y, 1);
y1 = _mm_and_si128 (y1, mult1);
y2 = _mm_and_si128 (y, mult1);
y1 = _mm_srli_si128(y, 1);
y1 = _mm_and_si128(y1, mult1);
y2 = _mm_and_si128(y, mult1);
x1_mult_y1 = _mm_mullo_epi16 (x1, y1);
x2_mult_y2 = _mm_mullo_epi16 (x2, y2);
x1_mult_y1 = _mm_mullo_epi16(x1, y1);
x2_mult_y2 = _mm_mullo_epi16(x2, y2);
tmp = _mm_and_si128 (x1_mult_y1, mult1);
tmp1 = _mm_slli_si128 (tmp, 1);
tmp2 = _mm_and_si128 (x2_mult_y2, mult1);
totalc = _mm_or_si128 (tmp1, tmp2);
tmp = _mm_and_si128(x1_mult_y1, mult1);
tmp1 = _mm_slli_si128(tmp, 1);
tmp2 = _mm_and_si128(x2_mult_y2, mult1);
totalc = _mm_or_si128(tmp1, tmp2);
_mm_store_si128((__m128i*)c, totalc);
@ -169,13 +173,7 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, c
#ifdef LV_HAVE_ORC
/*!
\brief Multiplies the two input unsigned char values and stores their results in the third unisgned char
\param cChar The unsigned char where the results will be stored
\param aChar One of the unsigned char to be multiplied
\param bChar One of the unsigned char to be multiplied
\param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
*/
extern void volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl(unsigned char* cVector, const unsigned char* aVector, const unsigned char* bVector, unsigned int num_points);
static inline void volk_gnsssdr_8u_x2_multiply_8u_u_orc(unsigned char* cVector, const unsigned char* aVector, const unsigned char* bVector, unsigned int num_points)
{

View File

@ -106,7 +106,7 @@ macro(check_arch arch_name)
set(flags ${ARGN})
set(have_${arch_name} TRUE)
foreach(flag ${flags})
if ( (${COMPILER_NAME} STREQUAL "MSVC") AND (${flag} STREQUAL "/arch:SSE2" OR ${flag} STREQUAL "/arch:SSE" ))
if (MSVC AND (${flag} STREQUAL "/arch:SSE2" OR ${flag} STREQUAL "/arch:SSE" ))
# SSE/SSE2 is supported in MSVC since VS 2005 but flag not available when compiling 64-bit so do not check
else()
include(CheckCXXCompilerFlag)

View File

@ -1,183 +0,0 @@
/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
/* %ecx */
#define bit_SSE3 (1 << 0)
#define bit_PCLMUL (1 << 1)
#define bit_SSSE3 (1 << 9)
#define bit_FMA (1 << 12)
#define bit_CMPXCHG16B (1 << 13)
#define bit_SSE4_1 (1 << 19)
#define bit_SSE4_2 (1 << 20)
#define bit_MOVBE (1 << 22)
#define bit_POPCNT (1 << 23)
#define bit_AES (1 << 25)
#define bit_XSAVE (1 << 26)
#define bit_OSXSAVE (1 << 27)
#define bit_AVX (1 << 28)
#define bit_F16C (1 << 29)
#define bit_RDRND (1 << 30)
/* %edx */
#define bit_CMPXCHG8B (1 << 8)
#define bit_CMOV (1 << 15)
#define bit_MMX (1 << 23)
#define bit_FXSAVE (1 << 24)
#define bit_SSE (1 << 25)
#define bit_SSE2 (1 << 26)
/* Extended Features */
/* %ecx */
#define bit_LAHF_LM (1 << 0)
#define bit_ABM (1 << 5)
#define bit_SSE4a (1 << 6)
#define bit_XOP (1 << 11)
#define bit_LWP (1 << 15)
#define bit_FMA4 (1 << 16)
#define bit_TBM (1 << 21)
/* %edx */
#define bit_MMXEXT (1 << 22)
#define bit_LM (1 << 29)
#define bit_3DNOWP (1 << 30)
#define bit_3DNOW (1 << 31)
/* Extended Features (%eax == 7) */
#define bit_FSGSBASE (1 << 0)
#define bit_BMI (1 << 3)
#if defined(__i386__) && defined(__PIC__)
/* %ebx may be the PIC register. */
#if __GNUC__ >= 3
#define __cpuid(level, a, b, c, d) \
__asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
"cpuid\n\t" \
"xchg{l}\t{%%}ebx, %1\n\t" \
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
: "0" (level))
#define __cpuid_count(level, count, a, b, c, d) \
__asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
"cpuid\n\t" \
"xchg{l}\t{%%}ebx, %1\n\t" \
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
: "0" (level), "2" (count))
#else
/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
nor alternatives in i386 code. */
#define __cpuid(level, a, b, c, d) \
__asm__ ("xchgl\t%%ebx, %1\n\t" \
"cpuid\n\t" \
"xchgl\t%%ebx, %1\n\t" \
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
: "0" (level))
#define __cpuid_count(level, count, a, b, c, d) \
__asm__ ("xchgl\t%%ebx, %1\n\t" \
"cpuid\n\t" \
"xchgl\t%%ebx, %1\n\t" \
: "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
: "0" (level), "2" (count))
#endif
#else
#define __cpuid(level, a, b, c, d) \
__asm__ ("cpuid\n\t" \
: "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
: "0" (level))
#define __cpuid_count(level, count, a, b, c, d) \
__asm__ ("cpuid\n\t" \
: "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
: "0" (level), "2" (count))
#endif
/* Return highest supported input value for cpuid instruction. ext can
be either 0x0 or 0x8000000 to return highest supported value for
basic or extended cpuid information. Function returns 0 if cpuid
is not supported or whatever cpuid returns in eax register. If sig
pointer is non-null, then first four bytes of the signature
(as found in ebx register) are returned in location pointed by sig. */
static __inline unsigned int
__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
{
unsigned int __eax, __ebx, __ecx, __edx;
#ifndef __x86_64__
/* See if we can use cpuid. On AMD64 we always can. */
#if __GNUC__ >= 3
__asm__ ("pushf{l|d}\n\t"
"pushf{l|d}\n\t"
"pop{l}\t%0\n\t"
"mov{l}\t{%0, %1|%1, %0}\n\t"
"xor{l}\t{%2, %0|%0, %2}\n\t"
"push{l}\t%0\n\t"
"popf{l|d}\n\t"
"pushf{l|d}\n\t"
"pop{l}\t%0\n\t"
"popf{l|d}\n\t"
: "=&r" (__eax), "=&r" (__ebx)
: "i" (0x00200000));
#else
/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
nor alternatives in i386 code. */
__asm__ ("pushfl\n\t"
"pushfl\n\t"
"popl\t%0\n\t"
"movl\t%0, %1\n\t"
"xorl\t%2, %0\n\t"
"pushl\t%0\n\t"
"popfl\n\t"
"pushfl\n\t"
"popl\t%0\n\t"
"popfl\n\t"
: "=&r" (__eax), "=&r" (__ebx)
: "i" (0x00200000));
#endif
if (!((__eax ^ __ebx) & 0x00200000))
return 0;
#endif
/* Host supports cpuid. Return highest supported cpuid input value. */
__cpuid (__ext, __eax, __ebx, __ecx, __edx);
if (__sig)
*__sig = __ebx;
return __eax;
}
/* Return cpuid data for requested cpuid level, as found in returned
eax, ebx, ecx and edx registers. The function checks if cpuid is
supported and returns 1 for valid cpuid information or 0 for
unsupported cpuid level. All pointers are required to be non-null. */
static __inline int
__get_cpuid (unsigned int __level,
unsigned int *__eax, unsigned int *__ebx,
unsigned int *__ecx, unsigned int *__edx)
{
unsigned int __ext = __level & 0x80000000;
if (__get_cpuid_max (__ext, 0) < __level)
return 0;
__cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
return 1;
}

View File

@ -33,6 +33,7 @@ struct VOLK_CPU volk_gnsssdr_cpu;
#if defined(__GNUC__)
#include <cpuid.h>
#define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3)
#define cpuid_x86_count(op, count, regs) __cpuid_count(op, count, *((unsigned int*)regs), *((unsigned int*)regs+1), *((unsigned int*)regs+2), *((unsigned int*)regs+3))
/* Return Intel AVX extended CPU capabilities register.
* This function will bomb on non-AVX-capable machines, so
@ -68,8 +69,8 @@ struct VOLK_CPU volk_gnsssdr_cpu;
static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int count, unsigned int reg, unsigned int bit) {
#if defined(VOLK_CPU_x86)
unsigned int regs[4];
__cpuid_count(level, count, regs[0], regs[1], regs[2], regs[3]);
unsigned int regs[4] = {0};
cpuid_x86_count(level, count, regs);
return regs[reg] >> bit & 0x01;
#else
return 0;

View File

@ -8,7 +8,7 @@ LV_CXXFLAGS=@LV_CXXFLAGS@
Name: volk_gnsssdr
Description: VOLK_GNSSSDR: Vector Optimized Library of Kernels specific for GNSS-SDR
Requires:
Version: @VERSION@
Version: @LIBVER@
Libs: -L${libdir} -lvolk_gnsssdr
Cflags: -I${includedir} ${LV_CXXFLAGS}

View File

@ -38,6 +38,7 @@
#include <iostream>
#include <boost/lexical_cast.hpp>
#include <gnuradio/io_signature.h>
#include <pmt/pmt.h>
#include <glog/logging.h>
#include "control_message_factory.h"
#include "gnss_synchro.h"
@ -78,6 +79,9 @@ gps_l1_ca_telemetry_decoder_cc::gps_l1_ca_telemetry_decoder_cc(
gr::block("gps_navigation_cc", gr::io_signature::make(1, 1, sizeof(Gnss_Synchro)),
gr::io_signature::make(1, 1, sizeof(Gnss_Synchro)))
{
// create asynchronous message ports
this->message_port_register_out(pmt::mp("preamble_index"));
// initialize internal vars
d_queue = queue;
d_dump = dump;
@ -178,6 +182,8 @@ int gps_l1_ca_telemetry_decoder_cc::general_work (int noutput_items, gr_vector_i
const Gnss_Synchro **in = (const Gnss_Synchro **) &input_items[0]; //Get the input samples pointer
// TODO Optimize me!
if (in[0][d_samples_per_bit*8 - 1].symbol_integration_enabled==false)
{
//******* preamble correlation ********
for (unsigned int i = 0; i < d_samples_per_bit*8; i++)
{
@ -190,20 +196,37 @@ int gps_l1_ca_telemetry_decoder_cc::general_work (int noutput_items, gr_vector_i
corr_value += d_preambles_symbols[i];
}
}
}else{
//******* preamble correlation ********
for (unsigned int i = 0; i < d_samples_per_bit*8; i++)
{
if (in[0][i].Flag_valid_symbol_output==true)
{
if (in[0][i].Prompt_I < 0) // symbols clipping
{
corr_value -= d_preambles_symbols[i]*d_samples_per_bit;
}
else
{
corr_value += d_preambles_symbols[i]*d_samples_per_bit;
}
}
}
}
d_flag_preamble = false;
//******* frame sync ******************
if (abs(corr_value) >= 160)
if (abs(corr_value) == 160)
{
//TODO: Rewrite with state machine
if (d_stat == 0)
{
d_GPS_FSM.Event_gps_word_preamble();
d_preamble_index = d_sample_counter;//record the preamble sample stamp
LOG(INFO) << "Preamble detection for SAT " << this->d_satellite;
DLOG(INFO) << "Preamble detection for SAT " << this->d_satellite;
d_symbol_accumulator = 0; //sync the symbol to bits integrator
d_symbol_accumulator_counter = 0;
d_frame_bit_index = 8;
d_frame_bit_index = 7;
d_stat = 1; // enter into frame pre-detection status
}
else if (d_stat == 1) //check 6 seconds of preamble separation
@ -215,20 +238,24 @@ int gps_l1_ca_telemetry_decoder_cc::general_work (int noutput_items, gr_vector_i
d_flag_preamble = true;
d_preamble_index = d_sample_counter; //record the preamble sample stamp (t_P)
d_preamble_time_seconds = in[0][0].Tracking_timestamp_secs;// - d_preamble_duration_seconds; //record the PRN start sample index associated to the preamble
d_frame_bit_index = 7;
if (!d_flag_frame_sync)
{
//send asynchronous message to tracking to inform of frame sync and extend correlation time
pmt::pmt_t value = pmt::from_long(d_preamble_index-1);
this->message_port_pub(pmt::mp("preamble_index"),value);
d_flag_frame_sync = true;
if (corr_value < 0)
{
flag_PLL_180_deg_phase_locked = true; //PLL is locked to opposite phase!
LOG(INFO) << " PLL in opposite phase for Sat "<< this->d_satellite.get_PRN();
DLOG(INFO) << " PLL in opposite phase for Sat "<< this->d_satellite.get_PRN();
}
else
{
flag_PLL_180_deg_phase_locked = false;
}
LOG(INFO) << " Frame sync SAT " << this->d_satellite << " with preamble start at " << d_preamble_time_seconds << " [s]";
DLOG(INFO) << " Frame sync SAT " << this->d_satellite << " with preamble start at " << d_preamble_time_seconds << " [s]";
}
}
}
@ -240,7 +267,7 @@ int gps_l1_ca_telemetry_decoder_cc::general_work (int noutput_items, gr_vector_i
preamble_diff = d_sample_counter - d_preamble_index;
if (preamble_diff > 6001)
{
LOG(INFO) << "Lost of frame sync SAT " << this->d_satellite << " preamble_diff= " << preamble_diff;
DLOG(INFO) << "Lost of frame sync SAT " << this->d_satellite << " preamble_diff= " << preamble_diff;
d_stat = 0; //lost of frame sync
d_flag_frame_sync = false;
flag_TOW_set = false;
@ -249,16 +276,29 @@ int gps_l1_ca_telemetry_decoder_cc::general_work (int noutput_items, gr_vector_i
}
//******* SYMBOL TO BIT *******
d_symbol_accumulator += in[0][d_samples_per_bit*8 - 1].Prompt_I; // accumulate the input value in d_symbol_accumulator
d_symbol_accumulator_counter++;
if (d_symbol_accumulator_counter == 20)
if (in[0][d_samples_per_bit*8 - 1].Flag_valid_symbol_output==true)
{
if (in[0][d_samples_per_bit*8 - 1].symbol_integration_enabled==true)
{
// extended correlation to bit period is enabled in tracking!
// 1 symbol = 1 bit
d_symbol_accumulator = in[0][d_samples_per_bit*8 - 1].Prompt_I; // accumulate the input value in d_symbol_accumulator
d_symbol_accumulator_counter=20;
}else{
// 20 symbols = 1 bit: do symbols integration in telemetry decoder
d_symbol_accumulator += in[0][d_samples_per_bit*8 - 1].Prompt_I; // accumulate the input value in d_symbol_accumulator
d_symbol_accumulator_counter++;
}
}
if (d_symbol_accumulator_counter == 20 )
{
if (d_symbol_accumulator > 0)
{ //symbol to bit
d_GPS_frame_4bytes += 1; //insert the telemetry bit in LSB
}
d_symbol_accumulator = 0;
d_symbol_accumulator_counter = 0;
if (d_symbol_accumulator > 0)
{ //symbol to bit
d_GPS_frame_4bytes += 1; //insert the telemetry bit in LSB
}
d_symbol_accumulator = 0;
d_symbol_accumulator_counter = 0;
//******* bits to words ******
d_frame_bit_index++;
if (d_frame_bit_index == 30)
@ -302,6 +342,8 @@ int gps_l1_ca_telemetry_decoder_cc::general_work (int noutput_items, gr_vector_i
{
d_GPS_frame_4bytes <<= 1; //shift 1 bit left the telemetry word
}
}
// output the frame
consume_each(1); //one by one

View File

@ -34,7 +34,9 @@
#include <memory>
#include <sstream>
#include <boost/lexical_cast.hpp>
#include <boost/bind.hpp>
#include <gnuradio/io_signature.h>
#include <pmt/pmt.h>
#include <volk/volk.h>
#include <glog/logging.h>
#include "gps_sdr_signal_processing.h"
@ -82,6 +84,17 @@ void gps_l1_ca_dll_pll_c_aid_tracking_cc::forecast (int noutput_items,
}
}
void gps_l1_ca_dll_pll_c_aid_tracking_cc::msg_handler_preamble_index(pmt::pmt_t msg)
{
//pmt::print(msg);
DLOG(INFO) << "Extended correlation for Tracking CH " << d_channel << ": Satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN)<< std::endl;
if (d_enable_20ms_integration==false) //avoid re-setting preamble indicator
{
d_preamble_index=pmt::to_long(msg);
d_enable_20ms_integration=true;
d_preamble_synchronized=false;
}
}
gps_l1_ca_dll_pll_c_aid_tracking_cc::gps_l1_ca_dll_pll_c_aid_tracking_cc(
@ -97,6 +110,13 @@ gps_l1_ca_dll_pll_c_aid_tracking_cc::gps_l1_ca_dll_pll_c_aid_tracking_cc(
gr::block("gps_l1_ca_dll_pll_c_aid_tracking_cc", gr::io_signature::make(1, 1, sizeof(gr_complex)),
gr::io_signature::make(1, 1, sizeof(Gnss_Synchro)))
{
// create asynchronous message ports
this->message_port_register_in(pmt::mp("preamble_index"));
this->set_msg_handler(pmt::mp("preamble_index"),
boost::bind(&gps_l1_ca_dll_pll_c_aid_tracking_cc::msg_handler_preamble_index, this, _1));
// initialize internal vars
d_queue = queue;
d_dump = dump;
@ -107,8 +127,10 @@ gps_l1_ca_dll_pll_c_aid_tracking_cc::gps_l1_ca_dll_pll_c_aid_tracking_cc(
d_correlation_length_samples = static_cast<int>(d_vector_length);
// Initialize tracking ==========================================
d_code_loop_filter.set_DLL_BW(dll_bw_hz);
d_carrier_loop_filter.set_params(10.0, pll_bw_hz,2);
d_pll_bw_hz=pll_bw_hz;
d_dll_bw_hz=dll_bw_hz;
d_code_loop_filter.set_DLL_BW(d_dll_bw_hz);
d_carrier_loop_filter.set_params(10.0, d_pll_bw_hz,2);
//--- DLL variables --------------------------------------------------------
d_early_late_spc_chips = early_late_space_chips; // Define early-late offset (in chips)
@ -141,7 +163,9 @@ gps_l1_ca_dll_pll_c_aid_tracking_cc::gps_l1_ca_dll_pll_c_aid_tracking_cc(
d_rem_carrier_phase_rad = 0.0;
// sample synchronization
d_sample_counter = 0;
d_sample_counter = 0; //(from trk to tlm)
// symbol synchronization (from tlm to trk)
d_symbol_counter =0;
//d_sample_counter_seconds = 0;
d_acq_sample_stamp = 0;
@ -175,6 +199,8 @@ gps_l1_ca_dll_pll_c_aid_tracking_cc::gps_l1_ca_dll_pll_c_aid_tracking_cc(
d_rem_code_phase_chips = 0.0;
d_code_phase_step_chips = 0.0;
d_carrier_phase_step_rad = 0.0;
d_enable_20ms_integration=false;
d_preamble_synchronized=false;
//set_min_output_buffer((long int)300);
}
@ -258,7 +284,8 @@ void gps_l1_ca_dll_pll_c_aid_tracking_cc::start_tracking()
// enable tracking
d_pull_in = true;
d_enable_tracking = true;
d_enable_20ms_integration=false;
d_preamble_synchronized=false;
LOG(INFO) << "PULL-IN Doppler [Hz]=" << d_carrier_doppler_hz
<< " Code Phase correction [samples]=" << delay_correction_samples
<< " PULL-IN Code Phase [samples]=" << d_acq_code_phase_samples;
@ -315,6 +342,7 @@ int gps_l1_ca_dll_pll_c_aid_tracking_cc::general_work (int noutput_items, gr_vec
current_synchro_data = *d_acquisition_gnss_synchro;
*out[0] = current_synchro_data;
consume_each(samples_offset); //shift input to perform alignment with local replica
d_symbol_counter++;
return 1;
}
@ -326,115 +354,203 @@ int gps_l1_ca_dll_pll_c_aid_tracking_cc::general_work (int noutput_items, gr_vec
multicorrelator_cpu.set_input_output_vectors(d_correlator_outs,in);
multicorrelator_cpu.Carrier_wipeoff_multicorrelator_resampler(d_rem_carrier_phase_rad, d_carrier_phase_step_rad, d_rem_code_phase_chips, d_code_phase_step_chips, d_correlation_length_samples);
// UPDATE INTEGRATION TIME
CURRENT_INTEGRATION_TIME_S = static_cast<double>(d_correlation_length_samples) / static_cast<double>(d_fs_in);
// ####### 20ms coherent intergration extension (experimental)
// keep the last 40 symbols (2 bits to detect transitions)
d_E_history.push_back(d_correlator_outs[0]); // save early output
d_P_history.push_back(d_correlator_outs[1]); // save prompt output
d_L_history.push_back(d_correlator_outs[2]); // save late output
// ################## PLL ##########################################################
// Update PLL discriminator [rads/Ti -> Secs/Ti]
carr_phase_error_secs_Ti = pll_cloop_two_quadrant_atan(d_correlator_outs[1]) / GPS_TWO_PI; //prompt output
// Carrier discriminator filter
// NOTICE: The carrier loop filter includes the Carrier Doppler accumulator, as described in Kaplan
//d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_phase_error_filt_secs_ti/INTEGRATION_TIME;
// Input [s/Ti] -> output [Hz]
d_carrier_doppler_hz = d_carrier_loop_filter.get_carrier_error(0.0, carr_phase_error_secs_Ti, CURRENT_INTEGRATION_TIME_S);
// PLL to DLL assistance [Secs/Ti]
d_pll_to_dll_assist_secs_Ti = (d_carrier_doppler_hz * CURRENT_INTEGRATION_TIME_S) / GPS_L1_FREQ_HZ;
// code Doppler frequency update
d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ + ((d_carrier_doppler_hz * GPS_L1_CA_CODE_RATE_HZ) / GPS_L1_FREQ_HZ);
if (d_P_history.size()>GPS_CA_TELEMETRY_SYMBOLS_PER_BIT)
{
d_E_history.pop_front();
d_P_history.pop_front();
d_L_history.pop_front();
}
// ################## DLL ##########################################################
// DLL discriminator
code_error_chips_Ti = dll_nc_e_minus_l_normalized(d_correlator_outs[0], d_correlator_outs[2]); //[chips/Ti] //early and late
// Code discriminator filter
code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips_Ti); //input [chips/Ti] -> output [chips/second]
code_error_filt_secs_Ti = code_error_filt_chips*CURRENT_INTEGRATION_TIME_S/d_code_freq_chips; // [s/Ti]
// DLL code error estimation [s/Ti]
// TODO: PLL carrier aid to DLL is disabled. Re-enable it and measure performance
dll_code_error_secs_Ti = - code_error_filt_secs_Ti + d_pll_to_dll_assist_secs_Ti;
bool enable_dll_pll;
if (d_enable_20ms_integration==true)
{
long int symbol_diff=d_symbol_counter-d_preamble_index;
if (symbol_diff % GPS_CA_TELEMETRY_SYMBOLS_PER_BIT == 0)
{
// compute coherent integration and enable tracking loop
// perform coherent integration using correlator output history
//gr_complex d_correlator_outs_2[3];
//std::cout<<"##### RESET COHERENT INTEGRATION ####"<<std::endl;
d_correlator_outs[0]=gr_complex(0.0,0.0);
d_correlator_outs[1]=gr_complex(0.0,0.0);
d_correlator_outs[2]=gr_complex(0.0,0.0);
for (int n=0;n<GPS_CA_TELEMETRY_SYMBOLS_PER_BIT;n++)
{
d_correlator_outs[0]+=d_E_history.at(n);
d_correlator_outs[1]+=d_P_history.at(n);
d_correlator_outs[2]+=d_L_history.at(n);
}
// ################## CARRIER AND CODE NCO BUFFER ALIGNEMENT #######################
// keep alignment parameters for the next input buffer
double T_chip_seconds;
double T_prn_seconds;
double T_prn_samples;
double K_blk_samples;
// Compute the next buffer length based in the new period of the PRN sequence and the code phase error estimation
T_chip_seconds = 1 / d_code_freq_chips;
T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
T_prn_samples = T_prn_seconds * static_cast<double>(d_fs_in);
K_blk_samples = T_prn_samples + d_rem_code_phase_samples - dll_code_error_secs_Ti * static_cast<double>(d_fs_in);
if (d_preamble_synchronized==false)
{
d_preamble_synchronized=true;
}
current_synchro_data.symbol_integration_enabled=true;
// UPDATE INTEGRATION TIME
CURRENT_INTEGRATION_TIME_S = static_cast<double>(GPS_CA_TELEMETRY_SYMBOLS_PER_BIT)*GPS_L1_CA_CODE_PERIOD;
d_code_loop_filter.set_DLL_BW(d_dll_bw_hz);
d_carrier_loop_filter.set_params(10.0, d_pll_bw_hz/5,2);
enable_dll_pll=true;
d_correlation_length_samples = round(K_blk_samples); //round to a discrete samples
old_d_rem_code_phase_samples=d_rem_code_phase_samples;
d_rem_code_phase_samples = K_blk_samples - static_cast<double>(d_correlation_length_samples); //rounding error < 1 sample
}else{
current_synchro_data.symbol_integration_enabled=false;
if(d_preamble_synchronized==true)
{
// continue extended coherent correlation
d_correlation_length_samples=d_correlation_length_samples-d_rem_code_phase_integer_samples;
d_rem_code_phase_integer_samples=0;
d_rem_carrier_phase_rad = fmod(d_rem_carrier_phase_rad + d_carrier_phase_step_rad * d_correlation_length_samples, GPS_TWO_PI);
d_rem_code_phase_chips = fmod(d_rem_code_phase_chips + d_code_phase_step_chips*d_correlation_length_samples,GPS_L1_CA_CODE_LENGTH_CHIPS);
// disable tracking loop and inform telemetry decoder
enable_dll_pll=false;
}else{
// perform basic (1ms) correlation
// UPDATE INTEGRATION TIME
CURRENT_INTEGRATION_TIME_S = static_cast<double>(d_correlation_length_samples) / static_cast<double>(d_fs_in);
enable_dll_pll=true;
}
}
}else{
current_synchro_data.symbol_integration_enabled=false;
// UPDATE INTEGRATION TIME
CURRENT_INTEGRATION_TIME_S = static_cast<double>(d_correlation_length_samples) / static_cast<double>(d_fs_in);
enable_dll_pll=true;
}
// UPDATE REMNANT CARRIER PHASE
CORRECTED_INTEGRATION_TIME_S=(static_cast<double>(d_correlation_length_samples)/static_cast<double>(d_fs_in));
//remnant carrier phase [rad]
d_rem_carrier_phase_rad = fmod(d_rem_carrier_phase_rad + GPS_TWO_PI * d_carrier_doppler_hz * CORRECTED_INTEGRATION_TIME_S, GPS_TWO_PI);
// UPDATE CARRIER PHASE ACCUULATOR
//carrier phase accumulator prior to update the PLL estimators (accumulated carrier in this loop depends on the old estimations!)
d_acc_carrier_phase_cycles -= d_carrier_doppler_hz * CORRECTED_INTEGRATION_TIME_S;
// ###### end 20ms correlation extension
//################### PLL COMMANDS #################################################
//carrier phase step (NCO phase increment per sample) [rads/sample]
d_carrier_phase_step_rad = GPS_TWO_PI * d_carrier_doppler_hz / static_cast<double>(d_fs_in);
if (enable_dll_pll==true)
{
// ################## PLL ##########################################################
// Update PLL discriminator [rads/Ti -> Secs/Ti]
carr_phase_error_secs_Ti = pll_cloop_two_quadrant_atan(d_correlator_outs[1]) / GPS_TWO_PI; //prompt output
// Carrier discriminator filter
// NOTICE: The carrier loop filter includes the Carrier Doppler accumulator, as described in Kaplan
//d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_phase_error_filt_secs_ti/INTEGRATION_TIME;
// Input [s/Ti] -> output [Hz]
d_carrier_doppler_hz = d_carrier_loop_filter.get_carrier_error(0.0, carr_phase_error_secs_Ti, CURRENT_INTEGRATION_TIME_S);
// PLL to DLL assistance [Secs/Ti]
d_pll_to_dll_assist_secs_Ti = (d_carrier_doppler_hz * CURRENT_INTEGRATION_TIME_S) / GPS_L1_FREQ_HZ;
// code Doppler frequency update
d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ + ((d_carrier_doppler_hz * GPS_L1_CA_CODE_RATE_HZ) / GPS_L1_FREQ_HZ);
//################### DLL COMMANDS #################################################
//code phase step (Code resampler phase increment per sample) [chips/sample]
d_code_phase_step_chips = d_code_freq_chips / static_cast<double>(d_fs_in);
//remnant code phase [chips]
d_rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / static_cast<double>(d_fs_in));
// ################## DLL ##########################################################
// DLL discriminator
code_error_chips_Ti = dll_nc_e_minus_l_normalized(d_correlator_outs[0], d_correlator_outs[2]); //[chips/Ti] //early and late
// Code discriminator filter
code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips_Ti); //input [chips/Ti] -> output [chips/second]
code_error_filt_secs_Ti = code_error_filt_chips*CURRENT_INTEGRATION_TIME_S/d_code_freq_chips; // [s/Ti]
// DLL code error estimation [s/Ti]
dll_code_error_secs_Ti = - code_error_filt_secs_Ti + d_pll_to_dll_assist_secs_Ti;
// ####### CN0 ESTIMATION AND LOCK DETECTORS #######################################
if (d_cn0_estimation_counter < CN0_ESTIMATION_SAMPLES)
{
// fill buffer with prompt correlator output values
d_Prompt_buffer[d_cn0_estimation_counter] = d_correlator_outs[1]; //prompt
d_cn0_estimation_counter++;
}
else
{
d_cn0_estimation_counter = 0;
// Code lock indicator
d_CN0_SNV_dB_Hz = cn0_svn_estimator(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES, d_fs_in, GPS_L1_CA_CODE_LENGTH_CHIPS);
// Carrier lock indicator
d_carrier_lock_test = carrier_lock_detector(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES);
// Loss of lock detection
if (d_carrier_lock_test < d_carrier_lock_threshold or d_CN0_SNV_dB_Hz < MINIMUM_VALID_CN0)
{
d_carrier_lock_fail_counter++;
}
else
{
if (d_carrier_lock_fail_counter > 0) d_carrier_lock_fail_counter--;
}
if (d_carrier_lock_fail_counter > MAXIMUM_LOCK_FAIL_COUNTER)
{
std::cout << "Loss of lock in channel " << d_channel << "!" << std::endl;
LOG(INFO) << "Loss of lock in channel " << d_channel << "!";
std::unique_ptr<ControlMessageFactory> cmf(new ControlMessageFactory());
if (d_queue != gr::msg_queue::sptr())
{
d_queue->handle(cmf->GetQueueMessage(d_channel, 2));
}
d_carrier_lock_fail_counter = 0;
d_enable_tracking = false; // TODO: check if disabling tracking is consistent with the channel state machine
}
}
// ################## CARRIER AND CODE NCO BUFFER ALIGNEMENT #######################
// ########### Output the tracking data to navigation and PVT ##########
current_synchro_data.Prompt_I = static_cast<double>((d_correlator_outs[1]).real());
current_synchro_data.Prompt_Q = static_cast<double>((d_correlator_outs[1]).imag());
// Tracking_timestamp_secs is aligned with the CURRENT PRN start sample (Hybridization OK!)
current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + old_d_rem_code_phase_samples) / static_cast<double>(d_fs_in);
// This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0
current_synchro_data.Code_phase_secs = 0;
current_synchro_data.Carrier_phase_rads = GPS_TWO_PI * d_acc_carrier_phase_cycles;
current_synchro_data.Carrier_Doppler_hz = d_carrier_doppler_hz;
current_synchro_data.CN0_dB_hz = d_CN0_SNV_dB_Hz;
current_synchro_data.Flag_valid_pseudorange = false;
*out[0] = current_synchro_data;
// keep alignment parameters for the next input buffer
double T_chip_seconds;
double T_prn_seconds;
double T_prn_samples;
double K_prn_samples;
// Compute the next buffer length based in the new period of the PRN sequence and the code phase error estimation
T_chip_seconds = 1 / d_code_freq_chips;
T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
T_prn_samples = T_prn_seconds * static_cast<double>(d_fs_in);
K_prn_samples = round(T_prn_samples);
double K_T_prn_error_samples=K_prn_samples-T_prn_samples;
old_d_rem_code_phase_samples=d_rem_code_phase_samples;
d_rem_code_phase_samples= d_rem_code_phase_samples - K_T_prn_error_samples -dll_code_error_secs_Ti * static_cast<double>(d_fs_in);
d_rem_code_phase_integer_samples=round(d_rem_code_phase_samples);
d_correlation_length_samples = K_prn_samples + d_rem_code_phase_integer_samples; //round to a discrete samples
d_rem_code_phase_samples=d_rem_code_phase_samples-d_rem_code_phase_integer_samples;
// UPDATE REMNANT CARRIER PHASE
CORRECTED_INTEGRATION_TIME_S=(static_cast<double>(d_correlation_length_samples)/static_cast<double>(d_fs_in));
//remnant carrier phase [rad]
d_rem_carrier_phase_rad = fmod(d_rem_carrier_phase_rad + GPS_TWO_PI * d_carrier_doppler_hz * CORRECTED_INTEGRATION_TIME_S, GPS_TWO_PI);
// UPDATE CARRIER PHASE ACCUULATOR
//carrier phase accumulator prior to update the PLL estimators (accumulated carrier in this loop depends on the old estimations!)
d_acc_carrier_phase_cycles -= d_carrier_doppler_hz * CORRECTED_INTEGRATION_TIME_S;
//################### PLL COMMANDS #################################################
//carrier phase step (NCO phase increment per sample) [rads/sample]
d_carrier_phase_step_rad = GPS_TWO_PI * d_carrier_doppler_hz / static_cast<double>(d_fs_in);
//################### DLL COMMANDS #################################################
//code phase step (Code resampler phase increment per sample) [chips/sample]
d_code_phase_step_chips = d_code_freq_chips / static_cast<double>(d_fs_in);
//remnant code phase [chips]
d_rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / static_cast<double>(d_fs_in));
// ####### CN0 ESTIMATION AND LOCK DETECTORS #######################################
if (d_cn0_estimation_counter < CN0_ESTIMATION_SAMPLES)
{
// fill buffer with prompt correlator output values
d_Prompt_buffer[d_cn0_estimation_counter] = d_correlator_outs[1]; //prompt
d_cn0_estimation_counter++;
}
else
{
d_cn0_estimation_counter = 0;
// Code lock indicator
d_CN0_SNV_dB_Hz = cn0_svn_estimator(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES, d_fs_in, GPS_L1_CA_CODE_LENGTH_CHIPS);
// Carrier lock indicator
d_carrier_lock_test = carrier_lock_detector(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES);
// Loss of lock detection
if (d_carrier_lock_test < d_carrier_lock_threshold or d_CN0_SNV_dB_Hz < MINIMUM_VALID_CN0)
{
d_carrier_lock_fail_counter++;
}
else
{
if (d_carrier_lock_fail_counter > 0) d_carrier_lock_fail_counter--;
}
if (d_carrier_lock_fail_counter > MAXIMUM_LOCK_FAIL_COUNTER)
{
std::cout << "Loss of lock in channel " << d_channel << "!" << std::endl;
LOG(INFO) << "Loss of lock in channel " << d_channel << "!";
std::unique_ptr<ControlMessageFactory> cmf(new ControlMessageFactory());
if (d_queue != gr::msg_queue::sptr())
{
d_queue->handle(cmf->GetQueueMessage(d_channel, 2));
}
d_carrier_lock_fail_counter = 0;
d_enable_tracking = false; // TODO: check if disabling tracking is consistent with the channel state machine
}
}
// ########### Output the tracking data to navigation and PVT ##########
current_synchro_data.Prompt_I = static_cast<double>((d_correlator_outs[1]).real());
current_synchro_data.Prompt_Q = static_cast<double>((d_correlator_outs[1]).imag());
// Tracking_timestamp_secs is aligned with the CURRENT PRN start sample (Hybridization OK!)
current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + old_d_rem_code_phase_samples) / static_cast<double>(d_fs_in);
// This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0
current_synchro_data.Code_phase_secs = 0;
current_synchro_data.Carrier_phase_rads = GPS_TWO_PI * d_acc_carrier_phase_cycles;
current_synchro_data.Carrier_Doppler_hz = d_carrier_doppler_hz;
current_synchro_data.CN0_dB_hz = d_CN0_SNV_dB_Hz;
current_synchro_data.Flag_valid_pseudorange = false;
current_synchro_data.Flag_valid_symbol_output = true;
*out[0] = current_synchro_data;
}else{
//todo: fill synchronization data to produce output while coherent integration is running
current_synchro_data.Flag_valid_symbol_output = false;
current_synchro_data.Prompt_I = static_cast<double>((d_correlator_outs[1]).real());
current_synchro_data.Prompt_Q = static_cast<double>((d_correlator_outs[1]).imag());
// Tracking_timestamp_secs is aligned with the CURRENT PRN start sample (Hybridization OK!)
current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + d_rem_code_phase_samples) / static_cast<double>(d_fs_in);
// This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0
current_synchro_data.Code_phase_secs = 0;
current_synchro_data.Carrier_phase_rads = GPS_TWO_PI * d_acc_carrier_phase_cycles; // todo: project the acc carrier phase
current_synchro_data.Carrier_Doppler_hz = d_carrier_doppler_hz;// todo: project the carrier doppler
current_synchro_data.CN0_dB_hz = d_CN0_SNV_dB_Hz;
current_synchro_data.Flag_valid_pseudorange = false;
*out[0] = current_synchro_data;
}
// ########## DEBUG OUTPUT
/*!
@ -553,6 +669,7 @@ int gps_l1_ca_dll_pll_c_aid_tracking_cc::general_work (int noutput_items, gr_vec
{
LOG(WARNING) << "noutput_items = 0";
}
d_symbol_counter++;
return 1; //output tracking result ALWAYS even in the case of d_enable_tracking==false
}

View File

@ -39,9 +39,11 @@
#include <fstream>
#include <map>
#include <deque>
#include <string>
#include <gnuradio/block.h>
#include <gnuradio/msg_queue.h>
#include <pmt/pmt.h>
#include "concurrent_queue.h"
#include "gnss_synchro.h"
#include "tracking_2nd_DLL_filter.h"
@ -130,6 +132,7 @@ private:
double d_rem_code_phase_samples;
double d_rem_code_phase_chips;
double d_rem_carrier_phase_rad;
int d_rem_code_phase_integer_samples;
// PLL and DLL filter library
Tracking_2nd_DLL_filter d_code_loop_filter;
@ -140,6 +143,8 @@ private:
double d_acq_carrier_doppler_hz;
// tracking vars
float d_dll_bw_hz;
float d_pll_bw_hz;
double d_code_freq_chips;
double d_code_phase_step_chips;
double d_carrier_doppler_hz;
@ -148,6 +153,17 @@ private:
double d_code_phase_samples;
double d_pll_to_dll_assist_secs_Ti;
// symbol history to detect bit transition
std::deque<gr_complex> d_E_history;
std::deque<gr_complex> d_P_history;
std::deque<gr_complex> d_L_history;
long int d_preamble_index;
long int d_symbol_counter;
bool d_enable_20ms_integration;
bool d_preamble_synchronized;
int d_correlation_symbol_counter;
void msg_handler_preamble_index(pmt::pmt_t msg);
//Integration period in samples
int d_correlation_length_samples;

View File

@ -390,12 +390,12 @@ void GNSSFlowgraph::wait()
*/
void GNSSFlowgraph::apply_action(unsigned int who, unsigned int what)
{
DLOG(INFO) << "received " << what << " from " << who;
DLOG(INFO) << "received " << what << " from " << who;
switch (what)
{
case 0:
LOG(INFO) << "Channel " << who << " ACQ FAILED satellite " << channels_.at(who)->get_signal().get_satellite() << ", Signal " << channels_.at(who)->get_signal().get_signal_str();
DLOG(INFO) << "Channel " << who << " ACQ FAILED satellite " << channels_.at(who)->get_signal().get_satellite() << ", Signal " << channels_.at(who)->get_signal().get_signal_str();
available_GNSS_signals_.push_back(channels_.at(who)->get_signal());
//TODO: Optimize the channel and signal matching!
@ -403,9 +403,11 @@ void GNSSFlowgraph::apply_action(unsigned int who, unsigned int what)
{
available_GNSS_signals_.push_back(available_GNSS_signals_.front());
available_GNSS_signals_.pop_front();
std::cout << "loop"<<std::endl;
}
channels_.at(who)->set_signal(available_GNSS_signals_.front());
available_GNSS_signals_.pop_front();
//todo: This is a provisional bug fix to avoid random channel state machine deadlock caused by an incorrect sequence of events
// Correct sequence: start_acquisition() is triggered after the negative acquisition driven by the process_channel_messages() thread inside channel class
// Incorrect sequence: due to thread concurrency, some times start_acquisition is triggered BEFORE the last negative_acquisition notification, thus producing a deadlock
@ -417,7 +419,7 @@ void GNSSFlowgraph::apply_action(unsigned int who, unsigned int what)
// TODO: Tracking messages
case 1:
LOG(INFO) << "Channel " << who << " ACQ SUCCESS satellite " << channels_.at(who)->get_signal().get_satellite();
DLOG(INFO) << "Channel " << who << " ACQ SUCCESS satellite " << channels_.at(who)->get_signal().get_satellite();
channels_state_[who] = 2;
acq_channels_count_--;
if (!available_GNSS_signals_.empty() && acq_channels_count_ < max_acq_channels_)
@ -445,7 +447,7 @@ void GNSSFlowgraph::apply_action(unsigned int who, unsigned int what)
break;
case 2:
LOG(INFO) << "Channel " << who << " TRK FAILED satellite " << channels_.at(who)->get_signal().get_satellite();
DLOG(INFO) << "Channel " << who << " TRK FAILED satellite " << channels_.at(who)->get_signal().get_satellite();
if (acq_channels_count_ < max_acq_channels_)
{
channels_state_[who] = 1;
@ -468,7 +470,7 @@ void GNSSFlowgraph::apply_action(unsigned int who, unsigned int what)
default:
break;
}
DLOG(INFO) << "Number of available signals: " << available_GNSS_signals_.size();
DLOG(INFO) << "Number of available signals: " << available_GNSS_signals_.size();
}

View File

@ -75,6 +75,7 @@ const int GPS_L1_CA_HISTORY_DEEP = 100;
#define GPS_PREAMBLE {1, 0, 0, 0, 1, 0, 1, 1}
const int GPS_CA_PREAMBLE_LENGTH_BITS = 8;
const int GPS_CA_TELEMETRY_RATE_BITS_SECOND = 50; //!< NAV message bit rate [bits/s]
const int GPS_CA_TELEMETRY_SYMBOLS_PER_BIT = 20;
const int GPS_CA_TELEMETRY_RATE_SYMBOLS_SECOND = GPS_CA_TELEMETRY_RATE_BITS_SECOND*20; //!< NAV message bit rate [symbols/s]
const int GPS_WORD_LENGTH = 4; //!< CRC + GPS WORD (-2 -1 0 ... 29) Bits = 4 bytes
const int GPS_SUBFRAME_LENGTH = 40; //!< GPS_WORD_LENGTH x 10 = 40 bytes

View File

@ -60,6 +60,8 @@ public:
double Code_phase_secs; //!< Set by Tracking processing block
double Tracking_timestamp_secs; //!< Set by Tracking processing block
bool Flag_valid_tracking;
bool Flag_valid_symbol_output;
bool symbol_integration_enabled; //!< Set by Tracking processing block
//Telemetry Decoder
double Prn_timestamp_ms; //!< Set by Telemetry Decoder processing block