mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2024-12-14 20:20:35 +00:00
Merge branch 'next' of https://github.com/gnss-sdr/gnss-sdr into glonass
This commit is contained in:
commit
00ba4ff96e
@ -55,7 +55,6 @@ void galileo_e1_code_gen_int(int* _dest, char _Signal[3], signed int _prn)
|
||||
hex_to_binary_converter(&_dest[index], Galileo_E1_B_PRIMARY_CODE[prn].at(i));
|
||||
index = index + 4;
|
||||
}
|
||||
|
||||
}
|
||||
else if (_galileo_signal.rfind("1C") != std::string::npos && _galileo_signal.length() >= 2)
|
||||
{
|
||||
@ -72,8 +71,7 @@ void galileo_e1_code_gen_int(int* _dest, char _Signal[3], signed int _prn)
|
||||
}
|
||||
|
||||
|
||||
|
||||
void galileo_e1_sinboc_11_gen(std::complex<float>* _dest, int* _prn, unsigned int _length_out)
|
||||
void galileo_e1_sinboc_11_gen_int(int* _dest, int* _prn, unsigned int _length_out)
|
||||
{
|
||||
const unsigned int _length_in = Galileo_E1_B_CODE_LENGTH_CHIPS;
|
||||
unsigned int _period = static_cast<unsigned int>( _length_out / _length_in );
|
||||
@ -81,18 +79,17 @@ void galileo_e1_sinboc_11_gen(std::complex<float>* _dest, int* _prn, unsigned in
|
||||
{
|
||||
for (unsigned int j = 0; j < (_period / 2); j++)
|
||||
{
|
||||
_dest[i * _period + j] = std::complex<float>(static_cast<float>(_prn[i]), 0.0);
|
||||
_dest[i * _period + j] = _prn[i];
|
||||
}
|
||||
for (unsigned int j = (_period / 2); j < _period; j++)
|
||||
{
|
||||
_dest[i * _period + j] = std::complex<float>(static_cast<float>(- _prn[i]), 0.0);
|
||||
_dest[i * _period + j] = - _prn[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void galileo_e1_sinboc_61_gen(std::complex<float>* _dest, int* _prn, unsigned int _length_out)
|
||||
void galileo_e1_sinboc_61_gen_int(int* _dest, int* _prn, unsigned int _length_out)
|
||||
{
|
||||
const unsigned int _length_in = Galileo_E1_B_CODE_LENGTH_CHIPS;
|
||||
unsigned int _period = static_cast<unsigned int>(_length_out / _length_in);
|
||||
@ -101,42 +98,43 @@ void galileo_e1_sinboc_61_gen(std::complex<float>* _dest, int* _prn, unsigned in
|
||||
{
|
||||
for (unsigned int j = 0; j < _period; j += 2)
|
||||
{
|
||||
_dest[i * _period + j] = std::complex<float>(static_cast<float>(_prn[i]), 0.0);
|
||||
_dest[i * _period + j] = _prn[i];
|
||||
}
|
||||
for (unsigned int j = 1; j < _period; j += 2)
|
||||
{
|
||||
_dest[i * _period + j] = std::complex<float>(static_cast<float>(- _prn[i]), 0.0);
|
||||
_dest[i * _period + j] = - _prn[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void galileo_e1_gen(std::complex<float>* _dest, int* _prn, char _Signal[3])
|
||||
void galileo_e1_gen_float(float* _dest, int* _prn, char _Signal[3])
|
||||
{
|
||||
std::string _galileo_signal = _Signal;
|
||||
const unsigned int _codeLength = 12 * Galileo_E1_B_CODE_LENGTH_CHIPS;
|
||||
const float alpha = sqrt(10.0 / 11.0);
|
||||
const float beta = sqrt(1.0 / 11.0);
|
||||
|
||||
std::complex<float> sinboc_11[12 * 4092]; // _codeLength not accepted by Clang
|
||||
std::complex<float> sinboc_61[12 * 4092];
|
||||
int sinboc_11[12 * 4092]; // _codeLength not accepted by Clang
|
||||
int sinboc_61[12 * 4092];
|
||||
|
||||
galileo_e1_sinboc_11_gen(sinboc_11, _prn, _codeLength); //generate sinboc(1,1) 12 samples per chip
|
||||
galileo_e1_sinboc_61_gen(sinboc_61, _prn, _codeLength); //generate sinboc(6,1) 12 samples per chip
|
||||
galileo_e1_sinboc_11_gen_int(sinboc_11, _prn, _codeLength); //generate sinboc(1,1) 12 samples per chip
|
||||
galileo_e1_sinboc_61_gen_int(sinboc_61, _prn, _codeLength); //generate sinboc(6,1) 12 samples per chip
|
||||
|
||||
if (_galileo_signal.rfind("1B") != std::string::npos && _galileo_signal.length() >= 2)
|
||||
{
|
||||
for (unsigned int i = 0; i < _codeLength; i++)
|
||||
{
|
||||
_dest[i] = alpha * sinboc_11[i] + beta * sinboc_61[i];
|
||||
_dest[i] = alpha * static_cast<float>(sinboc_11[i]) +
|
||||
beta * static_cast<float>(sinboc_61[i]);
|
||||
}
|
||||
}
|
||||
else if (_galileo_signal.rfind("1C") != std::string::npos && _galileo_signal.length() >= 2)
|
||||
{
|
||||
for (unsigned int i = 0; i < _codeLength; i++)
|
||||
{
|
||||
_dest[i] = alpha * sinboc_11[i] - beta * sinboc_61[i];
|
||||
_dest[i] = alpha * static_cast<float>(sinboc_11[i]) -
|
||||
beta * static_cast<float>(sinboc_61[i]);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -144,8 +142,7 @@ void galileo_e1_gen(std::complex<float>* _dest, int* _prn, char _Signal[3])
|
||||
}
|
||||
|
||||
|
||||
|
||||
void galileo_e1_code_gen_complex_sampled(std::complex<float>* _dest, char _Signal[3],
|
||||
void galileo_e1_code_gen_float_sampled(float* _dest, char _Signal[3],
|
||||
bool _cboc, unsigned int _prn, signed int _fs, unsigned int _chip_shift,
|
||||
bool _secondary_flag)
|
||||
{
|
||||
@ -164,23 +161,29 @@ void galileo_e1_code_gen_complex_sampled(std::complex<float>* _dest, char _Signa
|
||||
|
||||
galileo_e1_code_gen_int(primary_code_E1_chips, _Signal, _prn); //generate Galileo E1 code, 1 sample per chip
|
||||
|
||||
std::complex<float>* _signal_E1;
|
||||
float* _signal_E1;
|
||||
|
||||
_codeLength = _samplesPerChip * Galileo_E1_B_CODE_LENGTH_CHIPS;
|
||||
_signal_E1 = new std::complex<float>[_codeLength];
|
||||
_signal_E1 = new float[_codeLength];
|
||||
|
||||
if (_cboc == true)
|
||||
{
|
||||
galileo_e1_gen(_signal_E1, primary_code_E1_chips, _Signal); //generate cboc 12 samples per chip
|
||||
galileo_e1_gen_float(_signal_E1, primary_code_E1_chips, _Signal); //generate cboc 12 samples per chip
|
||||
}
|
||||
else
|
||||
{
|
||||
galileo_e1_sinboc_11_gen(_signal_E1, primary_code_E1_chips, _codeLength); //generate sinboc(1,1) 2 samples per chip
|
||||
int _signal_E1_int[_codeLength];
|
||||
galileo_e1_sinboc_11_gen_int(_signal_E1_int, primary_code_E1_chips, _codeLength); //generate sinboc(1,1) 2 samples per chip
|
||||
|
||||
for( unsigned int ii = 0; ii < _codeLength; ++ii )
|
||||
{
|
||||
_signal_E1[ii] = static_cast< float >( _signal_E1_int[ii] );
|
||||
}
|
||||
}
|
||||
|
||||
if (_fs != _samplesPerChip * _codeFreqBasis)
|
||||
{
|
||||
std::complex<float>* _resampled_signal = new std::complex<float>[_samplesPerCode];
|
||||
float* _resampled_signal = new float[_samplesPerCode];
|
||||
resampler(_signal_E1, _resampled_signal, _samplesPerChip * _codeFreqBasis, _fs,
|
||||
_codeLength, _samplesPerCode); //resamples code to fs
|
||||
|
||||
@ -188,21 +191,16 @@ void galileo_e1_code_gen_complex_sampled(std::complex<float>* _dest, char _Signa
|
||||
_signal_E1 = _resampled_signal;
|
||||
}
|
||||
|
||||
|
||||
if (_galileo_signal.rfind("1C") != std::string::npos && _galileo_signal.length() >= 2 && _secondary_flag)
|
||||
{
|
||||
|
||||
std::complex<float>* _signal_E1C_secondary = new std::complex<float>
|
||||
[static_cast<int>(Galileo_E1_C_SECONDARY_CODE_LENGTH)
|
||||
* _samplesPerCode];
|
||||
float* _signal_E1C_secondary = new float[static_cast<int>(Galileo_E1_C_SECONDARY_CODE_LENGTH) * _samplesPerCode];
|
||||
|
||||
for (unsigned int i = 0; i < static_cast<unsigned int>(Galileo_E1_C_SECONDARY_CODE_LENGTH); i++)
|
||||
{
|
||||
for (unsigned k = 0; k < _samplesPerCode; k++)
|
||||
{
|
||||
_signal_E1C_secondary[i*_samplesPerCode + k] = _signal_E1[k]
|
||||
* (Galileo_E1_C_SECONDARY_CODE.at(i) == '0'
|
||||
? std::complex<float>(1,0) : std::complex<float>(-1,0));
|
||||
* (Galileo_E1_C_SECONDARY_CODE.at(i) == '0' ? 1.0f : -1.0f);
|
||||
}
|
||||
}
|
||||
|
||||
@ -221,6 +219,38 @@ void galileo_e1_code_gen_complex_sampled(std::complex<float>* _dest, char _Signa
|
||||
}
|
||||
|
||||
|
||||
void galileo_e1_code_gen_complex_sampled(std::complex<float>* _dest, char _Signal[3],
|
||||
bool _cboc, unsigned int _prn, signed int _fs, unsigned int _chip_shift,
|
||||
bool _secondary_flag)
|
||||
{
|
||||
std::string _galileo_signal = _Signal;
|
||||
const int _codeFreqBasis = Galileo_E1_CODE_CHIP_RATE_HZ; //Hz
|
||||
unsigned int _samplesPerCode = static_cast<unsigned int>( static_cast<double>(_fs) /
|
||||
(static_cast<double>(_codeFreqBasis ) / static_cast<double>(Galileo_E1_B_CODE_LENGTH_CHIPS)));
|
||||
|
||||
if (_galileo_signal.rfind("1C") != std::string::npos && _galileo_signal.length() >= 2 && _secondary_flag)
|
||||
{
|
||||
_samplesPerCode *= static_cast<int>(Galileo_E1_C_SECONDARY_CODE_LENGTH);
|
||||
}
|
||||
|
||||
float real_code[_samplesPerCode];
|
||||
|
||||
galileo_e1_code_gen_float_sampled( real_code, _Signal, _cboc, _prn, _fs, _chip_shift, _secondary_flag );
|
||||
|
||||
for( unsigned int ii = 0; ii < _samplesPerCode; ++ii )
|
||||
{
|
||||
_dest[ii] = std::complex< float >( real_code[ii], 0.0f );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void galileo_e1_code_gen_float_sampled(float* _dest, char _Signal[3],
|
||||
bool _cboc, unsigned int _prn, signed int _fs, unsigned int _chip_shift)
|
||||
{
|
||||
galileo_e1_code_gen_float_sampled(_dest, _Signal, _cboc, _prn, _fs, _chip_shift, false);
|
||||
}
|
||||
|
||||
|
||||
void galileo_e1_code_gen_complex_sampled(std::complex<float>* _dest, char _Signal[3],
|
||||
bool _cboc, unsigned int _prn, signed int _fs, unsigned int _chip_shift)
|
||||
{
|
||||
|
@ -36,30 +36,22 @@
|
||||
|
||||
|
||||
/*!
|
||||
* \brief This function generates Galileo E1 code (one sample per chip).
|
||||
* \brief This function generates Galileo E1 code (can select E1B or E1C, cboc or sinboc
|
||||
* and the sample frequency _fs).
|
||||
*
|
||||
*/
|
||||
void galileo_e1_code_gen_int(int* _dest, char _Signal[3], signed int _prn);
|
||||
void galileo_e1_code_gen_float_sampled(float* _dest, char _Signal[3],
|
||||
bool _cboc, unsigned int _prn, signed int _fs, unsigned int _chip_shift,
|
||||
bool _secondary_flag);
|
||||
|
||||
/*!
|
||||
* \brief This function generates Galileo E1 sinboc(1,1) code (minimum 2 samples per chip),
|
||||
* the _codeLength variable must be a multiple of 2*4092.
|
||||
* \brief This function generates Galileo E1 code (can select E1B or E1C, cboc or sinboc
|
||||
* and the sample frequency _fs).
|
||||
*
|
||||
*/
|
||||
void galileo_e1_sinboc_11_gen(std::complex<float>* _dest, int* _prn,
|
||||
unsigned int _codeLength);
|
||||
/*!
|
||||
* \brief This function generates Galileo E1 sinboc(6,1) code (minimum 12 samples per chip),
|
||||
* the _codeLength variable must be a multiple of 12*4092.
|
||||
*
|
||||
*/
|
||||
void galileo_e1_sinboc_61_gen(std::complex<float>* _dest, int* _prn,
|
||||
unsigned int _codeLength);
|
||||
/*!
|
||||
* \brief This function generates Galileo E1 cboc code (12 samples per chip).
|
||||
*
|
||||
*/
|
||||
void galileo_e1_cboc_gen(std::complex<float>* _dest, int* _prn, char _Signal[3]);
|
||||
void galileo_e1_code_gen_float_sampled(float* _dest, char _Signal[3],
|
||||
bool _cboc, unsigned int _prn, signed int _fs, unsigned int _chip_shift);
|
||||
|
||||
/*!
|
||||
* \brief This function generates Galileo E1 code (can select E1B or E1C, cboc or sinboc
|
||||
* and the sample frequency _fs).
|
||||
|
@ -156,6 +156,28 @@ void hex_to_binary_converter(int * _dest, char _from)
|
||||
}
|
||||
}
|
||||
|
||||
void resampler(float* _from, float* _dest, float _fs_in,
|
||||
float _fs_out, unsigned int _length_in, unsigned int _length_out)
|
||||
{
|
||||
unsigned int _codeValueIndex;
|
||||
float aux;
|
||||
//--- Find time constants --------------------------------------------------
|
||||
const float _t_in = 1 / _fs_in; // Incoming sampling period in sec
|
||||
const float _t_out = 1 / _fs_out; // Out sampling period in sec
|
||||
for (unsigned int i = 0; i < _length_out - 1; i++)
|
||||
{
|
||||
//=== Digitizing =======================================================
|
||||
//--- compute index array to read sampled values -------------------------
|
||||
//_codeValueIndex = ceil((_t_out * ((float)i + 1)) / _t_in) - 1;
|
||||
aux = (_t_out * (i + 1)) / _t_in;
|
||||
_codeValueIndex = auxCeil2(aux) - 1;
|
||||
|
||||
//if repeat the chip -> upsample by nearest neighborhood interpolation
|
||||
_dest[i] = _from[_codeValueIndex];
|
||||
}
|
||||
//--- Correct the last index (due to number rounding issues) -----------
|
||||
_dest[_length_out - 1] = _from[_length_in - 1];
|
||||
}
|
||||
|
||||
void resampler(std::complex<float>* _from, std::complex<float>* _dest, float _fs_in,
|
||||
float _fs_out, unsigned int _length_in, unsigned int _length_out)
|
||||
|
@ -60,6 +60,13 @@ void complex_exp_gen_conj(std::complex<float>* _dest, double _f, double _fs,
|
||||
*/
|
||||
void hex_to_binary_converter(int * _dest, char _from);
|
||||
|
||||
/*!
|
||||
* \brief This function resamples a sequence of float values.
|
||||
*
|
||||
*/
|
||||
void resampler(float* _from, float* _dest,
|
||||
float _fs_in, float _fs_out, unsigned int _length_in,
|
||||
unsigned int _length_out);
|
||||
/*!
|
||||
* \brief This function resamples a sequence of complex values.
|
||||
*
|
||||
|
@ -34,7 +34,7 @@
|
||||
|
||||
// Cheap ceiling substitute: adds one and truncates toward zero.
// NOTE: this is not a true ceil() -- an exact integer x yields x + 1, and
// negative inputs are truncated toward zero after the increment.
auto auxCeil = [](float x) {
    const long truncated = static_cast<long>(x + 1);
    return static_cast<int>(truncated);
};
|
||||
|
||||
void gps_l1_ca_code_gen_complex(std::complex<float>* _dest, signed int _prn, unsigned int _chip_shift)
|
||||
void gps_l1_ca_code_gen_int(int* _dest, signed int _prn, unsigned int _chip_shift)
|
||||
{
|
||||
const unsigned int _code_length = 1023;
|
||||
bool G1[_code_length];
|
||||
@ -102,11 +102,11 @@ void gps_l1_ca_code_gen_complex(std::complex<float>* _dest, signed int _prn, uns
|
||||
aux = G1[(lcv + _chip_shift) % _code_length]^G2[delay];
|
||||
if(aux == true)
|
||||
{
|
||||
_dest[lcv] = std::complex<float>(1, 0);
|
||||
_dest[lcv] = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
_dest[lcv] = std::complex<float>(-1, 0);
|
||||
_dest[lcv] = -1;
|
||||
}
|
||||
delay++;
|
||||
delay %= _code_length;
|
||||
@ -114,6 +114,33 @@ void gps_l1_ca_code_gen_complex(std::complex<float>* _dest, signed int _prn, uns
|
||||
}
|
||||
|
||||
|
||||
void gps_l1_ca_code_gen_float(float* _dest, signed int _prn, unsigned int _chip_shift)
|
||||
{
|
||||
unsigned int _code_length = 1023;
|
||||
int ca_code_int[ _code_length ];
|
||||
|
||||
gps_l1_ca_code_gen_int( ca_code_int, _prn, _chip_shift );
|
||||
|
||||
for( unsigned int ii = 0; ii < _code_length; ++ii )
|
||||
{
|
||||
_dest[ii] = static_cast<float>( ca_code_int[ii] );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void gps_l1_ca_code_gen_complex(std::complex<float>* _dest, signed int _prn, unsigned int _chip_shift)
|
||||
{
|
||||
unsigned int _code_length = 1023;
|
||||
int ca_code_int[ _code_length ];
|
||||
|
||||
gps_l1_ca_code_gen_int( ca_code_int, _prn, _chip_shift );
|
||||
|
||||
for( unsigned int ii = 0; ii < _code_length; ++ii )
|
||||
{
|
||||
_dest[ii] = std::complex<float>( static_cast<float>(ca_code_int[ii]), 0.0f );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Generates complex GPS L1 C/A code for the desired SV ID and sampled to specific sampling frequency
|
||||
|
@ -35,6 +35,12 @@
|
||||
|
||||
#include <complex>
|
||||
|
||||
//!Generates int GPS L1 C/A code for the desired SV ID and code shift
|
||||
void gps_l1_ca_code_gen_int(int* _dest, signed int _prn, unsigned int _chip_shift);
|
||||
|
||||
//!Generates float GPS L1 C/A code for the desired SV ID and code shift
|
||||
void gps_l1_ca_code_gen_float(float* _dest, signed int _prn, unsigned int _chip_shift);
|
||||
|
||||
//!Generates complex GPS L1 C/A code for the desired SV ID and code shift
|
||||
void gps_l1_ca_code_gen_complex(std::complex<float>* _dest, signed int _prn, unsigned int _chip_shift);
|
||||
|
||||
|
@ -61,6 +61,14 @@ _mm256_magnitudesquared_ps(__m256 cplxValue1, __m256 cplxValue2){
|
||||
return _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
|
||||
}
|
||||
|
||||
/*
 * Divides every element of z by the square root of the sum of squares of the
 * element pair it belongs to (elements 0-1, 2-3, 4-5, 6-7).
 * NOTE(review): presumably z holds four interleaved (re, im) complex floats,
 * as the function name suggests -- confirm against callers.
 * A pair that is entirely zero divides by zero.
 */
static inline __m256 _mm256_complexnormalise_ps(__m256 z)
{
    const __m256 squares = _mm256_mul_ps(z, z);
    // Adjacent sums: each 128-bit half becomes [s0, s1, s0, s1]
    const __m256 pair_sums = _mm256_hadd_ps(squares, squares);
    // 0xD8 reorders each half to [s0, s0, s1, s1] so every element lines up
    // with the sum of its own pair
    const __m256 norm_squared = _mm256_shuffle_ps(pair_sums, pair_sums, 0xD8);
    return _mm256_div_ps(z, _mm256_sqrt_ps(norm_squared));
}
|
||||
|
||||
static inline __m256
|
||||
_mm256_magnitude_ps(__m256 cplxValue1, __m256 cplxValue2){
|
||||
return _mm256_sqrt_ps(_mm256_magnitudesquared_ps(cplxValue1, cplxValue2));
|
||||
|
@ -0,0 +1,282 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16i_resamplerxnpuppet_16i.h
|
||||
* \brief VOLK_GNSSSDR puppet for the multiple 16-bit vector resampler kernel.
|
||||
* \authors <ul>
|
||||
* <li> Cillian O'Driscoll 2017 cillian.odriscoll at gmail dot com
|
||||
* </ul>
|
||||
*
|
||||
* VOLK_GNSSSDR puppet for integrating the multiple resampler into the test system
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2017 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_16i_resamplerxnpuppet_16i_H
|
||||
#define INCLUDED_volk_gnsssdr_16i_resamplerxnpuppet_16i_H
|
||||
|
||||
#include "volk_gnsssdr/volk_gnsssdr_16i_xn_resampler_16i_xn.h"
|
||||
#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_generic(int16_t* result, const int16_t* local_code, unsigned int num_points)
|
||||
{
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
unsigned int n;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
float shifts_chips[3] = { -0.1, 0.0, 0.1 };
|
||||
int16_t** result_aux = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment());
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
}
|
||||
|
||||
volk_gnsssdr_16i_xn_resampler_16i_xn_generic(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
|
||||
|
||||
memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points);
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
volk_gnsssdr_free(result_aux[n]);
|
||||
}
|
||||
volk_gnsssdr_free(result_aux);
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_a_sse3(int16_t* result, const int16_t* local_code, unsigned int num_points)
|
||||
{
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
float shifts_chips[3] = { -0.1, 0.0, 0.1 };
|
||||
int16_t** result_aux = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment());
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
}
|
||||
|
||||
volk_gnsssdr_16i_xn_resampler_16i_xn_a_sse3(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
|
||||
|
||||
memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points);
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
volk_gnsssdr_free(result_aux[n]);
|
||||
}
|
||||
volk_gnsssdr_free(result_aux);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_u_sse3(int16_t* result, const int16_t* local_code, unsigned int num_points)
|
||||
{
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
float shifts_chips[3] = { -0.1, 0.0, 0.1 };
|
||||
int16_t** result_aux = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment());
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
}
|
||||
|
||||
volk_gnsssdr_16i_xn_resampler_16i_xn_u_sse3(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
|
||||
|
||||
memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points);
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
volk_gnsssdr_free(result_aux[n]);
|
||||
}
|
||||
volk_gnsssdr_free(result_aux);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_u_sse4_1(int16_t* result, const int16_t* local_code, unsigned int num_points)
|
||||
{
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
float shifts_chips[3] = { -0.1, 0.0, 0.1 };
|
||||
int16_t** result_aux = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment());
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
}
|
||||
|
||||
volk_gnsssdr_16i_xn_resampler_16i_xn_u_sse4_1(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
|
||||
|
||||
memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points);
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
volk_gnsssdr_free(result_aux[n]);
|
||||
}
|
||||
volk_gnsssdr_free(result_aux);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_a_sse4_1(int16_t* result, const int16_t* local_code, unsigned int num_points)
|
||||
{
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
float shifts_chips[3] = { -0.1, 0.0, 0.1 };
|
||||
int16_t** result_aux = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment());
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
}
|
||||
|
||||
volk_gnsssdr_16i_xn_resampler_16i_xn_a_sse4_1(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
|
||||
|
||||
memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points);
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
volk_gnsssdr_free(result_aux[n]);
|
||||
}
|
||||
volk_gnsssdr_free(result_aux);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_u_avx(int16_t* result, const int16_t* local_code, unsigned int num_points)
|
||||
{
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
float shifts_chips[3] = { -0.1, 0.0, 0.1 };
|
||||
int16_t** result_aux = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment());
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
}
|
||||
|
||||
volk_gnsssdr_16i_xn_resampler_16i_xn_u_avx(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
|
||||
|
||||
memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points);
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
volk_gnsssdr_free(result_aux[n]);
|
||||
}
|
||||
volk_gnsssdr_free(result_aux);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_a_avx(int16_t* result, const int16_t* local_code, unsigned int num_points)
|
||||
{
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
float shifts_chips[3] = { -0.1, 0.0, 0.1 };
|
||||
int16_t** result_aux = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment());
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
}
|
||||
|
||||
volk_gnsssdr_16i_xn_resampler_16i_xn_a_avx(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
|
||||
|
||||
memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points);
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
volk_gnsssdr_free(result_aux[n]);
|
||||
}
|
||||
volk_gnsssdr_free(result_aux);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_neon(int16_t* result, const int16_t* local_code, unsigned int num_points)
|
||||
{
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
float shifts_chips[3] = { -0.1, 0.0, 0.1 };
|
||||
int16_t** result_aux = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_out_vectors, volk_gnsssdr_get_alignment());
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
result_aux[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
}
|
||||
|
||||
volk_gnsssdr_16i_xn_resampler_16i_xn_neon(result_aux, local_code, rem_code_phase_chips, code_phase_step_chips, shifts_chips, code_length_chips, num_out_vectors, num_points);
|
||||
|
||||
memcpy((int16_t*)result, (int16_t*)result_aux[0], sizeof(int16_t) * num_points);
|
||||
|
||||
for(n = 0; n < num_out_vectors; n++)
|
||||
{
|
||||
volk_gnsssdr_free(result_aux[n]);
|
||||
}
|
||||
volk_gnsssdr_free(result_aux);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif  // INCLUDED_volk_gnsssdr_16i_resamplerxnpuppet_16i_H
|
||||
|
@ -0,0 +1,608 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16i_xn_resampler_16i_xn.h
|
||||
* \brief VOLK_GNSSSDR kernel: Resamples N 16 bits integer short vectors using zero hold resample algorithm.
|
||||
* \authors <ul>
|
||||
* <li> Cillian O'Driscoll, 2017. cillian.odriscoll(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* VOLK_GNSSSDR kernel that resamples N 16-bit integer (short) vectors using a zero-hold resampling algorithm.
|
||||
* It resamples a single GNSS local code signal replica into N vectors fractional-resampled and fractional-delayed
|
||||
* (i.e. it creates the Early, Prompt, and Late code replicas)
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2017 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_16i_xn_resampler_16i_xn
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Resamples a vector of 16-bit integers, providing \p num_out_vectors outputs.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_16i_xn_resampler_16i_xn(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li local_code: Vector to be resampled.
|
||||
* \li rem_code_phase_chips: Remnant code phase [chips].
|
||||
* \li code_phase_step_chips: Phase increment per sample [chips/sample].
|
||||
* \li shifts_chips: Vector of floats that defines the spacing (in chips) between the replicas of \p local_code
|
||||
* \li code_length_chips: Code length in chips.
|
||||
* \li num_out_vectors: Number of output vectors.
|
||||
* \li num_points: The number of data values to be in the resampled vector.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li result: Pointer to a vector of pointers where the results will be stored.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_16i_xn_resampler_16i_xn_H
|
||||
#define INCLUDED_volk_gnsssdr_16i_xn_resampler_16i_xn_H
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*
 * Generic (portable) implementation: fills num_out_vectors output vectors,
 * each with num_points samples picked from local_code.
 *
 * For output sample n of tap t the source chip index is
 *     floor(code_phase_step_chips * n + shifts_chips[t] - rem_code_phase_chips)
 * wrapped into [0, code_length_chips).
 *
 * result                 Array of num_out_vectors output pointers, each with
 *                        room for num_points samples.
 * local_code             Code to be resampled, code_length_chips entries.
 * rem_code_phase_chips   Remnant code phase [chips].
 * code_phase_step_chips  Phase increment per sample [chips/sample].
 * shifts_chips           Per-tap shift [chips]; may be negative.
 * code_length_chips      Code length in chips; nothing is written if zero.
 * num_out_vectors        Number of output vectors.
 * num_points             Number of samples per output vector.
 */
static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_generic(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
{
    /* Signed copy so the wrap-around below is done in plain int arithmetic
     * instead of relying on implicit int/unsigned conversions. */
    const int code_length = (int)code_length_chips;
    int local_code_chip_index;
    int current_correlator_tap;
    unsigned int n;

    /* An empty code has no chip to read and would make the modulo divide by zero */
    if (code_length <= 0)
        {
            return;
        }

    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
        {
            for (n = 0; n < num_points; n++)
                {
                    // resample code for current tap
                    local_code_chip_index = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
                    // Take into account that in multitap correlators, the shifts can be negative!
                    // '%' keeps the sign of the dividend, so fold negatives back into range.
                    local_code_chip_index %= code_length;
                    if (local_code_chip_index < 0) local_code_chip_index += code_length;
                    result[current_correlator_tap][n] = local_code[local_code_chip_index];
                }
        }
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_a_sse4_1(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
{
|
||||
int16_t** _result = result;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
int current_correlator_tap;
|
||||
unsigned int n;
|
||||
unsigned int k;
|
||||
const __m128 fours = _mm_set1_ps(4.0f);
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
|
||||
const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
|
||||
__m128i local_code_chip_index_reg, aux_i, negatives, i;
|
||||
__m128 aux, aux2, shifts_chips_reg, c, cTrunc, base;
|
||||
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
|
||||
aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
|
||||
__m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
|
||||
for(n = 0; n < quarterPoints; n++)
|
||||
{
|
||||
aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
|
||||
aux = _mm_add_ps(aux, aux2);
|
||||
// floor
|
||||
aux = _mm_floor_ps(aux);
|
||||
|
||||
// fmod
|
||||
c = _mm_div_ps(aux, code_length_chips_reg_f);
|
||||
i = _mm_cvttps_epi32(c);
|
||||
cTrunc = _mm_cvtepi32_ps(i);
|
||||
base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
|
||||
local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base));
|
||||
|
||||
negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros);
|
||||
aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
|
||||
local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i);
|
||||
_mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);
|
||||
for(k = 0; k < 4; ++k)
|
||||
{
|
||||
_result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
|
||||
}
|
||||
indexn = _mm_add_ps(indexn, fours);
|
||||
}
|
||||
for(n = quarterPoints * 4; n < num_points; n++)
|
||||
{
|
||||
// resample code for current tap
|
||||
local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
|
||||
//Take into account that in multitap correlators, the shifts can be negative!
|
||||
if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
|
||||
local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
|
||||
_result[current_correlator_tap][n] = local_code[local_code_chip_index_];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_u_sse4_1(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
{
|
||||
int16_t** _result = result;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
int current_correlator_tap;
|
||||
unsigned int n;
|
||||
unsigned int k;
|
||||
const __m128 fours = _mm_set1_ps(4.0f);
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
|
||||
const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
|
||||
__m128i local_code_chip_index_reg, aux_i, negatives, i;
|
||||
__m128 aux, aux2, shifts_chips_reg, c, cTrunc, base;
|
||||
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
|
||||
aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
|
||||
__m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
|
||||
for(n = 0; n < quarterPoints; n++)
|
||||
{
|
||||
aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
|
||||
aux = _mm_add_ps(aux, aux2);
|
||||
// floor
|
||||
aux = _mm_floor_ps(aux);
|
||||
|
||||
// fmod
|
||||
c = _mm_div_ps(aux, code_length_chips_reg_f);
|
||||
i = _mm_cvttps_epi32(c);
|
||||
cTrunc = _mm_cvtepi32_ps(i);
|
||||
base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
|
||||
local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base));
|
||||
|
||||
negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros);
|
||||
aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
|
||||
local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i);
|
||||
_mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);
|
||||
for(k = 0; k < 4; ++k)
|
||||
{
|
||||
_result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
|
||||
}
|
||||
indexn = _mm_add_ps(indexn, fours);
|
||||
}
|
||||
for(n = quarterPoints * 4; n < num_points; n++)
|
||||
{
|
||||
// resample code for current tap
|
||||
local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
|
||||
//Take into account that in multitap correlators, the shifts can be negative!
|
||||
if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
|
||||
local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
|
||||
_result[current_correlator_tap][n] = local_code[local_code_chip_index_];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_a_sse3(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
{
|
||||
int16_t** _result = result;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
int current_correlator_tap;
|
||||
unsigned int n;
|
||||
unsigned int k;
|
||||
const __m128 ones = _mm_set1_ps(1.0f);
|
||||
const __m128 fours = _mm_set1_ps(4.0f);
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
|
||||
const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
|
||||
__m128i local_code_chip_index_reg, aux_i, negatives, i;
|
||||
__m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base;
|
||||
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
|
||||
aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
|
||||
__m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
|
||||
for(n = 0; n < quarterPoints; n++)
|
||||
{
|
||||
aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
|
||||
aux = _mm_add_ps(aux, aux2);
|
||||
// floor
|
||||
i = _mm_cvttps_epi32(aux);
|
||||
fi = _mm_cvtepi32_ps(i);
|
||||
igx = _mm_cmpgt_ps(fi, aux);
|
||||
j = _mm_and_ps(igx, ones);
|
||||
aux = _mm_sub_ps(fi, j);
|
||||
// fmod
|
||||
c = _mm_div_ps(aux, code_length_chips_reg_f);
|
||||
i = _mm_cvttps_epi32(c);
|
||||
cTrunc = _mm_cvtepi32_ps(i);
|
||||
base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
|
||||
local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base));
|
||||
|
||||
negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros);
|
||||
aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
|
||||
local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i);
|
||||
_mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);
|
||||
for(k = 0; k < 4; ++k)
|
||||
{
|
||||
_result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
|
||||
}
|
||||
indexn = _mm_add_ps(indexn, fours);
|
||||
}
|
||||
for(n = quarterPoints * 4; n < num_points; n++)
|
||||
{
|
||||
// resample code for current tap
|
||||
local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
|
||||
//Take into account that in multitap correlators, the shifts can be negative!
|
||||
if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
|
||||
local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
|
||||
_result[current_correlator_tap][n] = local_code[local_code_chip_index_];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_u_sse3(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
{
|
||||
int16_t** _result = result;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
int current_correlator_tap;
|
||||
unsigned int n;
|
||||
unsigned int k;
|
||||
const __m128 ones = _mm_set1_ps(1.0f);
|
||||
const __m128 fours = _mm_set1_ps(4.0f);
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
|
||||
const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
|
||||
__m128i local_code_chip_index_reg, aux_i, negatives, i;
|
||||
__m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base;
|
||||
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
|
||||
aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
|
||||
__m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
|
||||
for(n = 0; n < quarterPoints; n++)
|
||||
{
|
||||
aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
|
||||
aux = _mm_add_ps(aux, aux2);
|
||||
// floor
|
||||
i = _mm_cvttps_epi32(aux);
|
||||
fi = _mm_cvtepi32_ps(i);
|
||||
igx = _mm_cmpgt_ps(fi, aux);
|
||||
j = _mm_and_ps(igx, ones);
|
||||
aux = _mm_sub_ps(fi, j);
|
||||
// fmod
|
||||
c = _mm_div_ps(aux, code_length_chips_reg_f);
|
||||
i = _mm_cvttps_epi32(c);
|
||||
cTrunc = _mm_cvtepi32_ps(i);
|
||||
base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
|
||||
local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base));
|
||||
|
||||
negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros);
|
||||
aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
|
||||
local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i);
|
||||
_mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);
|
||||
for(k = 0; k < 4; ++k)
|
||||
{
|
||||
_result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
|
||||
}
|
||||
indexn = _mm_add_ps(indexn, fours);
|
||||
}
|
||||
for(n = quarterPoints * 4; n < num_points; n++)
|
||||
{
|
||||
// resample code for current tap
|
||||
local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
|
||||
//Take into account that in multitap correlators, the shifts can be negative!
|
||||
if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
|
||||
local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
|
||||
_result[current_correlator_tap][n] = local_code[local_code_chip_index_];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_a_avx(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
{
|
||||
int16_t** _result = result;
|
||||
const unsigned int avx_iters = num_points / 8;
|
||||
int current_correlator_tap;
|
||||
unsigned int n;
|
||||
unsigned int k;
|
||||
const __m256 eights = _mm256_set1_ps(8.0f);
|
||||
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
|
||||
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m256 zeros = _mm256_setzero_ps();
|
||||
const __m256 code_length_chips_reg_f = _mm256_set1_ps((float)code_length_chips);
|
||||
const __m256 n0 = _mm256_set_ps(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);
|
||||
|
||||
__m256i local_code_chip_index_reg, i;
|
||||
__m256 aux, aux2, aux3, shifts_chips_reg, c, cTrunc, base, negatives, indexn;
|
||||
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
shifts_chips_reg = _mm256_set1_ps((float)shifts_chips[current_correlator_tap]);
|
||||
aux2 = _mm256_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
|
||||
indexn = n0;
|
||||
for(n = 0; n < avx_iters; n++)
|
||||
{
|
||||
__VOLK_GNSSSDR_PREFETCH_LOCALITY(&_result[current_correlator_tap][8 * n + 7], 1, 0);
|
||||
__VOLK_GNSSSDR_PREFETCH_LOCALITY(&local_code_chip_index[8], 1, 3);
|
||||
aux = _mm256_mul_ps(code_phase_step_chips_reg, indexn);
|
||||
aux = _mm256_add_ps(aux, aux2);
|
||||
// floor
|
||||
aux = _mm256_floor_ps(aux);
|
||||
|
||||
// fmod
|
||||
c = _mm256_div_ps(aux, code_length_chips_reg_f);
|
||||
i = _mm256_cvttps_epi32(c);
|
||||
cTrunc = _mm256_cvtepi32_ps(i);
|
||||
base = _mm256_mul_ps(cTrunc, code_length_chips_reg_f);
|
||||
local_code_chip_index_reg = _mm256_cvttps_epi32(_mm256_sub_ps(aux, base));
|
||||
|
||||
// no negatives
|
||||
c = _mm256_cvtepi32_ps(local_code_chip_index_reg);
|
||||
negatives = _mm256_cmp_ps(c, zeros, 0x01 );
|
||||
aux3 = _mm256_and_ps(code_length_chips_reg_f, negatives);
|
||||
aux = _mm256_add_ps(c, aux3);
|
||||
local_code_chip_index_reg = _mm256_cvttps_epi32(aux);
|
||||
|
||||
_mm256_store_si256((__m256i*)local_code_chip_index, local_code_chip_index_reg);
|
||||
for(k = 0; k < 8; ++k)
|
||||
{
|
||||
_result[current_correlator_tap][n * 8 + k] = local_code[local_code_chip_index[k]];
|
||||
}
|
||||
indexn = _mm256_add_ps(indexn, eights);
|
||||
}
|
||||
}
|
||||
_mm256_zeroupper();
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
for(n = avx_iters * 8; n < num_points; n++)
|
||||
{
|
||||
// resample code for current tap
|
||||
local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
|
||||
//Take into account that in multitap correlators, the shifts can be negative!
|
||||
if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
|
||||
local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
|
||||
_result[current_correlator_tap][n] = local_code[local_code_chip_index_];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_u_avx(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
{
|
||||
int16_t** _result = result;
|
||||
const unsigned int avx_iters = num_points / 8;
|
||||
int current_correlator_tap;
|
||||
unsigned int n;
|
||||
unsigned int k;
|
||||
const __m256 eights = _mm256_set1_ps(8.0f);
|
||||
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
|
||||
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m256 zeros = _mm256_setzero_ps();
|
||||
const __m256 code_length_chips_reg_f = _mm256_set1_ps((float)code_length_chips);
|
||||
const __m256 n0 = _mm256_set_ps(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);
|
||||
|
||||
__m256i local_code_chip_index_reg, i;
|
||||
__m256 aux, aux2, aux3, shifts_chips_reg, c, cTrunc, base, negatives, indexn;
|
||||
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
shifts_chips_reg = _mm256_set1_ps((float)shifts_chips[current_correlator_tap]);
|
||||
aux2 = _mm256_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
|
||||
indexn = n0;
|
||||
for(n = 0; n < avx_iters; n++)
|
||||
{
|
||||
__VOLK_GNSSSDR_PREFETCH_LOCALITY(&_result[current_correlator_tap][8 * n + 7], 1, 0);
|
||||
__VOLK_GNSSSDR_PREFETCH_LOCALITY(&local_code_chip_index[8], 1, 3);
|
||||
aux = _mm256_mul_ps(code_phase_step_chips_reg, indexn);
|
||||
aux = _mm256_add_ps(aux, aux2);
|
||||
// floor
|
||||
aux = _mm256_floor_ps(aux);
|
||||
|
||||
// fmod
|
||||
c = _mm256_div_ps(aux, code_length_chips_reg_f);
|
||||
i = _mm256_cvttps_epi32(c);
|
||||
cTrunc = _mm256_cvtepi32_ps(i);
|
||||
base = _mm256_mul_ps(cTrunc, code_length_chips_reg_f);
|
||||
local_code_chip_index_reg = _mm256_cvttps_epi32(_mm256_sub_ps(aux, base));
|
||||
|
||||
// no negatives
|
||||
c = _mm256_cvtepi32_ps(local_code_chip_index_reg);
|
||||
negatives = _mm256_cmp_ps(c, zeros, 0x01 );
|
||||
aux3 = _mm256_and_ps(code_length_chips_reg_f, negatives);
|
||||
aux = _mm256_add_ps(c, aux3);
|
||||
local_code_chip_index_reg = _mm256_cvttps_epi32(aux);
|
||||
|
||||
_mm256_store_si256((__m256i*)local_code_chip_index, local_code_chip_index_reg);
|
||||
for(k = 0; k < 8; ++k)
|
||||
{
|
||||
_result[current_correlator_tap][n * 8 + k] = local_code[local_code_chip_index[k]];
|
||||
}
|
||||
indexn = _mm256_add_ps(indexn, eights);
|
||||
}
|
||||
}
|
||||
_mm256_zeroupper();
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
for(n = avx_iters * 8; n < num_points; n++)
|
||||
{
|
||||
// resample code for current tap
|
||||
local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
|
||||
//Take into account that in multitap correlators, the shifts can be negative!
|
||||
if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
|
||||
local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
|
||||
_result[current_correlator_tap][n] = local_code[local_code_chip_index_];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
/*!
 * NEON implementation of the multi-output code resampler.
 *
 * For every output vector \p result[tap], sample n receives the element of
 * \p local_code whose index is the floored code phase
 * (code_phase_step_chips * n + shifts_chips[tap] - rem_code_phase_chips)
 * wrapped into [0, code_length_chips). Samples are produced in groups of
 * four with NEON; the last num_points % 4 samples use scalar code.
 *
 * The division needed by the wrap is replaced by a multiplication with a
 * refined reciprocal estimate. Because that reciprocal is approximate, the
 * vector path wraps the index at both ends (below 0 and at/above
 * code_length_chips) so that it can never address past the end of
 * \p local_code.
 */
static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_neon(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
{
    int16_t** _result = result;
    const unsigned int neon_iters = num_points / 4;
    const int32x4_t ones = vdupq_n_s32(1);
    const float32x4_t fours = vdupq_n_f32(4.0f);
    const float32x4_t rem_code_phase_chips_reg = vdupq_n_f32(rem_code_phase_chips);
    const float32x4_t code_phase_step_chips_reg = vdupq_n_f32(code_phase_step_chips);

    __VOLK_ATTR_ALIGNED(16) int32_t local_code_chip_index[4];
    int32_t local_code_chip_index_;

    const int32x4_t zeros = vdupq_n_s32(0);
    const float32x4_t code_length_chips_reg_f = vdupq_n_f32((float)code_length_chips);
    const int32x4_t code_length_chips_reg_i = vdupq_n_s32((int32_t)code_length_chips);
    int32x4_t local_code_chip_index_reg, aux_i, negatives, overruns, i;
    float32x4_t aux, aux2, shifts_chips_reg, fi, c, j, cTrunc, base, indexn, reciprocal;
    __VOLK_ATTR_ALIGNED(16) const float vec[4] = {0.0f, 1.0f, 2.0f, 3.0f};
    uint32x4_t igx;

    // 1 / code_length_chips: hardware estimate followed by two Newton-Raphson refinement steps
    reciprocal = vrecpeq_f32(code_length_chips_reg_f);
    reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal);
    reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal);  // this refinement is required!
    float32x4_t n0 = vld1q_f32((float*)vec);
    int current_correlator_tap;
    unsigned int n;
    unsigned int k;

    for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
        {
            shifts_chips_reg = vdupq_n_f32((float)shifts_chips[current_correlator_tap]);
            // per-tap constant part of the code phase: shift - remnant phase
            aux2 = vsubq_f32(shifts_chips_reg, rem_code_phase_chips_reg);
            indexn = n0;
            for (n = 0; n < neon_iters; n++)
                {
                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&_result[current_correlator_tap][4 * n + 3], 1, 0);
                    __VOLK_GNSSSDR_PREFETCH(&local_code_chip_index[4]);
                    aux = vmulq_f32(code_phase_step_chips_reg, indexn);
                    aux = vaddq_f32(aux, aux2);

                    // floor: truncate toward zero, then subtract one wherever the
                    // truncated value is greater than the original one
                    i = vcvtq_s32_f32(aux);
                    fi = vcvtq_f32_s32(i);
                    igx = vcgtq_f32(fi, aux);
                    j = vcvtq_f32_s32(vandq_s32(vreinterpretq_s32_u32(igx), ones));
                    aux = vsubq_f32(fi, j);

                    // fmod: aux - trunc(aux * reciprocal) * code length
                    c = vmulq_f32(aux, reciprocal);
                    i = vcvtq_s32_f32(c);
                    cTrunc = vcvtq_f32_s32(i);
                    base = vmulq_f32(cTrunc, code_length_chips_reg_f);
                    aux = vsubq_f32(aux, base);
                    local_code_chip_index_reg = vcvtq_s32_f32(aux);

                    // negative remainders are moved up by one code length
                    negatives = vreinterpretq_s32_u32(vcltq_s32(local_code_chip_index_reg, zeros));
                    aux_i = vandq_s32(code_length_chips_reg_i, negatives);
                    local_code_chip_index_reg = vaddq_s32(local_code_chip_index_reg, aux_i);

                    // If the approximate reciprocal makes the quotient of an exact multiple
                    // of the code length fall just below the integer, the remainder equals
                    // the code length itself; move such lanes down by one code length.
                    overruns = vreinterpretq_s32_u32(vcgeq_s32(local_code_chip_index_reg, code_length_chips_reg_i));
                    aux_i = vandq_s32(code_length_chips_reg_i, overruns);
                    local_code_chip_index_reg = vsubq_s32(local_code_chip_index_reg, aux_i);

                    vst1q_s32((int32_t*)local_code_chip_index, local_code_chip_index_reg);

                    for (k = 0; k < 4; ++k)
                        {
                            _result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
                        }
                    indexn = vaddq_f32(indexn, fours);
                }
            // scalar code for the samples that do not fill a group of four
            for (n = neon_iters * 4; n < num_points; n++)
                {
                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&_result[current_correlator_tap][n], 1, 0);
                    // resample code for current tap
                    local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
                    // Take into account that in multitap correlators, the shifts can be negative!
                    if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1);
                    local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
                    _result[current_correlator_tap][n] = local_code[local_code_chip_index_];
                }
        }
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_16i_xn_resampler_16i_xn_H*/
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,384 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic.h
|
||||
* \brief Volk puppet for the multiple 16-bit complex dot product kernel.
|
||||
* \authors <ul>
|
||||
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
|
||||
* </ul>
|
||||
*
|
||||
* Volk puppet for integrating the multiple rotator + dot product kernel into volk's test system
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_H
|
||||
#define INCLUDED_volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_H
|
||||
|
||||
#include "volk_gnsssdr/volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn.h"
|
||||
#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
 * Test puppet for volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_generic.
 *
 * Builds three input vectors, each a copy of the first \p num_points int16_t
 * values found at \p in, and forwards them to the kernel together with a
 * fixed initial phase (0.345 rad) and phase step (0.1 rad), both expressed
 * as complex exponentials. The temporary vectors are released before returning.
 */
static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_generic(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
    // The rotator takes its phases as complex exponentials (cos + j*sin)
    float initial_phase_rad = 0.345;
    float step_rad = 0.1;
    int num_a_vectors = 3;
    lv_32fc_t phase[1];
    lv_32fc_t phase_inc[1];
    int16_t** in_a;
    int v;

    phase[0] = lv_cmake(cos(initial_phase_rad), sin(initial_phase_rad));
    phase_inc[0] = lv_cmake(cos(step_rad), sin(step_rad));

    // one pointer per input vector, then the vectors themselves
    in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
    for (v = 0; v < num_a_vectors; v++)
        {
            in_a[v] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
            memcpy(in_a[v], in, sizeof(int16_t) * num_points);
        }

    volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_generic(result, local_code, phase_inc[0], phase, (const int16_t**)in_a, num_a_vectors, num_points);

    for (v = 0; v < num_a_vectors; v++)
        {
            volk_gnsssdr_free(in_a[v]);
        }
    volk_gnsssdr_free(in_a);
}
|
||||
|
||||
#endif // Generic
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
 * Test puppet for volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_generic_reload.
 *
 * Builds three input vectors, each a copy of the first \p num_points int16_t
 * values found at \p in, and forwards them to the kernel together with a
 * fixed initial phase (0.345 rad) and phase step (0.1 rad), both expressed
 * as complex exponentials. The temporary vectors are released before returning.
 */
static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_generic_reload(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
    // The rotator takes its phases as complex exponentials (cos + j*sin)
    float initial_phase_rad = 0.345;
    float step_rad = 0.1;
    int num_a_vectors = 3;
    lv_32fc_t phase[1];
    lv_32fc_t phase_inc[1];
    int16_t** in_a;
    int v;

    phase[0] = lv_cmake(cos(initial_phase_rad), sin(initial_phase_rad));
    phase_inc[0] = lv_cmake(cos(step_rad), sin(step_rad));

    // one pointer per input vector, then the vectors themselves
    in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
    for (v = 0; v < num_a_vectors; v++)
        {
            in_a[v] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
            memcpy(in_a[v], in, sizeof(int16_t) * num_points);
        }

    volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_generic_reload(result, local_code, phase_inc[0], phase, (const int16_t**)in_a, num_a_vectors, num_points);

    for (v = 0; v < num_a_vectors; v++)
        {
            volk_gnsssdr_free(in_a[v]);
        }
    volk_gnsssdr_free(in_a);
}
|
||||
|
||||
#endif // Generic
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
/*!
 * Test puppet for volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_sse3.
 *
 * Builds three input vectors, each a copy of the first \p num_points int16_t
 * values found at \p in, and forwards them to the kernel together with a
 * fixed initial phase (0.345 rad) and phase step (0.1 rad), both expressed
 * as complex exponentials. The temporary vectors are released before returning.
 */
static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_a_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
    // The rotator takes its phases as complex exponentials (cos + j*sin)
    float initial_phase_rad = 0.345;
    float step_rad = 0.1;
    int num_a_vectors = 3;
    lv_32fc_t phase[1];
    lv_32fc_t phase_inc[1];
    int16_t** in_a;
    int v;

    phase[0] = lv_cmake(cos(initial_phase_rad), sin(initial_phase_rad));
    phase_inc[0] = lv_cmake(cos(step_rad), sin(step_rad));

    // one pointer per input vector, then the vectors themselves
    in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
    for (v = 0; v < num_a_vectors; v++)
        {
            in_a[v] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
            memcpy(in_a[v], in, sizeof(int16_t) * num_points);
        }

    volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_sse3(result, local_code, phase_inc[0], phase, (const int16_t**)in_a, num_a_vectors, num_points);

    for (v = 0; v < num_a_vectors; v++)
        {
            volk_gnsssdr_free(in_a[v]);
        }
    volk_gnsssdr_free(in_a);
}
|
||||
|
||||
#endif // SSE3
|
||||
|
||||
|
||||
//#ifdef LV_HAVE_SSE3
|
||||
//static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_a_sse3_reload(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
//{
|
||||
//// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
//float rem_carrier_phase_in_rad = 0.345;
|
||||
//float phase_step_rad = 0.1;
|
||||
//lv_32fc_t phase[1];
|
||||
//phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
|
||||
//lv_32fc_t phase_inc[1];
|
||||
//phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
|
||||
//unsigned int n;
|
||||
//int num_a_vectors = 3;
|
||||
//int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
|
||||
//for(n = 0; n < num_a_vectors; n++)
|
||||
//{
|
||||
//in_a[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
//memcpy((int16_t*)in_a[n], (int16_t*)in, sizeof(int16_t) * num_points);
|
||||
//}
|
||||
|
||||
//volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_sse3_reload(result, local_code, phase_inc[0], phase, (const int16_t**) in_a, num_a_vectors, num_points);
|
||||
|
||||
//for(n = 0; n < num_a_vectors; n++)
|
||||
//{
|
||||
//volk_gnsssdr_free(in_a[n]);
|
||||
//}
|
||||
//volk_gnsssdr_free(in_a);
|
||||
//}
|
||||
|
||||
//#endif // SSE3
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
/*
 * Puppet around volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_sse3.
 *
 * Builds three identical int16_t input vectors, each filled with the first
 * num_points int16_t values read from `in`, and runs the kernel on them with
 * a fixed initial phase (0.345 rad) and a fixed phase step (0.1 rad).
 * `result` and `local_code` are forwarded to the kernel unchanged; every
 * temporary buffer is released before returning.
 */
static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_u_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
    /* The phase rotator must be fed normalized phases, i.e. complex
     * exponentials, hence the cos/sin construction. */
    float initial_phase_rad = 0.345;
    float step_rad = 0.1;
    lv_32fc_t rotator_phase = lv_cmake(cos(initial_phase_rad), sin(initial_phase_rad));
    lv_32fc_t rotator_step = lv_cmake(cos(step_rad), sin(step_rad));
    const int num_a_vectors = 3;
    int k;

    /* Table of pointers to the replicated input vectors. */
    int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
    for (k = 0; k < num_a_vectors; ++k)
        {
            in_a[k] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
            memcpy(in_a[k], in, sizeof(int16_t) * num_points);
        }

    volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_sse3(result, local_code, rotator_step, &rotator_phase, (const int16_t**)in_a, num_a_vectors, num_points);

    for (k = 0; k < num_a_vectors; ++k)
        {
            volk_gnsssdr_free(in_a[k]);
        }
    volk_gnsssdr_free(in_a);
}
|
||||
|
||||
#endif // SSE3
|
||||
|
||||
|
||||
#ifdef LV_HAVE_AVX2
|
||||
/*
 * Puppet around volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_avx2.
 *
 * Builds three identical int16_t input vectors, each filled with the first
 * num_points int16_t values read from `in`, and runs the kernel on them with
 * a fixed initial phase (0.345 rad) and a fixed phase step (0.1 rad).
 * `result` and `local_code` are forwarded to the kernel unchanged; every
 * temporary buffer is released before returning.
 */
static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_a_avx2(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
    /* The phase rotator must be fed normalized phases, i.e. complex
     * exponentials, hence the cos/sin construction. */
    float initial_phase_rad = 0.345;
    float step_rad = 0.1;
    lv_32fc_t rotator_phase = lv_cmake(cos(initial_phase_rad), sin(initial_phase_rad));
    lv_32fc_t rotator_step = lv_cmake(cos(step_rad), sin(step_rad));
    const int num_a_vectors = 3;
    int k;

    /* Table of pointers to the replicated input vectors. */
    int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
    for (k = 0; k < num_a_vectors; ++k)
        {
            in_a[k] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
            memcpy(in_a[k], in, sizeof(int16_t) * num_points);
        }

    volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_avx2(result, local_code, rotator_step, &rotator_phase, (const int16_t**)in_a, num_a_vectors, num_points);

    for (k = 0; k < num_a_vectors; ++k)
        {
            volk_gnsssdr_free(in_a[k]);
        }
    volk_gnsssdr_free(in_a);
}
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
|
||||
//#ifdef LV_HAVE_AVX2
|
||||
//static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_a_avx2_reload(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
//{
|
||||
//// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
//float rem_carrier_phase_in_rad = 0.345;
|
||||
//float phase_step_rad = 0.1;
|
||||
//lv_32fc_t phase[1];
|
||||
//phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
|
||||
//lv_32fc_t phase_inc[1];
|
||||
//phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
|
||||
//unsigned int n;
|
||||
//int num_a_vectors = 3;
|
||||
//int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
|
||||
//for(n = 0; n < num_a_vectors; n++)
|
||||
//{
|
||||
//in_a[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
//memcpy((int16_t*)in_a[n], (int16_t*)in, sizeof(int16_t) * num_points);
|
||||
//}
|
||||
|
||||
//volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_avx2_reload(result, local_code, phase_inc[0], phase, (const int16_t**) in_a, num_a_vectors, num_points);
|
||||
|
||||
//for(n = 0; n < num_a_vectors; n++)
|
||||
//{
|
||||
//volk_gnsssdr_free(in_a[n]);
|
||||
//}
|
||||
//volk_gnsssdr_free(in_a);
|
||||
//}
|
||||
|
||||
//#endif // AVX2
|
||||
|
||||
|
||||
#ifdef LV_HAVE_AVX2
|
||||
/*
 * Puppet around volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2.
 *
 * Builds three identical int16_t input vectors, each filled with the first
 * num_points int16_t values read from `in`, and runs the kernel on them with
 * a fixed initial phase (0.345 rad) and a fixed phase step (0.1 rad).
 * `result` and `local_code` are forwarded to the kernel unchanged; every
 * temporary buffer is released before returning.
 */
static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_u_avx2(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
    /* The phase rotator must be fed normalized phases, i.e. complex
     * exponentials, hence the cos/sin construction. */
    float initial_phase_rad = 0.345;
    float step_rad = 0.1;
    lv_32fc_t rotator_phase = lv_cmake(cos(initial_phase_rad), sin(initial_phase_rad));
    lv_32fc_t rotator_step = lv_cmake(cos(step_rad), sin(step_rad));
    const int num_a_vectors = 3;
    int k;

    /* Table of pointers to the replicated input vectors. */
    int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
    for (k = 0; k < num_a_vectors; ++k)
        {
            in_a[k] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
            memcpy(in_a[k], in, sizeof(int16_t) * num_points);
        }

    volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2(result, local_code, rotator_step, &rotator_phase, (const int16_t**)in_a, num_a_vectors, num_points);

    for (k = 0; k < num_a_vectors; ++k)
        {
            volk_gnsssdr_free(in_a[k]);
        }
    volk_gnsssdr_free(in_a);
}
|
||||
|
||||
#endif // AVX2
|
||||
|
||||
|
||||
//#ifdef LV_HAVE_AVX2
|
||||
//static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_u_avx2_reload(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
//{
|
||||
//// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
//float rem_carrier_phase_in_rad = 0.345;
|
||||
//float phase_step_rad = 0.1;
|
||||
//lv_32fc_t phase[1];
|
||||
//phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
|
||||
//lv_32fc_t phase_inc[1];
|
||||
//phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
|
||||
//unsigned int n;
|
||||
//int num_a_vectors = 3;
|
||||
//int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
|
||||
//for(n = 0; n < num_a_vectors; n++)
|
||||
//{
|
||||
//in_a[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
//memcpy((int16_t*)in_a[n], (int16_t*)in, sizeof(int16_t) * num_points);
|
||||
//}
|
||||
|
||||
//volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2_reload(result, local_code, phase_inc[0], phase, (const int16_t**) in_a, num_a_vectors, num_points);
|
||||
|
||||
//for(n = 0; n < num_a_vectors; n++)
|
||||
//{
|
||||
//volk_gnsssdr_free(in_a[n]);
|
||||
//}
|
||||
//volk_gnsssdr_free(in_a);
|
||||
//}
|
||||
|
||||
//#endif // AVX2
|
||||
|
||||
|
||||
//#ifdef LV_HAVE_NEON
|
||||
//static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
//{
|
||||
//// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
//float rem_carrier_phase_in_rad = 0.345;
|
||||
//float phase_step_rad = 0.1;
|
||||
//lv_32fc_t phase[1];
|
||||
//phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
|
||||
//lv_32fc_t phase_inc[1];
|
||||
//phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
|
||||
//unsigned int n;
|
||||
//int num_a_vectors = 3;
|
||||
//int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
|
||||
//for(n = 0; n < num_a_vectors; n++)
|
||||
//{
|
||||
//in_a[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
//memcpy((int16_t*)in_a[n], (int16_t*)in, sizeof(int16_t) * num_points);
|
||||
//}
|
||||
|
||||
//volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_neon(result, local_code, phase_inc[0], phase, (const int16_t**) in_a, num_a_vectors, num_points);
|
||||
|
||||
//for(n = 0; n < num_a_vectors; n++)
|
||||
//{
|
||||
//volk_gnsssdr_free(in_a[n]);
|
||||
//}
|
||||
//volk_gnsssdr_free(in_a);
|
||||
//}
|
||||
|
||||
//#endif // NEON
|
||||
|
||||
|
||||
//#ifdef LV_HAVE_NEON
|
||||
//static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_neon_vma(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
//{
|
||||
//// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
//float rem_carrier_phase_in_rad = 0.345;
|
||||
//float phase_step_rad = 0.1;
|
||||
//lv_32fc_t phase[1];
|
||||
//phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
|
||||
//lv_32fc_t phase_inc[1];
|
||||
//phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
|
||||
//unsigned int n;
|
||||
//int num_a_vectors = 3;
|
||||
//int16_t** in_a = (int16_t**)volk_gnsssdr_malloc(sizeof(int16_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
|
||||
//for(n = 0; n < num_a_vectors; n++)
|
||||
//{
|
||||
//in_a[n] = (int16_t*)volk_gnsssdr_malloc(sizeof(int16_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
//memcpy((int16_t*)in_a[n], (int16_t*)in, sizeof(int16_t) * num_points);
|
||||
//}
|
||||
|
||||
//volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_neon_vma(result, local_code, phase_inc[0], phase, (const int16_t**) in_a, num_a_vectors, num_points);
|
||||
|
||||
//for(n = 0; n < num_a_vectors; n++)
|
||||
//{
|
||||
//volk_gnsssdr_free(in_a[n]);
|
||||
//}
|
||||
//volk_gnsssdr_free(in_a);
|
||||
//}
|
||||
|
||||
//#endif // NEON
|
||||
|
||||
#endif // INCLUDED_volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_H
|
||||
|
||||
|
||||
|
@ -44,8 +44,8 @@
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_generic(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
unsigned int n;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
@ -74,8 +74,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_generic(lv_16sc_t* r
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -103,8 +103,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse3(lv_16sc_t* re
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -133,8 +133,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse3(lv_16sc_t* re
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse4_1(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -163,8 +163,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse4_1(lv_16sc_t*
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse4_1(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -193,8 +193,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse4_1(lv_16sc_t*
|
||||
#ifdef LV_HAVE_AVX
|
||||
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_avx(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -223,8 +223,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_avx(lv_16sc_t* res
|
||||
#ifdef LV_HAVE_AVX
|
||||
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_avx(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -253,8 +253,8 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_avx(lv_16sc_t* res
|
||||
#ifdef LV_HAVE_NEON
|
||||
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
|
@ -0,0 +1,279 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_32f_resamplerxnpuppet_32f.h
|
||||
* \brief VOLK_GNSSSDR puppet for the multiple 32-bit float vector resampler kernel.
|
||||
* \authors <ul>
|
||||
* <li> Cillian O'Driscoll 2017 cillian.odriscoll at gmail dot com
|
||||
* </ul>
|
||||
*
|
||||
* VOLK_GNSSSDR puppet for integrating the multiple resampler into the test system
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2017 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_32f_resamplerxnpuppet_32f_H
|
||||
#define INCLUDED_volk_gnsssdr_32f_resamplerxnpuppet_32f_H
|
||||
|
||||
#include "volk_gnsssdr/volk_gnsssdr_32f_xn_resampler_32f_xn.h"
|
||||
#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*
 * Puppet around volk_gnsssdr_32f_xn_resampler_32f_xn_generic.
 *
 * Runs the multi-output resampler with a fixed configuration (code length of
 * 2046 chips, three output vectors shifted by -0.1, 0.0 and 0.1 chips, and a
 * remnant code phase of -0.234 chips) and copies only the first output
 * vector into `result`. All scratch buffers are freed before returning.
 */
static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_generic(float* result, const float* local_code, unsigned int num_points)
{
    const int code_length = 2046;
    const int num_outputs = 3;
    /* Step sized so that num_points samples cover 2046.1 chips in total. */
    float chips_per_sample = ((float)code_length + 0.1) / (float)num_points;
    float residual_phase_chips = -0.234;
    float tap_offsets_chips[3] = {-0.1, 0.0, 0.1};
    int k;

    /* One scratch output vector of num_points floats per tap. */
    float** outputs = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_outputs, volk_gnsssdr_get_alignment());
    for (k = 0; k < num_outputs; ++k)
        {
            outputs[k] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
        }

    volk_gnsssdr_32f_xn_resampler_32f_xn_generic(outputs, local_code, residual_phase_chips, chips_per_sample, tap_offsets_chips, code_length, num_outputs, num_points);

    /* Only the first output vector is handed back to the caller. */
    memcpy(result, outputs[0], sizeof(float) * num_points);

    for (k = 0; k < num_outputs; ++k)
        {
            volk_gnsssdr_free(outputs[k]);
        }
    volk_gnsssdr_free(outputs);
}
|
||||
|
||||
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
/*
 * Puppet around volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse3.
 *
 * Runs the multi-output resampler with a fixed configuration (code length of
 * 2046 chips, three output vectors shifted by -0.1, 0.0 and 0.1 chips, and a
 * remnant code phase of -0.234 chips) and copies only the first output
 * vector into `result`. All scratch buffers are freed before returning.
 */
static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_a_sse3(float* result, const float* local_code, unsigned int num_points)
{
    const int code_length = 2046;
    const int num_outputs = 3;
    /* Step sized so that num_points samples cover 2046.1 chips in total. */
    float chips_per_sample = ((float)code_length + 0.1) / (float)num_points;
    float residual_phase_chips = -0.234;
    float tap_offsets_chips[3] = {-0.1, 0.0, 0.1};
    int k;

    /* One scratch output vector of num_points floats per tap. */
    float** outputs = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_outputs, volk_gnsssdr_get_alignment());
    for (k = 0; k < num_outputs; ++k)
        {
            outputs[k] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
        }

    volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse3(outputs, local_code, residual_phase_chips, chips_per_sample, tap_offsets_chips, code_length, num_outputs, num_points);

    /* Only the first output vector is handed back to the caller. */
    memcpy(result, outputs[0], sizeof(float) * num_points);

    for (k = 0; k < num_outputs; ++k)
        {
            volk_gnsssdr_free(outputs[k]);
        }
    volk_gnsssdr_free(outputs);
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
/*
 * Puppet around volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse3.
 *
 * Runs the multi-output resampler with a fixed configuration (code length of
 * 2046 chips, three output vectors shifted by -0.1, 0.0 and 0.1 chips, and a
 * remnant code phase of -0.234 chips) and copies only the first output
 * vector into `result`. All scratch buffers are freed before returning.
 */
static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_u_sse3(float* result, const float* local_code, unsigned int num_points)
{
    const int code_length = 2046;
    const int num_outputs = 3;
    /* Step sized so that num_points samples cover 2046.1 chips in total. */
    float chips_per_sample = ((float)code_length + 0.1) / (float)num_points;
    float residual_phase_chips = -0.234;
    float tap_offsets_chips[3] = {-0.1, 0.0, 0.1};
    int k;

    /* One scratch output vector of num_points floats per tap. */
    float** outputs = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_outputs, volk_gnsssdr_get_alignment());
    for (k = 0; k < num_outputs; ++k)
        {
            outputs[k] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
        }

    volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse3(outputs, local_code, residual_phase_chips, chips_per_sample, tap_offsets_chips, code_length, num_outputs, num_points);

    /* Only the first output vector is handed back to the caller. */
    memcpy(result, outputs[0], sizeof(float) * num_points);

    for (k = 0; k < num_outputs; ++k)
        {
            volk_gnsssdr_free(outputs[k]);
        }
    volk_gnsssdr_free(outputs);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
/*
 * Puppet around volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse4_1.
 *
 * Runs the multi-output resampler with a fixed configuration (code length of
 * 2046 chips, three output vectors shifted by -0.1, 0.0 and 0.1 chips, and a
 * remnant code phase of -0.234 chips) and copies only the first output
 * vector into `result`. All scratch buffers are freed before returning.
 */
static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_u_sse4_1(float* result, const float* local_code, unsigned int num_points)
{
    const int code_length = 2046;
    const int num_outputs = 3;
    /* Step sized so that num_points samples cover 2046.1 chips in total. */
    float chips_per_sample = ((float)code_length + 0.1) / (float)num_points;
    float residual_phase_chips = -0.234;
    float tap_offsets_chips[3] = {-0.1, 0.0, 0.1};
    int k;

    /* One scratch output vector of num_points floats per tap. */
    float** outputs = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_outputs, volk_gnsssdr_get_alignment());
    for (k = 0; k < num_outputs; ++k)
        {
            outputs[k] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
        }

    volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse4_1(outputs, local_code, residual_phase_chips, chips_per_sample, tap_offsets_chips, code_length, num_outputs, num_points);

    /* Only the first output vector is handed back to the caller. */
    memcpy(result, outputs[0], sizeof(float) * num_points);

    for (k = 0; k < num_outputs; ++k)
        {
            volk_gnsssdr_free(outputs[k]);
        }
    volk_gnsssdr_free(outputs);
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
/*
 * Puppet around volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse4_1.
 *
 * Runs the multi-output resampler with a fixed configuration (code length of
 * 2046 chips, three output vectors shifted by -0.1, 0.0 and 0.1 chips, and a
 * remnant code phase of -0.234 chips) and copies only the first output
 * vector into `result`. All scratch buffers are freed before returning.
 */
static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_a_sse4_1(float* result, const float* local_code, unsigned int num_points)
{
    const int code_length = 2046;
    const int num_outputs = 3;
    /* Step sized so that num_points samples cover 2046.1 chips in total. */
    float chips_per_sample = ((float)code_length + 0.1) / (float)num_points;
    float residual_phase_chips = -0.234;
    float tap_offsets_chips[3] = {-0.1, 0.0, 0.1};
    int k;

    /* One scratch output vector of num_points floats per tap. */
    float** outputs = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_outputs, volk_gnsssdr_get_alignment());
    for (k = 0; k < num_outputs; ++k)
        {
            outputs[k] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
        }

    volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse4_1(outputs, local_code, residual_phase_chips, chips_per_sample, tap_offsets_chips, code_length, num_outputs, num_points);

    /* Only the first output vector is handed back to the caller. */
    memcpy(result, outputs[0], sizeof(float) * num_points);

    for (k = 0; k < num_outputs; ++k)
        {
            volk_gnsssdr_free(outputs[k]);
        }
    volk_gnsssdr_free(outputs);
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
/*
 * Puppet around volk_gnsssdr_32f_xn_resampler_32f_xn_a_avx.
 *
 * Runs the multi-output resampler with a fixed configuration (code length of
 * 2046 chips, three output vectors shifted by -0.1, 0.0 and 0.1 chips, and a
 * remnant code phase of -0.234 chips) and copies only the first output
 * vector into `result`. All scratch buffers are freed before returning.
 */
static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_a_avx(float* result, const float* local_code, unsigned int num_points)
{
    const int code_length = 2046;
    const int num_outputs = 3;
    /* Step sized so that num_points samples cover 2046.1 chips in total. */
    float chips_per_sample = ((float)code_length + 0.1) / (float)num_points;
    float residual_phase_chips = -0.234;
    float tap_offsets_chips[3] = {-0.1, 0.0, 0.1};
    int k;

    /* One scratch output vector of num_points floats per tap. */
    float** outputs = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_outputs, volk_gnsssdr_get_alignment());
    for (k = 0; k < num_outputs; ++k)
        {
            outputs[k] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
        }

    volk_gnsssdr_32f_xn_resampler_32f_xn_a_avx(outputs, local_code, residual_phase_chips, chips_per_sample, tap_offsets_chips, code_length, num_outputs, num_points);

    /* Only the first output vector is handed back to the caller. */
    memcpy(result, outputs[0], sizeof(float) * num_points);

    for (k = 0; k < num_outputs; ++k)
        {
            volk_gnsssdr_free(outputs[k]);
        }
    volk_gnsssdr_free(outputs);
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
/*
 * Puppet around volk_gnsssdr_32f_xn_resampler_32f_xn_u_avx.
 *
 * Runs the multi-output resampler with a fixed configuration (code length of
 * 2046 chips, three output vectors shifted by -0.1, 0.0 and 0.1 chips, and a
 * remnant code phase of -0.234 chips) and copies only the first output
 * vector into `result`. All scratch buffers are freed before returning.
 */
static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_u_avx(float* result, const float* local_code, unsigned int num_points)
{
    const int code_length = 2046;
    const int num_outputs = 3;
    /* Step sized so that num_points samples cover 2046.1 chips in total. */
    float chips_per_sample = ((float)code_length + 0.1) / (float)num_points;
    float residual_phase_chips = -0.234;
    float tap_offsets_chips[3] = {-0.1, 0.0, 0.1};
    int k;

    /* One scratch output vector of num_points floats per tap. */
    float** outputs = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_outputs, volk_gnsssdr_get_alignment());
    for (k = 0; k < num_outputs; ++k)
        {
            outputs[k] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
        }

    volk_gnsssdr_32f_xn_resampler_32f_xn_u_avx(outputs, local_code, residual_phase_chips, chips_per_sample, tap_offsets_chips, code_length, num_outputs, num_points);

    /* Only the first output vector is handed back to the caller. */
    memcpy(result, outputs[0], sizeof(float) * num_points);

    for (k = 0; k < num_outputs; ++k)
        {
            volk_gnsssdr_free(outputs[k]);
        }
    volk_gnsssdr_free(outputs);
}
|
||||
#endif
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
/*
 * Puppet around volk_gnsssdr_32f_xn_resampler_32f_xn_neon.
 *
 * Runs the multi-output resampler with a fixed configuration (code length of
 * 2046 chips, three output vectors shifted by -0.1, 0.0 and 0.1 chips, and a
 * remnant code phase of -0.234 chips) and copies only the first output
 * vector into `result`. All scratch buffers are freed before returning.
 */
static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_neon(float* result, const float* local_code, unsigned int num_points)
{
    const int code_length = 2046;
    const int num_outputs = 3;
    /* Step sized so that num_points samples cover 2046.1 chips in total. */
    float chips_per_sample = ((float)code_length + 0.1) / (float)num_points;
    float residual_phase_chips = -0.234;
    float tap_offsets_chips[3] = {-0.1, 0.0, 0.1};
    int k;

    /* One scratch output vector of num_points floats per tap. */
    float** outputs = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_outputs, volk_gnsssdr_get_alignment());
    for (k = 0; k < num_outputs; ++k)
        {
            outputs[k] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
        }

    volk_gnsssdr_32f_xn_resampler_32f_xn_neon(outputs, local_code, residual_phase_chips, chips_per_sample, tap_offsets_chips, code_length, num_outputs, num_points);

    /* Only the first output vector is handed back to the caller. */
    memcpy(result, outputs[0], sizeof(float) * num_points);

    for (k = 0; k < num_outputs; ++k)
        {
            volk_gnsssdr_free(outputs[k]);
        }
    volk_gnsssdr_free(outputs);
}
|
||||
#endif
|
||||
|
||||
#endif // INCLUDED_volk_gnsssdr_32f_resamplerxnpuppet_32f_H
|
||||
|
@ -0,0 +1,610 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_32f_xn_resampler_32f_xn.h
|
||||
* \brief VOLK_GNSSSDR kernel: Resamples N 32-bit float vectors using zero hold resample algorithm.
|
||||
* \authors <ul>
|
||||
* <li> Cillian O'Driscoll, 2017. cillian.odriscoll(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* VOLK_GNSSSDR kernel that resamples N 32-bit float vectors using zero hold resample algorithm.
|
||||
* It is optimized to resample a single GNSS local code signal replica into N vectors fractional-resampled and fractional-delayed
|
||||
* (i.e. it creates the Early, Prompt, and Late code replicas)
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2017 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_32f_xn_resampler_32f_xn
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Resamples a 32-bit floating point vector, providing \p num_out_vectors outputs.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_32f_xn_resampler_32f_xn(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li local_code: Vector to be resampled.
|
||||
* \li rem_code_phase_chips: Remnant code phase [chips].
|
||||
* \li code_phase_step_chips: Phase increment per sample [chips/sample].
|
||||
* \li shifts_chips: Vector of floats that defines the spacing (in chips) between the replicas of \p local_code
|
||||
* \li code_length_chips: Code length in chips.
|
||||
* \li num_out_vectors: Number of output vectors.
|
||||
* \li num_points: The number of data values to be in the resampled vector.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li result: Pointer to a vector of pointers where the results will be stored.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_32f_xn_resampler_32f_xn_H
|
||||
#define INCLUDED_volk_gnsssdr_32f_xn_resampler_32f_xn_H
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h> /* abs */
|
||||
#include <stdint.h> /* int64_t */
|
||||
#include <stdio.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_generic(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
{
|
||||
int local_code_chip_index;
|
||||
int current_correlator_tap;
|
||||
int n;
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
for (n = 0; n < num_points; n++)
|
||||
{
|
||||
// resample code for current tap
|
||||
local_code_chip_index = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
|
||||
//Take into account that in multitap correlators, the shifts can be negative!
|
||||
if (local_code_chip_index < 0) local_code_chip_index += (int)code_length_chips * (abs(local_code_chip_index) / code_length_chips + 1);
|
||||
local_code_chip_index = local_code_chip_index % code_length_chips;
|
||||
result[current_correlator_tap][n] = local_code[local_code_chip_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse3(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
{
|
||||
float** _result = result;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
int current_correlator_tap;
|
||||
unsigned int n;
|
||||
unsigned int k;
|
||||
const __m128 ones = _mm_set1_ps(1.0f);
|
||||
const __m128 fours = _mm_set1_ps(4.0f);
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
|
||||
const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
|
||||
__m128i local_code_chip_index_reg, aux_i, negatives, i;
|
||||
__m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base;
|
||||
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
|
||||
aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
|
||||
__m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
|
||||
for(n = 0; n < quarterPoints; n++)
|
||||
{
|
||||
aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
|
||||
aux = _mm_add_ps(aux, aux2);
|
||||
// floor
|
||||
i = _mm_cvttps_epi32(aux);
|
||||
fi = _mm_cvtepi32_ps(i);
|
||||
igx = _mm_cmpgt_ps(fi, aux);
|
||||
j = _mm_and_ps(igx, ones);
|
||||
aux = _mm_sub_ps(fi, j);
|
||||
// fmod
|
||||
c = _mm_div_ps(aux, code_length_chips_reg_f);
|
||||
i = _mm_cvttps_epi32(c);
|
||||
cTrunc = _mm_cvtepi32_ps(i);
|
||||
base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
|
||||
local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base));
|
||||
|
||||
negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros);
|
||||
aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
|
||||
local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i);
|
||||
_mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);
|
||||
for(k = 0; k < 4; ++k)
|
||||
{
|
||||
_result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
|
||||
}
|
||||
indexn = _mm_add_ps(indexn, fours);
|
||||
}
|
||||
for(n = quarterPoints * 4; n < num_points; n++)
|
||||
{
|
||||
// resample code for current tap
|
||||
local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
|
||||
//Take into account that in multitap correlators, the shifts can be negative!
|
||||
if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1) ;
|
||||
local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
|
||||
_result[current_correlator_tap][n] = local_code[local_code_chip_index_];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse3(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
{
|
||||
float** _result = result;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
int current_correlator_tap;
|
||||
unsigned int n;
|
||||
unsigned int k;
|
||||
const __m128 ones = _mm_set1_ps(1.0f);
|
||||
const __m128 fours = _mm_set1_ps(4.0f);
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
|
||||
const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
|
||||
__m128i local_code_chip_index_reg, aux_i, negatives, i;
|
||||
__m128 aux, aux2, shifts_chips_reg, fi, igx, j, c, cTrunc, base;
|
||||
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
|
||||
aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
|
||||
__m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
|
||||
for(n = 0; n < quarterPoints; n++)
|
||||
{
|
||||
aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
|
||||
aux = _mm_add_ps(aux, aux2);
|
||||
// floor
|
||||
i = _mm_cvttps_epi32(aux);
|
||||
fi = _mm_cvtepi32_ps(i);
|
||||
igx = _mm_cmpgt_ps(fi, aux);
|
||||
j = _mm_and_ps(igx, ones);
|
||||
aux = _mm_sub_ps(fi, j);
|
||||
// fmod
|
||||
c = _mm_div_ps(aux, code_length_chips_reg_f);
|
||||
i = _mm_cvttps_epi32(c);
|
||||
cTrunc = _mm_cvtepi32_ps(i);
|
||||
base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
|
||||
local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base));
|
||||
|
||||
negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros);
|
||||
aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
|
||||
local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i);
|
||||
_mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);
|
||||
for(k = 0; k < 4; ++k)
|
||||
{
|
||||
_result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
|
||||
}
|
||||
indexn = _mm_add_ps(indexn, fours);
|
||||
}
|
||||
for(n = quarterPoints * 4; n < num_points; n++)
|
||||
{
|
||||
// resample code for current tap
|
||||
local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
|
||||
//Take into account that in multitap correlators, the shifts can be negative!
|
||||
if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1) ;
|
||||
local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
|
||||
_result[current_correlator_tap][n] = local_code[local_code_chip_index_];
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse4_1(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
{
|
||||
float** _result = result;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
int current_correlator_tap;
|
||||
unsigned int n;
|
||||
unsigned int k;
|
||||
const __m128 fours = _mm_set1_ps(4.0f);
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
|
||||
const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
|
||||
__m128i local_code_chip_index_reg, aux_i, negatives, i;
|
||||
__m128 aux, aux2, shifts_chips_reg, c, cTrunc, base;
|
||||
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
|
||||
aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
|
||||
__m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
|
||||
for(n = 0; n < quarterPoints; n++)
|
||||
{
|
||||
aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
|
||||
aux = _mm_add_ps(aux, aux2);
|
||||
// floor
|
||||
aux = _mm_floor_ps(aux);
|
||||
|
||||
// fmod
|
||||
c = _mm_div_ps(aux, code_length_chips_reg_f);
|
||||
i = _mm_cvttps_epi32(c);
|
||||
cTrunc = _mm_cvtepi32_ps(i);
|
||||
base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
|
||||
local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base));
|
||||
|
||||
negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros);
|
||||
aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
|
||||
local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i);
|
||||
_mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);
|
||||
for(k = 0; k < 4; ++k)
|
||||
{
|
||||
_result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
|
||||
}
|
||||
indexn = _mm_add_ps(indexn, fours);
|
||||
}
|
||||
for(n = quarterPoints * 4; n < num_points; n++)
|
||||
{
|
||||
// resample code for current tap
|
||||
local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
|
||||
//Take into account that in multitap correlators, the shifts can be negative!
|
||||
if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1) ;
|
||||
local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
|
||||
_result[current_correlator_tap][n] = local_code[local_code_chip_index_];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse4_1(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
{
|
||||
float** _result = result;
|
||||
const unsigned int quarterPoints = num_points / 4;
|
||||
int current_correlator_tap;
|
||||
unsigned int n;
|
||||
unsigned int k;
|
||||
const __m128 fours = _mm_set1_ps(4.0f);
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
const __m128 code_length_chips_reg_f = _mm_set_ps1((float)code_length_chips);
|
||||
const __m128i code_length_chips_reg_i = _mm_set1_epi32((int)code_length_chips);
|
||||
__m128i local_code_chip_index_reg, aux_i, negatives, i;
|
||||
__m128 aux, aux2, shifts_chips_reg, c, cTrunc, base;
|
||||
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
shifts_chips_reg = _mm_set_ps1((float)shifts_chips[current_correlator_tap]);
|
||||
aux2 = _mm_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
|
||||
__m128 indexn = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
|
||||
for(n = 0; n < quarterPoints; n++)
|
||||
{
|
||||
aux = _mm_mul_ps(code_phase_step_chips_reg, indexn);
|
||||
aux = _mm_add_ps(aux, aux2);
|
||||
// floor
|
||||
aux = _mm_floor_ps(aux);
|
||||
|
||||
// fmod
|
||||
c = _mm_div_ps(aux, code_length_chips_reg_f);
|
||||
i = _mm_cvttps_epi32(c);
|
||||
cTrunc = _mm_cvtepi32_ps(i);
|
||||
base = _mm_mul_ps(cTrunc, code_length_chips_reg_f);
|
||||
local_code_chip_index_reg = _mm_cvtps_epi32(_mm_sub_ps(aux, base));
|
||||
|
||||
negatives = _mm_cmplt_epi32(local_code_chip_index_reg, zeros);
|
||||
aux_i = _mm_and_si128(code_length_chips_reg_i, negatives);
|
||||
local_code_chip_index_reg = _mm_add_epi32(local_code_chip_index_reg, aux_i);
|
||||
_mm_store_si128((__m128i*)local_code_chip_index, local_code_chip_index_reg);
|
||||
for(k = 0; k < 4; ++k)
|
||||
{
|
||||
_result[current_correlator_tap][n * 4 + k] = local_code[local_code_chip_index[k]];
|
||||
}
|
||||
indexn = _mm_add_ps(indexn, fours);
|
||||
}
|
||||
for(n = quarterPoints * 4; n < num_points; n++)
|
||||
{
|
||||
// resample code for current tap
|
||||
local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
|
||||
//Take into account that in multitap correlators, the shifts can be negative!
|
||||
if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1) ;
|
||||
local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
|
||||
_result[current_correlator_tap][n] = local_code[local_code_chip_index_];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_a_avx(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
{
|
||||
float** _result = result;
|
||||
const unsigned int avx_iters = num_points / 8;
|
||||
int current_correlator_tap;
|
||||
unsigned int n;
|
||||
unsigned int k;
|
||||
const __m256 eights = _mm256_set1_ps(8.0f);
|
||||
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
|
||||
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m256 zeros = _mm256_setzero_ps();
|
||||
const __m256 code_length_chips_reg_f = _mm256_set1_ps((float)code_length_chips);
|
||||
const __m256 n0 = _mm256_set_ps(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);
|
||||
|
||||
__m256i local_code_chip_index_reg, i;
|
||||
__m256 aux, aux2, aux3, shifts_chips_reg, c, cTrunc, base, negatives, indexn;
|
||||
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
shifts_chips_reg = _mm256_set1_ps((float)shifts_chips[current_correlator_tap]);
|
||||
aux2 = _mm256_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
|
||||
indexn = n0;
|
||||
for(n = 0; n < avx_iters; n++)
|
||||
{
|
||||
__VOLK_GNSSSDR_PREFETCH_LOCALITY(&_result[current_correlator_tap][8 * n + 7], 1, 0);
|
||||
__VOLK_GNSSSDR_PREFETCH_LOCALITY(&local_code_chip_index[8], 1, 3);
|
||||
aux = _mm256_mul_ps(code_phase_step_chips_reg, indexn);
|
||||
aux = _mm256_add_ps(aux, aux2);
|
||||
// floor
|
||||
aux = _mm256_floor_ps(aux);
|
||||
|
||||
// fmod
|
||||
c = _mm256_div_ps(aux, code_length_chips_reg_f);
|
||||
i = _mm256_cvttps_epi32(c);
|
||||
cTrunc = _mm256_cvtepi32_ps(i);
|
||||
base = _mm256_mul_ps(cTrunc, code_length_chips_reg_f);
|
||||
local_code_chip_index_reg = _mm256_cvttps_epi32(_mm256_sub_ps(aux, base));
|
||||
|
||||
// no negatives
|
||||
c = _mm256_cvtepi32_ps(local_code_chip_index_reg);
|
||||
negatives = _mm256_cmp_ps(c, zeros, 0x01 );
|
||||
aux3 = _mm256_and_ps(code_length_chips_reg_f, negatives);
|
||||
aux = _mm256_add_ps(c, aux3);
|
||||
local_code_chip_index_reg = _mm256_cvttps_epi32(aux);
|
||||
|
||||
_mm256_store_si256((__m256i*)local_code_chip_index, local_code_chip_index_reg);
|
||||
for(k = 0; k < 8; ++k)
|
||||
{
|
||||
_result[current_correlator_tap][n * 8 + k] = local_code[local_code_chip_index[k]];
|
||||
}
|
||||
indexn = _mm256_add_ps(indexn, eights);
|
||||
}
|
||||
}
|
||||
_mm256_zeroupper();
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
for(n = avx_iters * 8; n < num_points; n++)
|
||||
{
|
||||
// resample code for current tap
|
||||
local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
|
||||
//Take into account that in multitap correlators, the shifts can be negative!
|
||||
if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1) ;
|
||||
local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
|
||||
_result[current_correlator_tap][n] = local_code[local_code_chip_index_];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_u_avx(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
|
||||
{
|
||||
float** _result = result;
|
||||
const unsigned int avx_iters = num_points / 8;
|
||||
int current_correlator_tap;
|
||||
unsigned int n;
|
||||
unsigned int k;
|
||||
const __m256 eights = _mm256_set1_ps(8.0f);
|
||||
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
|
||||
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m256 zeros = _mm256_setzero_ps();
|
||||
const __m256 code_length_chips_reg_f = _mm256_set1_ps((float)code_length_chips);
|
||||
const __m256 n0 = _mm256_set_ps(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);
|
||||
|
||||
__m256i local_code_chip_index_reg, i;
|
||||
__m256 aux, aux2, aux3, shifts_chips_reg, c, cTrunc, base, negatives, indexn;
|
||||
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
shifts_chips_reg = _mm256_set1_ps((float)shifts_chips[current_correlator_tap]);
|
||||
aux2 = _mm256_sub_ps(shifts_chips_reg, rem_code_phase_chips_reg);
|
||||
indexn = n0;
|
||||
for(n = 0; n < avx_iters; n++)
|
||||
{
|
||||
__VOLK_GNSSSDR_PREFETCH_LOCALITY(&_result[current_correlator_tap][8 * n + 7], 1, 0);
|
||||
__VOLK_GNSSSDR_PREFETCH_LOCALITY(&local_code_chip_index[8], 1, 3);
|
||||
aux = _mm256_mul_ps(code_phase_step_chips_reg, indexn);
|
||||
aux = _mm256_add_ps(aux, aux2);
|
||||
// floor
|
||||
aux = _mm256_floor_ps(aux);
|
||||
|
||||
// fmod
|
||||
c = _mm256_div_ps(aux, code_length_chips_reg_f);
|
||||
i = _mm256_cvttps_epi32(c);
|
||||
cTrunc = _mm256_cvtepi32_ps(i);
|
||||
base = _mm256_mul_ps(cTrunc, code_length_chips_reg_f);
|
||||
local_code_chip_index_reg = _mm256_cvttps_epi32(_mm256_sub_ps(aux, base));
|
||||
|
||||
// no negatives
|
||||
c = _mm256_cvtepi32_ps(local_code_chip_index_reg);
|
||||
negatives = _mm256_cmp_ps(c, zeros, 0x01 );
|
||||
aux3 = _mm256_and_ps(code_length_chips_reg_f, negatives);
|
||||
aux = _mm256_add_ps(c, aux3);
|
||||
local_code_chip_index_reg = _mm256_cvttps_epi32(aux);
|
||||
|
||||
_mm256_store_si256((__m256i*)local_code_chip_index, local_code_chip_index_reg);
|
||||
for(k = 0; k < 8; ++k)
|
||||
{
|
||||
_result[current_correlator_tap][n * 8 + k] = local_code[local_code_chip_index[k]];
|
||||
}
|
||||
indexn = _mm256_add_ps(indexn, eights);
|
||||
}
|
||||
}
|
||||
_mm256_zeroupper();
|
||||
for (current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
for(n = avx_iters * 8; n < num_points; n++)
|
||||
{
|
||||
// resample code for current tap
|
||||
local_code_chip_index_ = (int)floor(code_phase_step_chips * (float)n + shifts_chips[current_correlator_tap] - rem_code_phase_chips);
|
||||
//Take into account that in multitap correlators, the shifts can be negative!
|
||||
if (local_code_chip_index_ < 0) local_code_chip_index_ += (int)code_length_chips * (abs(local_code_chip_index_) / code_length_chips + 1) ;
|
||||
local_code_chip_index_ = local_code_chip_index_ % code_length_chips;
|
||||
_result[current_correlator_tap][n] = local_code[local_code_chip_index_];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*
 * Multi-tap code resampler, NEON build.
 *
 * Four chip indices are computed per iteration from
 * code_phase_step_chips * n + shifts_chips[tap] - rem_code_phase_chips.
 * The floor is built from a truncating conversion plus a correction, and the
 * modulo uses a multiplication by 1 / code_length_chips, obtained from
 * vrecpeq_f32 followed by two vrecpsq_f32 refinement steps. Samples beyond the
 * last full group of four are handled by a scalar loop.
 */
static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_neon(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
{
    const unsigned int vector_iters = num_points / 4;
    const unsigned int first_tail_sample = vector_iters * 4;

    /* Loop-invariant broadcast constants. */
    const int32x4_t one_s32 = vdupq_n_s32(1);
    const int32x4_t zero_s32 = vdupq_n_s32(0);
    const float32x4_t four_f32 = vdupq_n_f32(4.0f);
    const float32x4_t rem_phase_f32 = vdupq_n_f32(rem_code_phase_chips);
    const float32x4_t phase_step_f32 = vdupq_n_f32(code_phase_step_chips);
    const float32x4_t code_len_f32 = vdupq_n_f32((float)code_length_chips);
    const int32x4_t code_len_s32 = vdupq_n_s32((int32_t)code_length_chips);

    __VOLK_ATTR_ALIGNED(16) int32_t chip_idx[4];
    __VOLK_ATTR_ALIGNED(16) const float first_samples[4] = {0.0f, 1.0f, 2.0f, 3.0f};
    float32x4_t inv_code_len_f32;
    float32x4_t first_samples_f32;
    int tap;
    unsigned int block;
    unsigned int lane;
    unsigned int n;

    /* 1 / code_length: initial estimate followed by two refinement steps */
    inv_code_len_f32 = vrecpeq_f32(code_len_f32);
    inv_code_len_f32 = vmulq_f32(vrecpsq_f32(code_len_f32, inv_code_len_f32), inv_code_len_f32);
    inv_code_len_f32 = vmulq_f32(vrecpsq_f32(code_len_f32, inv_code_len_f32), inv_code_len_f32);  // this refinement is required!
    first_samples_f32 = vld1q_f32((float*)first_samples);

    for (tap = 0; tap < num_out_vectors; tap++)
        {
            float* out = result[tap];
            /* Per-tap constant part of the chip position: shift - remnant phase. */
            const float32x4_t tap_offset_f32 = vsubq_f32(vdupq_n_f32((float)shifts_chips[tap]), rem_phase_f32);
            float32x4_t sample_idx_f32 = first_samples_f32;

            for (block = 0; block < vector_iters; block++)
                {
                    float32x4_t phase_f32;
                    float32x4_t truncated_f32;
                    float32x4_t round_up_f32;
                    float32x4_t floored_f32;
                    float32x4_t periods_f32;
                    int32x4_t idx_s32;
                    int32x4_t negative_mask_s32;

                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&out[4 * block + 3], 1, 0);
                    __VOLK_GNSSSDR_PREFETCH(&chip_idx[4]);

                    phase_f32 = vaddq_f32(vmulq_f32(phase_step_f32, sample_idx_f32), tap_offset_f32);

                    /* floor: truncate toward zero, then subtract 1 where truncation rounded up */
                    truncated_f32 = vcvtq_f32_s32(vcvtq_s32_f32(phase_f32));
                    round_up_f32 = vcvtq_f32_s32(vandq_s32(vreinterpretq_s32_u32(vcgtq_f32(truncated_f32, phase_f32)), one_s32));
                    floored_f32 = vsubq_f32(truncated_f32, round_up_f32);

                    /* fmod: remove the whole number of code periods */
                    periods_f32 = vcvtq_f32_s32(vcvtq_s32_f32(vmulq_f32(floored_f32, inv_code_len_f32)));
                    idx_s32 = vcvtq_s32_f32(vsubq_f32(floored_f32, vmulq_f32(periods_f32, code_len_f32)));

                    /* negative remainders are shifted up by one code length */
                    negative_mask_s32 = vreinterpretq_s32_u32(vcltq_s32(idx_s32, zero_s32));
                    idx_s32 = vaddq_s32(idx_s32, vandq_s32(code_len_s32, negative_mask_s32));

                    vst1q_s32((int32_t*)chip_idx, idx_s32);

                    for (lane = 0; lane < 4; ++lane)
                        {
                            out[block * 4 + lane] = local_code[chip_idx[lane]];
                        }
                    sample_idx_f32 = vaddq_f32(sample_idx_f32, four_f32);
                }

            /* scalar tail for the samples that do not fill a group of four */
            for (n = first_tail_sample; n < num_points; n++)
                {
                    int32_t idx;

                    __VOLK_GNSSSDR_PREFETCH_LOCALITY(&out[n], 1, 0);
                    idx = (int)floor(code_phase_step_chips * (float)n + shifts_chips[tap] - rem_code_phase_chips);
                    /* shifts can be negative in multitap correlators */
                    if (idx < 0) idx += (int)code_length_chips * (abs(idx) / code_length_chips + 1);
                    idx = idx % code_length_chips;
                    out[n] = local_code[idx];
                }
        }
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_32f_xn_resampler_32f_xn_H*/
|
||||
|
||||
|
@ -0,0 +1,320 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn.h
|
||||
* \brief VOLK_GNSSSDR kernel: multiplies N real (32-bit float) vectors
|
||||
* by a common vector, phase rotated and accumulates the results in N float complex outputs.
|
||||
* \authors <ul>
|
||||
* <li> Cillian O'Driscoll 2016. cillian.odriscoll(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* VOLK_GNSSSDR kernel that multiplies N 32-bit real (float) vectors by a common complex vector, which is
|
||||
* phase-rotated by phase offset and phase increment, and accumulates the results
|
||||
* in N 32 bits float complex outputs.
|
||||
* It is optimized to perform the N tap correlation process in GNSS receivers.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2016 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn
|
||||
*
|
||||
* \b Overview
|
||||
*
|
||||
* Rotates and multiplies the reference complex vector with an arbitrary number of other real vectors,
|
||||
* accumulates the results and stores them in the output vector.
|
||||
* The rotation is done at a fixed rate per sample, from an initial \p phase offset.
|
||||
* This function can be used for Doppler wipe-off and multiple correlator.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
* \code
|
||||
* void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const float** in_a, int num_a_vectors, unsigned int num_points);
|
||||
* \endcode
|
||||
*
|
||||
* \b Inputs
|
||||
* \li in_common: Pointer to one of the vectors to be rotated, multiplied and accumulated (reference vector).
|
||||
* \li phase_inc: Phase increment = lv_cmake(cos(phase_step_rad), sin(phase_step_rad))
|
||||
* \li phase: Initial phase = lv_cmake(cos(initial_phase_rad), sin(initial_phase_rad))
|
||||
* \li in_a: Pointer to an array of pointers to multiple vectors to be multiplied and accumulated.
|
||||
* \li num_a_vectors: Number of vectors to be multiplied by the reference vector and accumulated.
|
||||
* \li num_points: Number of complex values to be multiplied together, accumulated and stored into \p result.
|
||||
*
|
||||
* \b Outputs
|
||||
* \li phase: Final phase.
|
||||
* \li result: Vector of \p num_a_vectors components with the multiple vectors of \p in_a rotated, multiplied by \p in_common and accumulated.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_H
|
||||
#define INCLUDED_volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_H
|
||||
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <volk_gnsssdr/saturation_arithmetic.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*
 * Portable rotator + multi-vector dot product.
 *
 * Each sample of in_common is multiplied by the running phasor *phase, and the
 * rotated sample is then multiplied by the matching real sample of every
 * in_a[v] and accumulated into result[v]. The phasor advances by phase_inc
 * after every sample and is rescaled to unit modulus whenever the sample index
 * is a multiple of 256 (after that sample has already been rotated).
 * On return *phase holds the phasor for the sample following the last one.
 */
static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const float** in_a, int num_a_vectors, unsigned int num_points)
{
    unsigned int sample;
    int vec;

    /* start every accumulator from zero */
    for (vec = 0; vec < num_a_vectors; vec++)
        {
            result[vec] = lv_cmake(0,0);
        }

    for (sample = 0; sample < num_points; sample++)
        {
            /* Doppler wipe-off of the current input sample */
            const lv_32fc_t rotated = in_common[sample] * (*phase);

            /* Regenerate phase: pull the phasor back to unit modulus */
            if (sample % 256 == 0)
                {
#ifdef __cplusplus
                    (*phase) /= std::abs((*phase));
#else
                    (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
#endif
                }

            (*phase) *= phase_inc;

            for (vec = 0; vec < num_a_vectors; vec++)
                {
                    const lv_32fc_t product = rotated * in_a[vec][sample];
                    result[vec] += product;
                }
        }
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*
 * Portable rotator + multi-vector dot product, block-wise phase reload.
 *
 * Same accumulation as the plain generic kernel, but the samples are walked in
 * blocks of ROTATOR_RELOAD (256): the phasor *phase is rescaled to unit modulus
 * once after each complete block, and the remaining num_points % 256 samples
 * are processed without a further rescale.
 * On return *phase holds the phasor for the sample following the last one.
 */
static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const float** in_a, int num_a_vectors, unsigned int num_points)
{
    const unsigned int ROTATOR_RELOAD = 256;
    const unsigned int full_blocks = num_points / ROTATOR_RELOAD;
    const unsigned int tail_points = num_points % ROTATOR_RELOAD;
    const unsigned int tail_start = full_blocks * ROTATOR_RELOAD;
    unsigned int block;
    unsigned int offset;
    int vec;

    /* start every accumulator from zero */
    for (vec = 0; vec < num_a_vectors; vec++)
        {
            result[vec] = lv_cmake(0,0);
        }

    /* complete blocks, each followed by a phasor renormalisation */
    for (block = 0; block < full_blocks; block++)
        {
            const unsigned int block_start = block * ROTATOR_RELOAD;

            for (offset = 0; offset < ROTATOR_RELOAD; offset++)
                {
                    const lv_32fc_t rotated = in_common[block_start + offset] * (*phase);
                    (*phase) *= phase_inc;
                    for (vec = 0; vec < num_a_vectors; vec++)
                        {
                            const lv_32fc_t product = rotated * in_a[vec][block_start + offset];
                            result[vec] += product;
                        }
                }

            /* Regenerate phase */
#ifdef __cplusplus
            (*phase) /= std::abs((*phase));
#else
            (*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
#endif
        }

    /* leftover samples after the last complete block */
    for (offset = 0; offset < tail_points; offset++)
        {
            const lv_32fc_t rotated = in_common[tail_start + offset] * (*phase);
            (*phase) *= phase_inc;
            for (vec = 0; vec < num_a_vectors; vec++)
                {
                    const lv_32fc_t product = rotated * in_a[vec][tail_start + offset];
                    result[vec] += product;
                }
        }
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_avx_intrinsics.h>
|
||||
/*!
 * \brief AVX version (unaligned loads) of the rotator + multi-vector dot product.
 *
 * Multiplies every sample of \p in_common by a running phase (starting at
 * \p *phase and advanced by \p phase_inc per sample) and accumulates, for each
 * of the \p num_a_vectors real-valued vectors in \p in_a, the sum of the rotated
 * sample times the corresponding real sample.
 *
 * \param[out]    result        One accumulated complex value per vector in \p in_a
 * \param[in]     in_common     Complex input samples (num_points of them)
 * \param[in]     phase_inc     Per-sample phase increment
 * \param[in,out] phase         Initial phase on entry; phase after the last sample on return
 * \param[in]     in_a          Array of num_a_vectors pointers to real-valued vectors
 * \param[in]     num_a_vectors Number of vectors in \p in_a
 * \param[in]     num_points    Number of samples in \p in_common and in each vector of \p in_a
 */
static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const float** in_a, int num_a_vectors, unsigned int num_points)
{
    unsigned int number = 0;
    unsigned int vec_ind = 0;
    unsigned int i = 0;
    const unsigned int sixteenthPoints = num_points / 16;

    // The vectorised loop walks the complex input as interleaved floats (re, im, re, im, ...)
    const float* aPtr = (float*)in_common;
    const float* bPtr[num_a_vectors];
    for (vec_ind = 0; vec_ind < num_a_vectors; ++vec_ind)
        {
            bPtr[vec_ind] = in_a[vec_ind];
        }

    lv_32fc_t _phase = (*phase);
    lv_32fc_t wo;

    __m256 a0Val, a1Val, a2Val, a3Val;
    __m256 b0Val[num_a_vectors], b1Val[num_a_vectors], b2Val[num_a_vectors], b3Val[num_a_vectors];
    __m256 x0Val[num_a_vectors], x1Val[num_a_vectors], x0loVal[num_a_vectors], x0hiVal[num_a_vectors], x1loVal[num_a_vectors], x1hiVal[num_a_vectors];
    __m256 c0Val[num_a_vectors], c1Val[num_a_vectors], c2Val[num_a_vectors], c3Val[num_a_vectors];

    // Four independent accumulators per output vector; each register holds 4 complex partial sums
    __m256 dotProdVal0[num_a_vectors];
    __m256 dotProdVal1[num_a_vectors];
    __m256 dotProdVal2[num_a_vectors];
    __m256 dotProdVal3[num_a_vectors];

    for (vec_ind = 0; vec_ind < num_a_vectors; vec_ind++)
        {
            dotProdVal0[vec_ind] = _mm256_setzero_ps();
            dotProdVal1[vec_ind] = _mm256_setzero_ps();
            dotProdVal2[vec_ind] = _mm256_setzero_ps();
            dotProdVal3[vec_ind] = _mm256_setzero_ps();
        }

    // Set up the complex rotator: z0..z3 hold the phases for 16 consecutive samples
    __m256 z0, z1, z2, z3;
    __VOLK_ATTR_ALIGNED(32) lv_32fc_t phase_vec[16];
    for (vec_ind = 0; vec_ind < 16; ++vec_ind)
        {
            phase_vec[vec_ind] = _phase;
            _phase *= phase_inc;
        }

    z0 = _mm256_load_ps((float*)phase_vec);
    z1 = _mm256_load_ps((float*)(phase_vec + 4));
    z2 = _mm256_load_ps((float*)(phase_vec + 8));
    z3 = _mm256_load_ps((float*)(phase_vec + 12));

    lv_32fc_t dz = phase_inc;
    dz *= dz;
    dz *= dz;
    dz *= dz;
    dz *= dz;  // dz = phase_inc^16;

    // Broadcast dz over the 4 complex lanes of one register
    for (vec_ind = 0; vec_ind < 4; ++vec_ind)
        {
            phase_vec[vec_ind] = dz;
        }

    __m256 dz_reg = _mm256_load_ps((float*)phase_vec);
    dz_reg = _mm256_complexnormalise_ps(dz_reg);

    for (; number < sixteenthPoints; number++)
        {
            // Load 16 complex samples (32 floats) and rotate them
            a0Val = _mm256_loadu_ps(aPtr);
            a1Val = _mm256_loadu_ps(aPtr + 8);
            a2Val = _mm256_loadu_ps(aPtr + 16);
            a3Val = _mm256_loadu_ps(aPtr + 24);

            a0Val = _mm256_complexmul_ps(a0Val, z0);
            a1Val = _mm256_complexmul_ps(a1Val, z1);
            a2Val = _mm256_complexmul_ps(a2Val, z2);
            a3Val = _mm256_complexmul_ps(a3Val, z3);

            // Advance every rotator lane by 16 samples
            z0 = _mm256_complexmul_ps(z0, dz_reg);
            z1 = _mm256_complexmul_ps(z1, dz_reg);
            z2 = _mm256_complexmul_ps(z2, dz_reg);
            z3 = _mm256_complexmul_ps(z3, dz_reg);

            for (vec_ind = 0; vec_ind < num_a_vectors; ++vec_ind)
                {
                    x0Val[vec_ind] = _mm256_loadu_ps(bPtr[vec_ind]);  // t0|t1|t2|t3|t4|t5|t6|t7
                    x1Val[vec_ind] = _mm256_loadu_ps(bPtr[vec_ind] + 8);
                    x0loVal[vec_ind] = _mm256_unpacklo_ps(x0Val[vec_ind], x0Val[vec_ind]);  // t0|t0|t1|t1|t4|t4|t5|t5
                    x0hiVal[vec_ind] = _mm256_unpackhi_ps(x0Val[vec_ind], x0Val[vec_ind]);  // t2|t2|t3|t3|t6|t6|t7|t7
                    x1loVal[vec_ind] = _mm256_unpacklo_ps(x1Val[vec_ind], x1Val[vec_ind]);
                    x1hiVal[vec_ind] = _mm256_unpackhi_ps(x1Val[vec_ind], x1Val[vec_ind]);

                    // TODO: it may be possible to rearrange swizzling to better pipeline data
                    b0Val[vec_ind] = _mm256_permute2f128_ps(x0loVal[vec_ind], x0hiVal[vec_ind], 0x20);  // t0|t0|t1|t1|t2|t2|t3|t3
                    b1Val[vec_ind] = _mm256_permute2f128_ps(x0loVal[vec_ind], x0hiVal[vec_ind], 0x31);  // t4|t4|t5|t5|t6|t6|t7|t7
                    b2Val[vec_ind] = _mm256_permute2f128_ps(x1loVal[vec_ind], x1hiVal[vec_ind], 0x20);
                    b3Val[vec_ind] = _mm256_permute2f128_ps(x1loVal[vec_ind], x1hiVal[vec_ind], 0x31);

                    // Each real sample is duplicated so it scales both re and im of the rotated sample
                    c0Val[vec_ind] = _mm256_mul_ps(a0Val, b0Val[vec_ind]);
                    c1Val[vec_ind] = _mm256_mul_ps(a1Val, b1Val[vec_ind]);
                    c2Val[vec_ind] = _mm256_mul_ps(a2Val, b2Val[vec_ind]);
                    c3Val[vec_ind] = _mm256_mul_ps(a3Val, b3Val[vec_ind]);

                    dotProdVal0[vec_ind] = _mm256_add_ps(c0Val[vec_ind], dotProdVal0[vec_ind]);
                    dotProdVal1[vec_ind] = _mm256_add_ps(c1Val[vec_ind], dotProdVal1[vec_ind]);
                    dotProdVal2[vec_ind] = _mm256_add_ps(c2Val[vec_ind], dotProdVal2[vec_ind]);
                    dotProdVal3[vec_ind] = _mm256_add_ps(c3Val[vec_ind], dotProdVal3[vec_ind]);

                    bPtr[vec_ind] += 16;
                }

            // Force the rotators back onto the unit circle
            if ((number % 64) == 0)
                {
                    z0 = _mm256_complexnormalise_ps(z0);
                    z1 = _mm256_complexnormalise_ps(z1);
                    z2 = _mm256_complexnormalise_ps(z2);
                    z3 = _mm256_complexnormalise_ps(z3);
                }

            aPtr += 32;
        }

    __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector[4];

    // Horizontal reduction: fold the four accumulators, then the four complex lanes
    for (vec_ind = 0; vec_ind < num_a_vectors; ++vec_ind)
        {
            dotProdVal0[vec_ind] = _mm256_add_ps(dotProdVal0[vec_ind], dotProdVal1[vec_ind]);
            dotProdVal0[vec_ind] = _mm256_add_ps(dotProdVal0[vec_ind], dotProdVal2[vec_ind]);
            dotProdVal0[vec_ind] = _mm256_add_ps(dotProdVal0[vec_ind], dotProdVal3[vec_ind]);

            _mm256_store_ps((float*)dotProductVector, dotProdVal0[vec_ind]);  // Store the results back into the dot product vector

            result[vec_ind] = lv_cmake(0, 0);
            for (i = 0; i < 4; ++i)
                {
                    result[vec_ind] += dotProductVector[i];
                }
        }

    // Lane 0 of z0 is the phase of the first sample not yet processed
    z0 = _mm256_complexnormalise_ps(z0);
    _mm256_store_ps((float*)phase_vec, z0);
    _phase = phase_vec[0];
    _mm256_zeroupper();

    // Scalar tail for the last (num_points % 16) samples. Index the complex input
    // directly: aPtr is a float pointer, so dereferencing it would yield a single
    // float (re or im) instead of a complex sample.
    number = sixteenthPoints * 16;
    for (; number < num_points; number++)
        {
            wo = in_common[number] * _phase;
            _phase *= phase_inc;

            for (vec_ind = 0; vec_ind < num_a_vectors; ++vec_ind)
                {
                    result[vec_ind] += wo * in_a[vec_ind][number];
                }
        }

    *phase = _phase;
}
|
||||
|
||||
#endif /* LV_HAVE_AVX */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_H */
|
||||
|
||||
|
@ -0,0 +1,132 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc.h
|
||||
* \brief Volk puppet for the rotator and multiple dot product kernel (32-bit float complex input, 32-bit float real local codes).
|
||||
* \authors <ul>
|
||||
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
|
||||
* </ul>
|
||||
*
|
||||
* Volk puppet for integrating the rotator and multiple dot product kernel into volk's test system
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_H
|
||||
#define INCLUDED_volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_H
|
||||
|
||||
#include "volk_gnsssdr/volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn.h"
|
||||
#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
 * Puppet around the generic rotator dot-product kernel: builds three copies of
 * \p in as the real-valued vectors, calls the kernel with a fixed start phase and
 * phase step, and releases the temporary copies afterwards.
 */
static inline void volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_generic(lv_32fc_t* result, const lv_32fc_t* local_code, const float* in, unsigned int num_points)
{
    // phases must be normalized. Phase rotator expects a complex exponential input!
    const float rem_carrier_phase_in_rad = 0.25;
    const float phase_step_rad = 0.1;
    lv_32fc_t start_phase = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
    const lv_32fc_t phase_step = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));

    int num_a_vectors = 3;
    unsigned int k;

    // One private copy of the real input per vector handed to the kernel
    float** in_a = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_a_vectors, volk_gnsssdr_get_alignment());
    for (k = 0; k < num_a_vectors; k++)
        {
            in_a[k] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
            memcpy(in_a[k], in, sizeof(float) * num_points);
        }

    volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic(result, local_code, phase_step, &start_phase, (const float**)in_a, num_a_vectors, num_points);

    for (k = 0; k < num_a_vectors; k++)
        {
            volk_gnsssdr_free(in_a[k]);
        }
    volk_gnsssdr_free(in_a);
}
|
||||
#endif // Generic
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
 * Puppet around the generic_reload rotator dot-product kernel: builds three copies
 * of \p in as the real-valued vectors, calls the kernel with a fixed start phase
 * and phase step, and releases the temporary copies afterwards.
 */
static inline void volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_generic_reload(lv_32fc_t* result, const lv_32fc_t* local_code, const float* in, unsigned int num_points)
{
    // phases must be normalized. Phase rotator expects a complex exponential input!
    const float rem_carrier_phase_in_rad = 0.25;
    const float phase_step_rad = 0.1;
    lv_32fc_t start_phase = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
    const lv_32fc_t phase_step = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));

    int num_a_vectors = 3;
    unsigned int k;

    // One private copy of the real input per vector handed to the kernel
    float** in_a = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_a_vectors, volk_gnsssdr_get_alignment());
    for (k = 0; k < num_a_vectors; k++)
        {
            in_a[k] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
            memcpy(in_a[k], in, sizeof(float) * num_points);
        }

    volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload(result, local_code, phase_step, &start_phase, (const float**)in_a, num_a_vectors, num_points);

    for (k = 0; k < num_a_vectors; k++)
        {
            volk_gnsssdr_free(in_a[k]);
        }
    volk_gnsssdr_free(in_a);
}
|
||||
|
||||
#endif // Generic
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
/*!
 * Puppet around the unaligned AVX rotator dot-product kernel: builds three copies
 * of \p in as the real-valued vectors, calls the kernel with a fixed start phase
 * and phase step, and releases the temporary copies afterwards.
 */
static inline void volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_u_avx(lv_32fc_t* result, const lv_32fc_t* local_code, const float* in, unsigned int num_points)
{
    // phases must be normalized. Phase rotator expects a complex exponential input!
    const float rem_carrier_phase_in_rad = 0.25;
    const float phase_step_rad = 0.1;
    lv_32fc_t start_phase = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
    const lv_32fc_t phase_step = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));

    int num_a_vectors = 3;
    unsigned int k;

    // One private copy of the real input per vector handed to the kernel
    float** in_a = (float**)volk_gnsssdr_malloc(sizeof(float*) * num_a_vectors, volk_gnsssdr_get_alignment());
    for (k = 0; k < num_a_vectors; k++)
        {
            in_a[k] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
            memcpy(in_a[k], in, sizeof(float) * num_points);
        }

    volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(result, local_code, phase_step, &start_phase, (const float**)in_a, num_a_vectors, num_points);

    for (k = 0; k < num_a_vectors; k++)
        {
            volk_gnsssdr_free(in_a[k]);
        }
    volk_gnsssdr_free(in_a);
}
|
||||
|
||||
#endif // AVX
|
||||
|
||||
#endif // INCLUDED_volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_H
|
||||
|
@ -46,8 +46,8 @@
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_generic(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 1023;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -70,14 +70,15 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_generic(lv_32fc_t* r
|
||||
volk_gnsssdr_free(result_aux);
|
||||
}
|
||||
|
||||
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_sse3(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 1023;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -105,8 +106,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_sse3(lv_32fc_t* re
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 1023;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -135,8 +136,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_sse3(lv_32fc_t* re
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_sse4_1(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 1023;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -164,8 +165,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_sse4_1(lv_32fc_t*
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_sse4_1(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 1023;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -193,8 +194,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_sse4_1(lv_32fc_t*
|
||||
#ifdef LV_HAVE_AVX
|
||||
static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_avx(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 1023;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -222,8 +223,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_avx(lv_32fc_t* res
|
||||
#ifdef LV_HAVE_AVX
|
||||
static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_avx(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 1023;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -251,8 +252,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_avx(lv_32fc_t* res
|
||||
#ifdef LV_HAVE_AVX2
|
||||
static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_avx2(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 1023;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -280,8 +281,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_a_avx2(lv_32fc_t* re
|
||||
#ifdef LV_HAVE_AVX2
|
||||
static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_avx2(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 1023;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
@ -309,8 +310,8 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_avx2(lv_32fc_t* re
|
||||
#ifdef LV_HAVE_NEON
|
||||
static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_neon(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
float code_phase_step_chips = -0.6;
|
||||
int code_length_chips = 1023;
|
||||
int code_length_chips = 2046;
|
||||
float code_phase_step_chips = ((float)(code_length_chips) + 0.1 )/( (float) num_points );
|
||||
int num_out_vectors = 3;
|
||||
float rem_code_phase_chips = -0.234;
|
||||
unsigned int n;
|
||||
|
@ -89,10 +89,14 @@ std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_resamplerfastpuppet_16ic, volk_gnsssdr_16ic_resampler_fast_16ic, test_params))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic, volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn, test_params))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_resamplerxnpuppet_16ic, volk_gnsssdr_16ic_xn_resampler_16ic_xn, test_params))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_16i_resamplerxnpuppet_16i, volk_gnsssdr_16i_xn_resampler_16i_xn, test_params))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_32fc_resamplerxnpuppet_32fc, volk_gnsssdr_32fc_xn_resampler_32fc_xn, test_params))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_32f_resamplerxnpuppet_32f, volk_gnsssdr_32f_xn_resampler_32f_xn, test_params))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_dot_prod_16ic_xn, test_params))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn, test_params_int16))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn, test_params_int16))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc, volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn, test_params_int1))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc, volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn, test_params_int1))
|
||||
;
|
||||
|
||||
return test_cases;
|
||||
|
@ -717,7 +717,7 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
|
||||
{
|
||||
if(both_sigs[j].is_signed)
|
||||
{
|
||||
fail = icompare((int8_t *) test_data[generic_offset][j], (int8_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
|
||||
fail = icompare((int16_t *) test_data[generic_offset][j], (int16_t *) test_data[i][j], vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -11,7 +11,7 @@
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
* Copyright (C) 2010-2017 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
@ -129,7 +129,7 @@ galileo_e1_dll_pll_veml_tracking_cc::galileo_e1_dll_pll_veml_tracking_cc(
|
||||
|
||||
// Initialization of local code replica
|
||||
// Get space for a vector with the sinboc(1,1) replica sampled 2x/chip
|
||||
d_ca_code = static_cast<gr_complex*>(volk_gnsssdr_malloc((2 * Galileo_E1_B_CODE_LENGTH_CHIPS) * sizeof(gr_complex), volk_gnsssdr_get_alignment()));
|
||||
d_ca_code = static_cast<float*>(volk_gnsssdr_malloc((2 * Galileo_E1_B_CODE_LENGTH_CHIPS) * sizeof(float), volk_gnsssdr_get_alignment()));
|
||||
|
||||
// correlator outputs (scalar)
|
||||
d_n_correlator_taps = 5; // Very-Early, Early, Prompt, Late, Very-Late
|
||||
@ -211,7 +211,7 @@ void galileo_e1_dll_pll_veml_tracking_cc::start_tracking()
|
||||
d_code_loop_filter.initialize(); // initialize the code filter
|
||||
|
||||
// generate local reference ALWAYS starting at chip 1 (2 samples per chip)
|
||||
galileo_e1_code_gen_complex_sampled(d_ca_code,
|
||||
galileo_e1_code_gen_float_sampled(d_ca_code,
|
||||
d_acquisition_gnss_synchro->Signal,
|
||||
false,
|
||||
d_acquisition_gnss_synchro->PRN,
|
||||
|
@ -6,7 +6,7 @@
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
* Copyright (C) 2010-2017 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
@ -39,7 +39,7 @@
|
||||
#include "gnss_synchro.h"
|
||||
#include "tracking_2nd_DLL_filter.h"
|
||||
#include "tracking_2nd_PLL_filter.h"
|
||||
#include "cpu_multicorrelator.h"
|
||||
#include "cpu_multicorrelator_real_codes.h"
|
||||
|
||||
class galileo_e1_dll_pll_veml_tracking_cc;
|
||||
|
||||
@ -120,10 +120,10 @@ private:
|
||||
double d_early_late_spc_chips;
|
||||
double d_very_early_late_spc_chips;
|
||||
|
||||
gr_complex* d_ca_code;
|
||||
float* d_ca_code;
|
||||
float* d_local_code_shift_chips;
|
||||
gr_complex* d_correlator_outs;
|
||||
cpu_multicorrelator multicorrelator_cpu;
|
||||
cpu_multicorrelator_real_codes multicorrelator_cpu;
|
||||
|
||||
gr_complex *d_Very_Early;
|
||||
gr_complex *d_Early;
|
||||
|
@ -123,16 +123,16 @@ Gps_L1_Ca_Dll_Pll_Tracking_cc::Gps_L1_Ca_Dll_Pll_Tracking_cc(
|
||||
|
||||
// Initialization of local code replica
|
||||
// Get space for a vector with the C/A code replica sampled 1x/chip
|
||||
d_ca_code = static_cast<gr_complex*>(volk_gnsssdr_malloc(static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS) * sizeof(gr_complex), volk_gnsssdr_get_alignment()));
|
||||
d_ca_code = static_cast<float*>(volk_gnsssdr_malloc(static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS) * sizeof(float), volk_gnsssdr_get_alignment()));
|
||||
|
||||
// correlator outputs (scalar)
|
||||
d_n_correlator_taps = 3; // Early, Prompt, and Late
|
||||
d_correlator_outs = static_cast<gr_complex*>(volk_gnsssdr_malloc(d_n_correlator_taps*sizeof(gr_complex), volk_gnsssdr_get_alignment()));
|
||||
d_correlator_outs = static_cast<gr_complex*>(volk_gnsssdr_malloc(d_n_correlator_taps * sizeof(gr_complex), volk_gnsssdr_get_alignment()));
|
||||
for (int n = 0; n < d_n_correlator_taps; n++)
|
||||
{
|
||||
d_correlator_outs[n] = gr_complex(0,0);
|
||||
}
|
||||
d_local_code_shift_chips = static_cast<float*>(volk_gnsssdr_malloc(d_n_correlator_taps*sizeof(float), volk_gnsssdr_get_alignment()));
|
||||
d_local_code_shift_chips = static_cast<float*>(volk_gnsssdr_malloc(d_n_correlator_taps * sizeof(float), volk_gnsssdr_get_alignment()));
|
||||
// Set TAPs delay values [chips]
|
||||
d_local_code_shift_chips[0] = - d_early_late_spc_chips;
|
||||
d_local_code_shift_chips[1] = 0.0;
|
||||
@ -194,7 +194,7 @@ void Gps_L1_Ca_Dll_Pll_Tracking_cc::start_tracking()
|
||||
long int acq_trk_diff_samples;
|
||||
double acq_trk_diff_seconds;
|
||||
acq_trk_diff_samples = static_cast<long int>(d_sample_counter) - static_cast<long int>(d_acq_sample_stamp); //-d_vector_length;
|
||||
DLOG(INFO) << "Number of samples between Acquisition and Tracking =" << acq_trk_diff_samples;
|
||||
DLOG(INFO) << "Number of samples between Acquisition and Tracking = " << acq_trk_diff_samples;
|
||||
acq_trk_diff_seconds = static_cast<float>(acq_trk_diff_samples) / static_cast<float>(d_fs_in);
|
||||
// Doppler effect
|
||||
// Fd=(C/(C+Vr))*F
|
||||
@ -233,7 +233,7 @@ void Gps_L1_Ca_Dll_Pll_Tracking_cc::start_tracking()
|
||||
d_code_loop_filter.initialize(); // initialize the code filter
|
||||
|
||||
// generate local reference ALWAYS starting at chip 1 (1 sample per chip)
|
||||
gps_l1_ca_code_gen_complex(d_ca_code, d_acquisition_gnss_synchro->PRN, 0);
|
||||
gps_l1_ca_code_gen_float(d_ca_code, d_acquisition_gnss_synchro->PRN, 0);
|
||||
|
||||
multicorrelator_cpu.set_local_code_and_taps(static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS), d_ca_code, d_local_code_shift_chips);
|
||||
for (int n = 0; n < d_n_correlator_taps; n++)
|
||||
|
@ -3,6 +3,7 @@
|
||||
* \brief Interface of a code DLL + carrier PLL tracking block
|
||||
* \author Carlos Aviles, 2010. carlos.avilesr(at)googlemail.com
|
||||
* Javier Arribas, 2011. jarribas(at)cttc.es
|
||||
* Cillian O'Driscoll, 2017. cillian.odriscoll(at)gmail.com
|
||||
*
|
||||
* Code DLL + carrier PLL according to the algorithms described in:
|
||||
* K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
|
||||
@ -44,7 +45,7 @@
|
||||
#include "gnss_synchro.h"
|
||||
#include "tracking_2nd_DLL_filter.h"
|
||||
#include "tracking_2nd_PLL_filter.h"
|
||||
#include "cpu_multicorrelator.h"
|
||||
#include "cpu_multicorrelator_real_codes.h"
|
||||
|
||||
class Gps_L1_Ca_Dll_Pll_Tracking_cc;
|
||||
|
||||
@ -126,11 +127,10 @@ private:
|
||||
double d_acq_carrier_doppler_hz;
|
||||
// correlator
|
||||
int d_n_correlator_taps;
|
||||
gr_complex* d_ca_code;
|
||||
float* d_ca_code;
|
||||
float* d_local_code_shift_chips;
|
||||
gr_complex* d_correlator_outs;
|
||||
cpu_multicorrelator multicorrelator_cpu;
|
||||
|
||||
cpu_multicorrelator_real_codes multicorrelator_cpu;
|
||||
|
||||
// tracking vars
|
||||
double d_code_freq_chips;
|
||||
|
@ -33,6 +33,7 @@ endif(ENABLE_CUDA)
|
||||
|
||||
set(TRACKING_LIB_SOURCES
|
||||
cpu_multicorrelator.cc
|
||||
cpu_multicorrelator_real_codes.cc
|
||||
cpu_multicorrelator_16sc.cc
|
||||
lock_detectors.cc
|
||||
tcp_communication.cc
|
||||
|
147
src/algorithms/tracking/libs/cpu_multicorrelator_real_codes.cc
Normal file
147
src/algorithms/tracking/libs/cpu_multicorrelator_real_codes.cc
Normal file
@ -0,0 +1,147 @@
|
||||
/*!
|
||||
* \file cpu_multicorrelator_real_codes.cc
|
||||
* \brief High optimized CPU vector multiTAP correlator class with real-valued local codes
|
||||
* \authors <ul>
|
||||
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
||||
* <li> Cillian O'Driscoll, 2017. cillian.odriscoll(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Class that implements a high optimized vector multiTAP correlator class for CPUs
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2017 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "cpu_multicorrelator_real_codes.h"
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
|
||||
|
||||
// Start with no buffers attached and no correlators configured.
cpu_multicorrelator_real_codes::cpu_multicorrelator_real_codes()
    : d_sig_in(nullptr),
      d_local_codes_resampled(nullptr),
      d_local_code_in(nullptr),
      d_corr_out(nullptr),
      d_shifts_chips(nullptr),
      d_code_length_chips(0),
      d_n_correlators(0)
{
}
|
||||
|
||||
|
||||
// Release the resampled-code buffers if they are still allocated.
cpu_multicorrelator_real_codes::~cpu_multicorrelator_real_codes()
{
    const bool buffers_allocated = (d_local_codes_resampled != nullptr);
    if (!buffers_allocated)
        {
            return;
        }
    cpu_multicorrelator_real_codes::free();
}
|
||||
|
||||
|
||||
bool cpu_multicorrelator_real_codes::init(
|
||||
int max_signal_length_samples,
|
||||
int n_correlators)
|
||||
{
|
||||
// ALLOCATE MEMORY FOR INTERNAL vectors
|
||||
size_t size = max_signal_length_samples * sizeof(float);
|
||||
|
||||
d_local_codes_resampled = static_cast<float**>(volk_gnsssdr_malloc(n_correlators * sizeof(float*), volk_gnsssdr_get_alignment()));
|
||||
for (int n = 0; n < n_correlators; n++)
|
||||
{
|
||||
d_local_codes_resampled[n] = static_cast<float*>(volk_gnsssdr_malloc(size, volk_gnsssdr_get_alignment()));
|
||||
}
|
||||
d_n_correlators = n_correlators;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool cpu_multicorrelator_real_codes::set_local_code_and_taps(
|
||||
int code_length_chips,
|
||||
const float* local_code_in,
|
||||
float *shifts_chips)
|
||||
{
|
||||
d_local_code_in = local_code_in;
|
||||
d_shifts_chips = shifts_chips;
|
||||
d_code_length_chips = code_length_chips;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool cpu_multicorrelator_real_codes::set_input_output_vectors(std::complex<float>* corr_out, const std::complex<float>* sig_in)
|
||||
{
|
||||
// Save CPU pointers
|
||||
d_sig_in = sig_in;
|
||||
d_corr_out = corr_out;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void cpu_multicorrelator_real_codes::update_local_code(int correlator_length_samples, float rem_code_phase_chips, float code_phase_step_chips)
|
||||
{
|
||||
volk_gnsssdr_32f_xn_resampler_32f_xn(d_local_codes_resampled,
|
||||
d_local_code_in,
|
||||
rem_code_phase_chips,
|
||||
code_phase_step_chips,
|
||||
d_shifts_chips,
|
||||
d_code_length_chips,
|
||||
d_n_correlators,
|
||||
correlator_length_samples);
|
||||
}
|
||||
|
||||
|
||||
bool cpu_multicorrelator_real_codes::Carrier_wipeoff_multicorrelator_resampler(
|
||||
float rem_carrier_phase_in_rad,
|
||||
float phase_step_rad,
|
||||
float rem_code_phase_chips,
|
||||
float code_phase_step_chips,
|
||||
int signal_length_samples)
|
||||
{
|
||||
update_local_code(signal_length_samples, rem_code_phase_chips, code_phase_step_chips);
|
||||
// Regenerate phase at each call in order to avoid numerical issues
|
||||
lv_32fc_t phase_offset_as_complex[1];
|
||||
phase_offset_as_complex[0] = lv_cmake(std::cos(rem_carrier_phase_in_rad), -std::sin(rem_carrier_phase_in_rad));
|
||||
// call VOLK_GNSSSDR kernel
|
||||
volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn(d_corr_out, d_sig_in, std::exp(lv_32fc_t(0, - phase_step_rad)), phase_offset_as_complex, (const float**)d_local_codes_resampled, d_n_correlators, signal_length_samples);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool cpu_multicorrelator_real_codes::free()
|
||||
{
|
||||
// Free memory
|
||||
if (d_local_codes_resampled != nullptr)
|
||||
{
|
||||
for (int n = 0; n < d_n_correlators; n++)
|
||||
{
|
||||
volk_gnsssdr_free(d_local_codes_resampled[n]);
|
||||
}
|
||||
volk_gnsssdr_free(d_local_codes_resampled);
|
||||
d_local_codes_resampled = nullptr;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,70 @@
|
||||
/*!
|
||||
* \file cpu_multicorrelator_real_codes.h
|
||||
* \brief High optimized CPU vector multiTAP correlator class using real-valued local codes
|
||||
* \authors <ul>
|
||||
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
||||
* <li> Cillian O'Driscoll, 2017, cillian.odriscoll(at)gmail.com
|
||||
* </ul>
|
||||
*
|
||||
* Class that implements a high optimized vector multiTAP correlator class for CPUs
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2017 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef GNSS_SDR_CPU_MULTICORRELATOR_REAL_CODES_H_
|
||||
#define GNSS_SDR_CPU_MULTICORRELATOR_REAL_CODES_H_
|
||||
|
||||
|
||||
#include <complex>
|
||||
|
||||
/*!
|
||||
* \brief Class that implements carrier wipe-off and correlators.
|
||||
*/
|
||||
class cpu_multicorrelator_real_codes
{
public:
    cpu_multicorrelator_real_codes();
    ~cpu_multicorrelator_real_codes();

    //! Sets up the correlator for a maximum signal length and a number of correlators (taps).
    //! NOTE(review): presumably this is where the resampled code buffers released by free() are allocated - confirm in the .cc file.
    bool init(int max_signal_length_samples, int n_correlators);

    //! Registers the real-valued local code (length given in chips) and the per-tap shifts, in chips.
    //! NOTE(review): the pointers appear to be stored, not copied - callers should keep the buffers alive; confirm.
    bool set_local_code_and_taps(int code_length_chips, const float* local_code_in, float* shifts_chips);

    //! Registers the correlator output vector and the (read-only) input signal vector.
    bool set_input_output_vectors(std::complex<float>* corr_out, const std::complex<float>* sig_in);

    //! Updates the local code replicas for the given correlation length, remnant code phase and code phase step.
    void update_local_code(int correlator_length_samples, float rem_code_phase_chips, float code_phase_step_chips);

    //! Runs the carrier wipe-off and the multi-tap correlation over signal_length_samples samples.
    //! Note the argument order: the remnant code phase comes BEFORE the code phase step.
    bool Carrier_wipeoff_multicorrelator_resampler(float rem_carrier_phase_in_rad,
        float phase_step_rad,
        float rem_code_phase_chips,
        float code_phase_step_chips,
        int signal_length_samples);

    //! Frees the resampled local code buffers; safe to call more than once. Always returns true.
    bool free();

private:
    // NOTE(review): a destructor is declared and raw pointers are held, but no copy
    // operations are declared or deleted - confirm that objects are never copied.

    const std::complex<float>* d_sig_in;    // input signal samples (not modified through this pointer)
    float** d_local_codes_resampled;        // table of d_n_correlators buffers, released by free()
    const float* d_local_code_in;           // real-valued local code, as passed to set_local_code_and_taps()
    std::complex<float>* d_corr_out;        // correlator outputs
    float* d_shifts_chips;                  // per-tap code shifts, in chips
    int d_code_length_chips;                // local code length, in chips
    int d_n_correlators;                    // number of correlators (taps)
};
|
||||
|
||||
|
||||
#endif /* GNSS_SDR_CPU_MULTICORRELATOR_REAL_CODES_H_ */
|
||||
|
@ -116,6 +116,7 @@ DECLARE_string(log_dir);
|
||||
#include "unit-tests/signal-processing-blocks/tracking/galileo_e5a_tracking_test.cc"
|
||||
#include "unit-tests/signal-processing-blocks/tracking/tracking_loop_filter_test.cc"
|
||||
#include "unit-tests/signal-processing-blocks/tracking/cpu_multicorrelator_test.cc"
|
||||
#include "unit-tests/signal-processing-blocks/tracking/cpu_multicorrelator_real_codes_test.cc"
|
||||
|
||||
#if CUDA_BLOCKS_TEST
|
||||
#include "unit-tests/signal-processing-blocks/tracking/gpu_multicorrelator_test.cc"
|
||||
|
@ -0,0 +1,173 @@
|
||||
/*!
|
||||
* \file cpu_multicorrelator_real_codes_test.cc
|
||||
* \brief This file implements timing tests for the CPU multicorrelator with real-valued local codes.
|
||||
* \author Carles Fernandez-Prades, 2016. cfernandez(at)cttc.es
|
||||
*
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2016 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include <chrono>
|
||||
#include <complex>
|
||||
#include <random>
|
||||
#include <thread>
|
||||
#include <gtest/gtest.h>
|
||||
#include <gflags/gflags.h>
|
||||
#include <gnuradio/gr_complex.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include "cpu_multicorrelator_real_codes.h"
|
||||
#include "gps_sdr_signal_processing.h"
|
||||
#include "GPS_L1_CA.h"
|
||||
|
||||
|
||||
// Command-line flag: how many times each worker thread repeats the correlation;
// the timing test also divides the measured wall time by this value.
DEFINE_int32(cpu_multicorrelator_real_codes_iterations_test, 1000, "Number of averaged iterations in CPU multicorrelator test timing test");
|
||||
// Command-line flag: size of the correlator pool, i.e. the largest number of
// correlators (one thread each) that the timing test runs concurrently.
DEFINE_int32(cpu_multicorrelator_real_codes_max_threads_test, 12, "Number of maximum concurrent correlators in CPU multicorrelator test timing test");
|
||||
|
||||
/*!
 * \brief Thread worker used by the timing test: runs one correlator repeatedly.
 *
 * Calls Carrier_wipeoff_multicorrelator_resampler() on \p correlator
 * FLAGS_cpu_multicorrelator_real_codes_iterations_test times, always with the
 * same arguments. The boolean result of each call is not inspected.
 *
 * \param correlator                Correlator object to exercise (must be non-null and already configured).
 * \param d_rem_carrier_phase_rad   Remnant carrier phase, in radians.
 * \param d_carrier_phase_step_rad  Carrier phase step, in radians.
 * \param d_code_phase_step_chips   Code phase step, in chips.
 * \param d_rem_code_phase_chips    Remnant code phase, in chips.
 * \param correlation_size          Number of signal samples to correlate.
 */
void run_correlator_cpu_real_codes(cpu_multicorrelator_real_codes* correlator,
    float d_rem_carrier_phase_rad,
    float d_carrier_phase_step_rad,
    float d_code_phase_step_chips,
    float d_rem_code_phase_chips,
    int correlation_size)
{
    const int n_iterations = FLAGS_cpu_multicorrelator_real_codes_iterations_test;
    for (int k = 0; k < n_iterations; k++)
        {
            // The correlator takes the remnant code phase BEFORE the code phase
            // step, which is the opposite of this function's parameter order.
            correlator->Carrier_wipeoff_multicorrelator_resampler(d_rem_carrier_phase_rad,
                d_carrier_phase_step_rad,
                d_rem_code_phase_chips,
                d_code_phase_step_chips,
                correlation_size);
        }
}
|
||||
|
||||
|
||||
/*!
 * Timing test for cpu_multicorrelator_real_codes.
 *
 * For each correlation length (2048, 4096 and 8192 samples) and for each number
 * of concurrent correlators from 1 up to
 * FLAGS_cpu_multicorrelator_real_codes_max_threads_test, one thread per
 * correlator is launched, all threads are joined, and the wall time divided by
 * the number of iterations is printed. No correlation result is checked; the
 * test only verifies that nothing throws.
 */
TEST(CpuMulticorrelatorRealCodesTest, MeasureExecutionTime)
{
    const int max_threads = FLAGS_cpu_multicorrelator_real_codes_max_threads_test;
    // The correlator pool is sized by this flag: reject non-positive values
    // before anything is allocated.
    ASSERT_GT(max_threads, 0);

    // A monotonic clock is used because only time intervals are measured
    std::chrono::time_point<std::chrono::steady_clock> start, end;
    std::chrono::duration<double> elapsed_seconds(0);
    std::vector<std::thread> thread_pool;
    std::vector<cpu_multicorrelator_real_codes*> correlator_pool(max_threads, nullptr);
    const unsigned int correlation_sizes[3] = {2048, 4096, 8192};
    double execution_times[3];

    const int d_n_correlator_taps = 3;  // Early, Prompt, and Late
    const int d_vector_length = static_cast<int>(correlation_sizes[2]);  // max correlation size to allocate all the necessary memory

    // allocate host memory
    // Get space for a vector with the C/A code replica sampled 1x/chip
    float* d_ca_code = static_cast<float*>(volk_gnsssdr_malloc(static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS) * sizeof(float), volk_gnsssdr_get_alignment()));
    gr_complex* in_cpu = static_cast<gr_complex*>(volk_gnsssdr_malloc(2 * d_vector_length * sizeof(gr_complex), volk_gnsssdr_get_alignment()));

    // correlator outputs (scalar)
    // NOTE(review): this single output buffer is handed to every correlator in
    // the pool, so concurrently running threads presumably write to the same
    // memory. The values are never read here, but confirm the sharing is intended.
    gr_complex* d_correlator_outs = static_cast<gr_complex*>(volk_gnsssdr_malloc(d_n_correlator_taps * sizeof(gr_complex), volk_gnsssdr_get_alignment()));
    for (int n = 0; n < d_n_correlator_taps; n++)
        {
            d_correlator_outs[n] = gr_complex(0, 0);
        }

    // Set TAPs delay values [chips]
    float* d_local_code_shift_chips = static_cast<float*>(volk_gnsssdr_malloc(d_n_correlator_taps * sizeof(float), volk_gnsssdr_get_alignment()));
    const float d_early_late_spc_chips = 0.5;
    d_local_code_shift_chips[0] = -d_early_late_spc_chips;
    d_local_code_shift_chips[1] = 0.0;
    d_local_code_shift_chips[2] = d_early_late_spc_chips;

    //--- Perform initializations ------------------------------

    // generate local reference (1 sample per chip)
    gps_l1_ca_code_gen_float(d_ca_code, 1, 0);

    // generate input signal
    std::random_device r;
    std::default_random_engine e1(r());
    std::uniform_real_distribution<float> uniform_dist(0, 1);
    for (int n = 0; n < 2 * d_vector_length; n++)
        {
            in_cpu[n] = std::complex<float>(uniform_dist(e1), uniform_dist(e1));
        }

    // create and configure one correlator per potential thread
    for (int n = 0; n < max_threads; n++)
        {
            correlator_pool[n] = new cpu_multicorrelator_real_codes();
            correlator_pool[n]->init(d_vector_length, d_n_correlator_taps);
            correlator_pool[n]->set_input_output_vectors(d_correlator_outs, in_cpu);
            correlator_pool[n]->set_local_code_and_taps(static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS), d_ca_code, d_local_code_shift_chips);
        }

    const float d_rem_carrier_phase_rad = 0.0;
    const float d_carrier_phase_step_rad = 0.1;
    const float d_code_phase_step_chips = 0.3;
    const float d_rem_code_phase_chips = 0.4;

    EXPECT_NO_THROW(
        for (int correlation_sizes_idx = 0; correlation_sizes_idx < 3; correlation_sizes_idx++)
            {
                for (int current_max_threads = 1; current_max_threads < (max_threads + 1); current_max_threads++)
                    {
                        std::cout << "Running " << current_max_threads << " concurrent correlators" << std::endl;
                        start = std::chrono::steady_clock::now();
                        // create the concurrent correlator threads
                        for (int current_thread = 0; current_thread < current_max_threads; current_thread++)
                            {
                                thread_pool.push_back(std::thread(run_correlator_cpu_real_codes,
                                    correlator_pool[current_thread],
                                    d_rem_carrier_phase_rad,
                                    d_carrier_phase_step_rad,
                                    d_code_phase_step_chips,
                                    d_rem_code_phase_chips,
                                    correlation_sizes[correlation_sizes_idx]));
                            }
                        // wait for the threads to finish their work and destroy the thread objects
                        for (auto& t : thread_pool)
                            {
                                t.join();
                            }
                        thread_pool.clear();
                        end = std::chrono::steady_clock::now();
                        elapsed_seconds = end - start;
                        // Only the timing of the last thread count is kept per correlation size
                        execution_times[correlation_sizes_idx] = elapsed_seconds.count() / static_cast<double>(FLAGS_cpu_multicorrelator_real_codes_iterations_test);
                        std::cout << "CPU Multicorrelator (real codes) execution time for length=" << correlation_sizes[correlation_sizes_idx]
                                  << " : " << execution_times[correlation_sizes_idx] << " [s]" << std::endl;
                    }
            }
    );

    // Tear down the correlators before the buffers they point to are released.
    // free() resets its own state, so deleting the object afterwards is safe.
    for (int n = 0; n < max_threads; n++)
        {
            correlator_pool[n]->free();
            delete correlator_pool[n];
            correlator_pool[n] = nullptr;
        }

    volk_gnsssdr_free(d_local_code_shift_chips);
    volk_gnsssdr_free(d_correlator_outs);
    volk_gnsssdr_free(d_ca_code);
    volk_gnsssdr_free(in_cpu);
}
|
||||
|
@ -33,6 +33,9 @@
|
||||
#include <complex>
|
||||
#include <random>
|
||||
#include <thread>
|
||||
#include <gtest/gtest.h>
|
||||
#include <gflags/gflags.h>
|
||||
#include <gnuradio/gr_complex.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr.h>
|
||||
#include "cpu_multicorrelator.h"
|
||||
#include "gps_sdr_signal_processing.h"
|
||||
|
Loading…
Reference in New Issue
Block a user