1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2024-06-18 11:09:56 +00:00

- New NCO library for carrier signal generation: Provides a fixed point optimized wrapper for GNU Radio fxp CORDIC and SSE2 floating point implementation ( sse_mathfunc.h implementation). The library is available as nco_lib.h

- Updated Unit Test to benchmark all the current NCO implementations (./run_tests --gtest_filter=Cordic_Test.StandardCIsFasterThanCordic)

-Gps_L1_Ca_Dll_Pll_Optim_Tracking algorithm updated with new NCO library for carrier wipeoff and some other optimizations.



git-svn-id: https://svn.code.sf.net/p/gnss-sdr/code/trunk@281 64b25241-fba3-4117-9849-534c7e92360d
This commit is contained in:
Javier Arribas 2012-11-22 17:43:24 +00:00
parent 54df5928ab
commit 0760fa0f3e
11 changed files with 1581 additions and 234 deletions

View File

@ -0,0 +1,433 @@
; Default configuration file
; You can define your own receiver and invoke it by doing
; gnss-sdr --config_file=my_GNSS_SDR_configuration.conf
;
[GNSS-SDR]
;######### GLOBAL OPTIONS ##################
;internal_fs_hz: Internal signal sampling frequency after the signal conditioning stage [Hz].
GNSS-SDR.internal_fs_hz=4000000
;######### CONTROL_THREAD CONFIG ############
ControlThread.wait_for_flowgraph=false
;######### SIGNAL_SOURCE CONFIG ############
;#implementation: Use [File_Signal_Source] or [UHD_Signal_Source] or [GN3S_Signal_Source] (experimental)
SignalSource.implementation=File_Signal_Source
;#filename: path to file with the captured GNSS signal samples to be processed
SignalSource.filename=/media/DATALOGGER_/signals/Agilent GPS Generator/cap2/agilent_cap2.dat
;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
SignalSource.item_type=gr_complex
;#sampling_frequency: Original Signal sampling frequency in [Hz]
SignalSource.sampling_frequency=4000000
;#freq: RF front-end center frequency in [Hz]
SignalSource.freq=1575420000
;#gain: Front-end Gain in [dB]
SignalSource.gain=60
;#subdevice: UHD subdevice specification (for USRP1 use A:0 or B:0)
SignalSource.subdevice=B:0
;#samples: Number of samples to be processed. Notice that 0 indicates the entire file.
SignalSource.samples=250000000
;#repeat: Repeat the processing file. Disable this option in this version
SignalSource.repeat=false
;#dump: Dump the Signal source data to a file. Disable this option in this version
SignalSource.dump=false
SignalSource.dump_filename=../data/signal_source.dat
;#enable_throttle_control: Enabling this option tells the signal source to keep the delay between samples in post processing.
; it helps to not overload the CPU, but the processing time will be longer.
SignalSource.enable_throttle_control=false
;######### SIGNAL_CONDITIONER CONFIG ############
;## It holds blocks to change data type, filter and resample input data.
;#implementation: Use [Pass_Through] or [Signal_Conditioner]
;#[Pass_Through] disables this block and the [DataTypeAdapter], [InputFilter] and [Resampler] blocks
;#[Signal_Conditioner] enables this block. Then you have to configure [DataTypeAdapter], [InputFilter] and [Resampler] blocks
;SignalConditioner.implementation=Signal_Conditioner
SignalConditioner.implementation=Pass_Through
;######### DATA_TYPE_ADAPTER CONFIG ############
;## Changes the type of input data. Please disable it in this version.
;#implementation: [Pass_Through] disables this block
DataTypeAdapter.implementation=Pass_Through
;######### INPUT_FILTER CONFIG ############
;## Filter the input data. Can be combined with frequency translation for IF signals
;#implementation: Use [Pass_Through] or [Fir_Filter] or [Freq_Xlating_Fir_Filter]
;#[Pass_Through] disables this block
;#[Fir_Filter] enables a FIR Filter
;#[Freq_Xlating_Fir_Filter] enables FIR filter and a composite frequency translation that shifts IF down to zero Hz.
;InputFilter.implementation=Fir_Filter
;InputFilter.implementation=Freq_Xlating_Fir_Filter
InputFilter.implementation=Pass_Through
;#dump: Dump the filtered data to a file.
InputFilter.dump=false
;#dump_filename: Log path and filename.
InputFilter.dump_filename=../data/input_filter.dat
;#The following options are used in the filter design of Fir_Filter and Freq_Xlating_Fir_Filter implementation.
;#These options are based on parameters of gnuradio's function: gr_remez.
;#These function calculates the optimal (in the Chebyshev/minimax sense) FIR filter inpulse reponse given a set of band edges, the desired reponse on those bands, and the weight given to the error in those bands.
;#input_item_type: Type and resolution for input signal samples. Use only gr_complex in this version.
InputFilter.input_item_type=gr_complex
;#outut_item_type: Type and resolution for output filtered signal samples. Use only gr_complex in this version.
InputFilter.output_item_type=gr_complex
;#taps_item_type: Type and resolution for the taps of the filter. Use only float in this version.
InputFilter.taps_item_type=float
;#number_of_taps: Number of taps in the filter. Increasing this parameter increases the processing time
InputFilter.number_of_taps=5
;#number_of _bands: Number of frequency bands in the filter.
InputFilter.number_of_bands=2
;#bands: frequency at the band edges [ b1 e1 b2 e2 b3 e3 ...].
;#Frequency is in the range [0, 1], with 1 being the Nyquist frequency (Fs/2)
;#The number of band_begin and band_end elements must match the number of bands
InputFilter.band1_begin=0.0
InputFilter.band1_end=0.45
InputFilter.band2_begin=0.55
InputFilter.band2_end=1.0
;#ampl: desired amplitude at the band edges [ a(b1) a(e1) a(b2) a(e2) ...].
;#The number of ampl_begin and ampl_end elements must match the number of bands
InputFilter.ampl1_begin=1.0
InputFilter.ampl1_end=1.0
InputFilter.ampl2_begin=0.0
InputFilter.ampl2_end=0.0
;#band_error: weighting applied to each band (usually 1).
;#The number of band_error elements must match the number of bands
InputFilter.band1_error=1.0
InputFilter.band2_error=1.0
;#filter_type: one of "bandpass", "hilbert" or "differentiator"
InputFilter.filter_type=bandpass
;#grid_density: determines how accurately the filter will be constructed.
;The minimum value is 16; higher values are slower to compute the filter.
InputFilter.grid_density=16
;#The following options are used only in Freq_Xlating_Fir_Filter implementation.
;#InputFilter.IF is the intermediate frequency (in Hz) shifted down to zero Hz
InputFilter.sampling_frequency=4000000
InputFilter.IF=0
;######### RESAMPLER CONFIG ############
;## Resamples the input data.
;#implementation: Use [Pass_Through] or [Direct_Resampler]
;#[Pass_Through] disables this block
;#[Direct_Resampler] enables a resampler that implements a nearest neigbourhood interpolation
;Resampler.implementation=Direct_Resampler
Resampler.implementation=Pass_Through
;#dump: Dump the resamplered data to a file.
Resampler.dump=false
;#dump_filename: Log path and filename.
Resampler.dump_filename=../data/resampler.dat
;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
Resampler.item_type=gr_complex
;#sample_freq_in: the sample frequency of the input signal
Resampler.sample_freq_in=8000000
;#sample_freq_out: the desired sample frequency of the output signal
Resampler.sample_freq_out=4000000
;######### CHANNELS GLOBAL CONFIG ############
;#count: Number of available satellite channels.
Channels.count=5
;#in_acquisition: Number of channels simultaneously acquiring
Channels.in_acquisition=1
;######### CHANNEL 0 CONFIG ############
;#system: GPS, GLONASS, GALILEO, SBAS or COMPASS
;#if the option is disabled by default is assigned GPS
Channel0.system=GPS
;#signal:
;# "1C" GPS L1 C/A
;# "1P" GPS L1 P
;# "1W" GPS L1 Z-tracking and similar (AS on)
;# "1Y" GPS L1 Y
;# "1M" GPS L1 M
;# "1N" GPS L1 codeless
;# "2C" GPS L2 C/A
;# "2D" GPS L2 L1(C/A)+(P2-P1) semi-codeless
;# "2S" GPS L2 L2C (M)
;# "2L" GPS L2 L2C (L)
;# "2X" GPS L2 L2C (M+L)
;# "2P" GPS L2 P
;# "2W" GPS L2 Z-tracking and similar (AS on)
;# "2Y" GPS L2 Y
;# "2M" GPS GPS L2 M
;# "2N" GPS L2 codeless
;# "5I" GPS L5 I
;# "5Q" GPS L5 Q
;# "5X" GPS L5 I+Q
;# "1C" GLONASS G1 C/A
;# "1P" GLONASS G1 P
;# "2C" GLONASS G2 C/A (Glonass M)
;# "2P" GLONASS G2 P
;# "1A" GALILEO E1 A (PRS)
;# "1B" GALILEO E1 B (I/NAV OS/CS/SoL)
;# "1C" GALILEO E1 C (no data)
;# "1X" GALILEO E1 B+C
;# "1Z" GALILEO E1 A+B+C
;# "5I" GALILEO E5a I (F/NAV OS)
;# "5Q" GALILEO E5a Q (no data)
;# "5X" GALILEO E5a I+Q
;# "7I" GALILEO E5b I
;# "7Q" GALILEO E5b Q
;# "7X" GALILEO E5b I+Q
;# "8I" GALILEO E5 I
;# "8Q" GALILEO E5 Q
;# "8X" GALILEO E5 I+Q
;# "6A" GALILEO E6 A
;# "6B" GALILEO E6 B
;# "6C" GALILEO E6 C
;# "6X" GALILEO E6 B+C
;# "6Z" GALILEO E6 A+B+C
;# "1C" SBAS L1 C/A
;# "5I" SBAS L5 I
;# "5Q" SBAS L5 Q
;# "5X" SBAS L5 I+Q
;# "2I" COMPASS E2 I
;# "2Q" COMPASS E2 Q
;# "2X" COMPASS E2 IQ
;# "7I" COMPASS E5b I
;# "7Q" COMPASS E5b Q
;# "7X" COMPASS E5b IQ
;# "6I" COMPASS E6 I
;# "6Q" COMPASS E6 Q
;# "6X" COMPASS E6 IQ
;#if the option is disabled by default is assigned "1C" GPS L1 C/A
Channel0.signal=1C
;#satellite: Satellite PRN ID for this channel. Disable this option to random search
Channel0.satellite=15
Channel0.repeat_satellite=false
;######### CHANNEL 1 CONFIG ############
Channel1.system=GPS
Channel1.signal=1C
Channel1.satellite=18
Channel1.repeat_satellite=false
;######### CHANNEL 2 CONFIG ############
Channel2.system=GPS
Channel2.signal=1C
Channel2.satellite=16
Channel2.repeat_satellite=false
;######### CHANNEL 3 CONFIG ############
Channel3.system=GPS
Channel3.signal=1C
Channel3.satellite=21
Channel3.repeat_satellite=false
;######### CHANNEL 4 CONFIG ############
Channel4.system=GPS
Channel4.signal=1C
Channel4.satellite=3
Channel4.repeat_satellite=false
;######### CHANNEL 5 CONFIG ############
Channel5.system=GPS
Channel5.signal=1C
;Channel5.satellite=21
;Channel5.repeat_satellite=false
;######### ACQUISITION GLOBAL CONFIG ############
;#dump: Enable or disable the acquisition internal data file logging [true] or [false]
Acquisition.dump=false
;#filename: Log path and filename
Acquisition.dump_filename=./acq_dump.dat
;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
Acquisition.item_type=gr_complex
;#if: Signal intermediate frequency in [Hz]
Acquisition.if=0
;#sampled_ms: Signal block duration for the acquisition signal detection [ms]
Acquisition.sampled_ms=1
;######### ACQUISITION CHANNELS CONFIG ######
;######### ACQUISITION CH 0 CONFIG ############
;#implementation: Acquisition algorithm selection for this channel: [GPS_L1_CA_PCPS_Acquisition]
Acquisition0.implementation=GPS_L1_CA_PCPS_Acquisition
;#threshold: Acquisition threshold
Acquisition0.threshold=70
;#doppler_max: Maximum expected Doppler shift [Hz]
Acquisition0.doppler_max=10000
;#doppler_max: Doppler step in the grid search [Hz]
Acquisition0.doppler_step=250
;#repeat_satellite: Use only jointly with the satellte PRN ID option.
;######### ACQUISITION CH 1 CONFIG ############
Acquisition1.implementation=GPS_L1_CA_PCPS_Acquisition
Acquisition1.threshold=70
Acquisition1.doppler_max=10000
Acquisition1.doppler_step=250
;######### ACQUISITION CH 2 CONFIG ############
Acquisition2.implementation=GPS_L1_CA_PCPS_Acquisition
Acquisition2.threshold=70
Acquisition2.doppler_max=10000
Acquisition2.doppler_step=250
;######### ACQUISITION CH 3 CONFIG ############
Acquisition3.implementation=GPS_L1_CA_PCPS_Acquisition
Acquisition3.threshold=70
Acquisition3.doppler_max=10000
Acquisition3.doppler_step=250
;######### ACQUISITION CH 4 CONFIG ############
Acquisition4.implementation=GPS_L1_CA_PCPS_Acquisition
Acquisition4.threshold=70
Acquisition4.doppler_max=10000
Acquisition4.doppler_step=250
;######### ACQUISITION CH 5 CONFIG ############
Acquisition5.implementation=GPS_L1_CA_PCPS_Acquisition
Acquisition5.threshold=50
Acquisition5.doppler_max=10000
Acquisition5.doppler_step=250
;######### ACQUISITION CH 6 CONFIG ############
Acquisition6.implementation=GPS_L1_CA_PCPS_Acquisition
Acquisition6.threshold=70
Acquisition6.doppler_max=10000
Acquisition6.doppler_step=250
;######### ACQUISITION CH 7 CONFIG ############
Acquisition7.implementation=GPS_L1_CA_PCPS_Acquisition
Acquisition7.threshold=70
Acquisition7.doppler_max=10000
Acquisition7.doppler_step=250
;######### ACQUISITION CH 8 CONFIG ############
Acquisition8.implementation=GPS_L1_CA_PCPS_Acquisition
Acquisition8.threshold=70
Acquisition8.doppler_max=10000
Acquisition8.doppler_step=250
;######### TRACKING GLOBAL CONFIG ############
;#implementation: Selected tracking algorithm: [GPS_L1_CA_DLL_PLL_Tracking] or [GPS_L1_CA_DLL_FLL_PLL_Tracking]
Tracking.implementation=GPS_L1_CA_DLL_PLL_Optim_Tracking
;#item_type: Type and resolution for each of the signal samples. Use only [gr_complex] in this version.
Tracking.item_type=gr_complex
;#sampling_frequency: Signal Intermediate Frequency in [Hz]
Tracking.if=0
;#dump: Enable or disable the Tracking internal binary data file logging [true] or [false]
Tracking.dump=false
;#dump_filename: Log path and filename. Notice that the tracking channel will add "x.dat" where x is the channel number.
Tracking.dump_filename=./tracking_ch_
;#pll_bw_hz: PLL loop filter bandwidth [Hz]
Tracking.pll_bw_hz=50.0;
;#dll_bw_hz: DLL loop filter bandwidth [Hz]
Tracking.dll_bw_hz=2.0;
;#fll_bw_hz: FLL loop filter bandwidth [Hz]
Tracking.fll_bw_hz=10.0;
;#order: PLL/DLL loop filter order [2] or [3]
Tracking.order=3;
;#early_late_space_chips: correlator early-late space [chips]. Use [0.5]
Tracking.early_late_space_chips=0.5;
;######### TELEMETRY DECODER CONFIG ############
;#implementation: Use [GPS_L1_CA_Telemetry_Decoder] for GPS L1 C/A.
TelemetryDecoder.implementation=GPS_L1_CA_Telemetry_Decoder
TelemetryDecoder.dump=false
;######### OBSERVABLES CONFIG ############
;#implementation: Use [GPS_L1_CA_Observables] for GPS L1 C/A.
Observables.implementation=GPS_L1_CA_Observables
;#dump: Enable or disable the Observables internal binary data file logging [true] or [false]
Observables.dump=false
;#dump_filename: Log path and filename.
Observables.dump_filename=./observables.dat
;######### PVT CONFIG ############
;#implementation: Position Velocity and Time (PVT) implementation algorithm: Use [GPS_L1_CA_PVT] in this version.
PVT.implementation=GPS_L1_CA_PVT
;#averaging_depth: Number of PVT observations in the moving average algorithm
PVT.averaging_depth=10
;#flag_average: Enables the PVT averaging between output intervals (arithmetic mean) [true] or [false]
PVT.flag_averaging=true
;#output_rate_ms: Period between two PVT outputs. Notice that the minimum period is equal to the tracking integration time (for GPS CA L1 is 1ms) [ms]
PVT.output_rate_ms=100;
;#display_rate_ms: Position console print (std::out) interval [ms]. Notice that output_rate_ms<=display_rate_ms.
PVT.display_rate_ms=500;
;#dump: Enable or disable the PVT internal binary data file logging [true] or [false]
PVT.dump=false
;#dump_filename: Log path and filename without extension. Notice that PVT will add ".dat" to the binary dump and ".kml" to GoogleEarth dump.
PVT.dump_filename=./PVT
;######### OUTPUT_FILTER CONFIG ############
;# Receiver output filter: Leave this block disabled in this version
OutputFilter.implementation=Null_Sink_Output_Filter
OutputFilter.filename=data/gnss-sdr.dat
OutputFilter.item_type=gr_complex

View File

@ -45,7 +45,7 @@ project
project : requirements
<define>OMNITHREAD_POSIX
<cxxflags>"-std=c++0x -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free"
<cxxflags>"-msse2 -mfpmath=sse -std=c++0x -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free"
<linkflags>"-larmadillo -lboost_system -lboost_filesystem -lboost_thread -lboost_date_time -llapack -lblas -lprofiler -ltcmalloc -lvolk"
<include>src/algorithms/acquisition/adapters
<include>src/algorithms/acquisition/gnuradio_blocks

View File

@ -5,6 +5,8 @@ obj gps_sdr_signal_processing : gps_sdr_signal_processing.cc ;
obj galileo_e1_signal_processing : galileo_e1_signal_processing.cc ;
obj gnss_sdr_valve : gnss_sdr_valve.cc ;
obj pass_through : pass_through.cc ;
obj nco_lib : nco_lib.cc ;
#obj gps_sdr_fft : gps_sdr_fft.cc : <toolset>darwin:<define>NO_SIMD <toolset>gcc:<define>USE_SIMD ;
#obj gps_sdr_simd : gps_sdr_simd.cc : <toolset>darwin:<define>NO_SIMD <toolset>gcc:<define>USE_SIMD ;
#obj gps_sdr_x86 : gps_sdr_x86.cc ;

View File

@ -0,0 +1,122 @@
/*!
* \file nco_lib.cc
* \brief A set of Numeric Controlled Oscillator (NCO) functions to generate the carrier wipeoff signal,
* regardless of system used
*
* \author Javier Arribas 2012, jarribas(at)cttc.es
*
* Detailed description of the file here if needed.
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2012 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#include "nco_lib.h"
typedef ALIGN16_BEG union {
float f[4];
int i[4];
v4sf v;
} ALIGN16_END V4SF;
void sse_nco(std::complex<float> *dest, int n_samples,float start_phase_rad, float phase_step_rad)
{
//SSE NCO
int sse_loops_four_op;
int remnant_ops;
sse_loops_four_op=(int)n_samples/4;
remnant_ops=n_samples%4;
V4SF vx, sin4, cos4;
float phase_rad;
phase_rad=start_phase_rad;
int index=0;
for(int i=0;i<sse_loops_four_op;i++)
{
vx.f[0]=phase_rad;
phase_rad=phase_rad+phase_step_rad;
vx.f[1]=phase_rad;
phase_rad=phase_rad+phase_step_rad;
vx.f[2]=phase_rad;
phase_rad=phase_rad+phase_step_rad;
vx.f[3]=phase_rad;
phase_rad=phase_rad+phase_step_rad;
sincos_ps(vx.v, &sin4.v, &cos4.v);
dest[index] = std::complex<float>(cos4.f[0], -sin4.f[0]);
index++;
dest[index] = std::complex<float>(cos4.f[1], -sin4.f[1]);
index++;
dest[index] = std::complex<float>(cos4.f[2], -sin4.f[2]);
index++;
dest[index] = std::complex<float>(cos4.f[3], -sin4.f[3]);
index++;
}
for(int i=0;i<remnant_ops;i++)
{
vx.f[i]=phase_rad;
phase_rad=phase_rad+phase_step_rad;
}
sincos_ps(vx.v, &sin4.v, &cos4.v);
for(int i=0;i<remnant_ops;i++)
{
dest[index] = std::complex<float>(cos4.f[i], -sin4.f[i]);
index++;
}
}
void fxp_nco(std::complex<float> *dest, int n_samples,float start_phase_rad, float phase_step_rad)
{
int phase_rad_i;
phase_rad_i=gr_fxpt::float_to_fixed(start_phase_rad);
int phase_step_rad_i;
phase_step_rad_i=gr_fxpt::float_to_fixed(phase_step_rad);
float sin_f,cos_f;
for(int i = 0; i < n_samples; i++)
{
//using temp variables
gr_fxpt::sincos(phase_rad_i,&sin_f,&cos_f);
dest[i] = gr_complex(cos_f, -sin_f);
//using references (may be it can be a problem for c++11 standard
//gr_fxpt::sincos(phase_rad_i,&d_carr_sign[i].imag(),&d_carr_sign[i].real());
phase_rad_i += phase_step_rad_i;
}
}
void std_nco(std::complex<float> *dest, int n_samples,float start_phase_rad, float phase_step_rad)
{
float phase_rad;
phase_rad=start_phase_rad;
for(int i = 0; i < n_samples; i++)
{
// Using std::cos and std::sin
dest[i] = gr_complex(std::cos(phase_rad), -std::sin(phase_rad));
phase_rad=phase_rad+phase_step_rad;
}
}

View File

@ -0,0 +1,55 @@
/*!
* \file nco_lib.h
* \brief A set of Numeric Controlled Oscillator (NCO) functions to generate the carrier wipeoff signal,
* regardless of system used
*
* \author Javier Arribas 2012, jarribas(at)cttc.es
*
* Detailed description of the file here if needed.
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2012 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
/*!
* \brief Implements a conjugate complex exponential vector in std::complex<float> *d_carr_sign
* containing int n_samples, with the starting phase .
*
*/
#ifndef NCO_LIB_CC_H
#define NCO_LIB_CC_H
#include <gr_fxpt.h>
#include <xmmintrin.h>
#include <sse_mathfun.h>
#include <cmath>
void sse_nco(std::complex<float> *dest, int n_samples,float start_phase_rad, float phase_step_rad);
void fxp_nco(std::complex<float> *dest, int n_samples,float start_phase_rad, float phase_step_rad);
void std_nco(std::complex<float> *dest, int n_samples,float start_phase_rad, float phase_step_rad);
#endif //NCO_LIB_CC_H

View File

@ -0,0 +1,766 @@
/* SIMD (SSE1+MMX or SSE2) implementation of sin, cos, exp and log
Inspired by Intel Approximate Math library, and based on the
corresponding algorithms of the cephes math library
The default is to use the SSE1 version. If you define USE_SSE2 the
the SSE2 intrinsics will be used in place of the MMX intrinsics. Do
not expect any significant performance improvement with SSE2.
*/
/* Copyright (C) 2007 Julien Pommier
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
(this is the zlib license)
*/
#ifndef SSE_MATHFUN_H
#define SSE_MATHFUN_H
#include <xmmintrin.h>
#define USE_SSE2
/* yes I know, the top of this file is quite ugly */
#ifdef _MSC_VER /* visual c++ */
# define ALIGN16_BEG __declspec(align(16))
# define ALIGN16_END
#else /* gcc or icc */
# define ALIGN16_BEG
# define ALIGN16_END __attribute__((aligned(16)))
#endif
/* __m128 is ugly to write */
typedef __m128 v4sf; // vector of 4 float (sse1)
#ifdef USE_SSE2
# include <emmintrin.h>
typedef __m128i v4si; // vector of 4 int (sse2)
#else
typedef __m64 v2si; // vector of 2 int (mmx)
#endif
/* declare some SSE constants -- why can't I figure a better way to do that? */
#define _PS_CONST(Name, Val) \
static const ALIGN16_BEG float _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
#define _PI32_CONST(Name, Val) \
static const ALIGN16_BEG int _pi32_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
#define _PS_CONST_TYPE(Name, Type, Val) \
static const ALIGN16_BEG Type _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
_PS_CONST(1 , 1.0f);
_PS_CONST(0p5, 0.5f);
/* the smallest non denormalized float number */
_PS_CONST_TYPE(min_norm_pos, int, 0x00800000);
_PS_CONST_TYPE(mant_mask, int, 0x7f800000);
_PS_CONST_TYPE(inv_mant_mask, int, ~0x7f800000);
_PS_CONST_TYPE(sign_mask, int, 0x80000000);
_PS_CONST_TYPE(inv_sign_mask, int, ~0x80000000);
_PI32_CONST(1, 1);
_PI32_CONST(inv1, ~1);
_PI32_CONST(2, 2);
_PI32_CONST(4, 4);
_PI32_CONST(0x7f, 0x7f);
_PS_CONST(cephes_SQRTHF, 0.707106781186547524);
_PS_CONST(cephes_log_p0, 7.0376836292E-2);
_PS_CONST(cephes_log_p1, - 1.1514610310E-1);
_PS_CONST(cephes_log_p2, 1.1676998740E-1);
_PS_CONST(cephes_log_p3, - 1.2420140846E-1);
_PS_CONST(cephes_log_p4, + 1.4249322787E-1);
_PS_CONST(cephes_log_p5, - 1.6668057665E-1);
_PS_CONST(cephes_log_p6, + 2.0000714765E-1);
_PS_CONST(cephes_log_p7, - 2.4999993993E-1);
_PS_CONST(cephes_log_p8, + 3.3333331174E-1);
_PS_CONST(cephes_log_q1, -2.12194440e-4);
_PS_CONST(cephes_log_q2, 0.693359375);
#if defined (__MINGW32__)
/* the ugly part below: many versions of gcc used to be completely buggy with respect to some intrinsics
The movehl_ps is fixed in mingw 3.4.5, but I found out that all the _mm_cmp* intrinsics were completely
broken on my mingw gcc 3.4.5 ...
Note that the bug on _mm_cmp* does occur only at -O0 optimization level
*/
inline __m128 my_movehl_ps(__m128 a, const __m128 b) {
asm (
"movhlps %2,%0\n\t"
: "=x" (a)
: "0" (a), "x"(b)
);
return a; }
#warning "redefined _mm_movehl_ps (see gcc bug 21179)"
#define _mm_movehl_ps my_movehl_ps
inline __m128 my_cmplt_ps(__m128 a, const __m128 b) {
asm (
"cmpltps %2,%0\n\t"
: "=x" (a)
: "0" (a), "x"(b)
);
return a;
}
inline __m128 my_cmpgt_ps(__m128 a, const __m128 b) {
asm (
"cmpnleps %2,%0\n\t"
: "=x" (a)
: "0" (a), "x"(b)
);
return a;
}
inline __m128 my_cmpeq_ps(__m128 a, const __m128 b) {
asm (
"cmpeqps %2,%0\n\t"
: "=x" (a)
: "0" (a), "x"(b)
);
return a;
}
#warning "redefined _mm_cmpxx_ps functions..."
#define _mm_cmplt_ps my_cmplt_ps
#define _mm_cmpgt_ps my_cmpgt_ps
#define _mm_cmpeq_ps my_cmpeq_ps
#endif
#ifndef USE_SSE2
typedef union xmm_mm_union {
__m128 xmm;
__m64 mm[2];
} xmm_mm_union;
#define COPY_XMM_TO_MM(xmm_, mm0_, mm1_) { \
xmm_mm_union u; u.xmm = xmm_; \
mm0_ = u.mm[0]; \
mm1_ = u.mm[1]; \
}
#define COPY_MM_TO_XMM(mm0_, mm1_, xmm_) { \
xmm_mm_union u; u.mm[0]=mm0_; u.mm[1]=mm1_; xmm_ = u.xmm; \
}
#endif // USE_SSE2
/* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0
*/
inline v4sf log_ps(v4sf x) {
#ifdef USE_SSE2
v4si emm0;
#else
v2si mm0, mm1;
#endif
v4sf one = *(v4sf*)_ps_1;
v4sf invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
x = _mm_max_ps(x, *(v4sf*)_ps_min_norm_pos); /* cut off denormalized stuff */
#ifndef USE_SSE2
/* part 1: x = frexpf(x, &e); */
COPY_XMM_TO_MM(x, mm0, mm1);
mm0 = _mm_srli_pi32(mm0, 23);
mm1 = _mm_srli_pi32(mm1, 23);
#else
emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
#endif
/* keep only the fractional part */
x = _mm_and_ps(x, *(v4sf*)_ps_inv_mant_mask);
x = _mm_or_ps(x, *(v4sf*)_ps_0p5);
#ifndef USE_SSE2
/* now e=mm0:mm1 contain the really base-2 exponent */
mm0 = _mm_sub_pi32(mm0, *(v2si*)_pi32_0x7f);
mm1 = _mm_sub_pi32(mm1, *(v2si*)_pi32_0x7f);
v4sf e = _mm_cvtpi32x2_ps(mm0, mm1);
_mm_empty(); /* bye bye mmx */
#else
emm0 = _mm_sub_epi32(emm0, *(v4si*)_pi32_0x7f);
v4sf e = _mm_cvtepi32_ps(emm0);
#endif
e = _mm_add_ps(e, one);
/* part2:
if( x < SQRTHF ) {
e -= 1;
x = x + x - 1.0;
} else { x = x - 1.0; }
*/
v4sf mask = _mm_cmplt_ps(x, *(v4sf*)_ps_cephes_SQRTHF);
v4sf tmp = _mm_and_ps(x, mask);
x = _mm_sub_ps(x, one);
e = _mm_sub_ps(e, _mm_and_ps(one, mask));
x = _mm_add_ps(x, tmp);
v4sf z = _mm_mul_ps(x,x);
v4sf y = *(v4sf*)_ps_cephes_log_p0;
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p1);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p2);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p3);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p4);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p5);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p6);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p7);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p8);
y = _mm_mul_ps(y, x);
y = _mm_mul_ps(y, z);
tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q1);
y = _mm_add_ps(y, tmp);
tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
y = _mm_sub_ps(y, tmp);
tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q2);
x = _mm_add_ps(x, y);
x = _mm_add_ps(x, tmp);
x = _mm_or_ps(x, invalid_mask); // negative arg will be NAN
return x;
}
_PS_CONST(exp_hi, 88.3762626647949f);
_PS_CONST(exp_lo, -88.3762626647949f);
_PS_CONST(cephes_LOG2EF, 1.44269504088896341);
_PS_CONST(cephes_exp_C1, 0.693359375);
_PS_CONST(cephes_exp_C2, -2.12194440e-4);
_PS_CONST(cephes_exp_p0, 1.9875691500E-4);
_PS_CONST(cephes_exp_p1, 1.3981999507E-3);
_PS_CONST(cephes_exp_p2, 8.3334519073E-3);
_PS_CONST(cephes_exp_p3, 4.1665795894E-2);
_PS_CONST(cephes_exp_p4, 1.6666665459E-1);
_PS_CONST(cephes_exp_p5, 5.0000001201E-1);
inline v4sf exp_ps(v4sf x) {
v4sf tmp = _mm_setzero_ps(), fx;
#ifdef USE_SSE2
v4si emm0;
#else
v2si mm0, mm1;
#endif
v4sf one = *(v4sf*)_ps_1;
x = _mm_min_ps(x, *(v4sf*)_ps_exp_hi);
x = _mm_max_ps(x, *(v4sf*)_ps_exp_lo);
/* express exp(x) as exp(g + n*log(2)) */
fx = _mm_mul_ps(x, *(v4sf*)_ps_cephes_LOG2EF);
fx = _mm_add_ps(fx, *(v4sf*)_ps_0p5);
/* how to perform a floorf with SSE: just below */
#ifndef USE_SSE2
/* step 1 : cast to int */
tmp = _mm_movehl_ps(tmp, fx);
mm0 = _mm_cvttps_pi32(fx);
mm1 = _mm_cvttps_pi32(tmp);
/* step 2 : cast back to float */
tmp = _mm_cvtpi32x2_ps(mm0, mm1);
#else
emm0 = _mm_cvttps_epi32(fx);
tmp = _mm_cvtepi32_ps(emm0);
#endif
/* if greater, substract 1 */
v4sf mask = _mm_cmpgt_ps(tmp, fx);
mask = _mm_and_ps(mask, one);
fx = _mm_sub_ps(tmp, mask);
tmp = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C1);
v4sf z = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C2);
x = _mm_sub_ps(x, tmp);
x = _mm_sub_ps(x, z);
z = _mm_mul_ps(x,x);
v4sf y = *(v4sf*)_ps_cephes_exp_p0;
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p1);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p2);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p3);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p4);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p5);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, x);
y = _mm_add_ps(y, one);
/* build 2^n */
#ifndef USE_SSE2
z = _mm_movehl_ps(z, fx);
mm0 = _mm_cvttps_pi32(fx);
mm1 = _mm_cvttps_pi32(z);
mm0 = _mm_add_pi32(mm0, *(v2si*)_pi32_0x7f);
mm1 = _mm_add_pi32(mm1, *(v2si*)_pi32_0x7f);
mm0 = _mm_slli_pi32(mm0, 23);
mm1 = _mm_slli_pi32(mm1, 23);
v4sf pow2n;
COPY_MM_TO_XMM(mm0, mm1, pow2n);
_mm_empty();
#else
emm0 = _mm_cvttps_epi32(fx);
emm0 = _mm_add_epi32(emm0, *(v4si*)_pi32_0x7f);
emm0 = _mm_slli_epi32(emm0, 23);
v4sf pow2n = _mm_castsi128_ps(emm0);
#endif
y = _mm_mul_ps(y, pow2n);
return y;
}
_PS_CONST(minus_cephes_DP1, -0.78515625);
_PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4);
_PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8);
_PS_CONST(sincof_p0, -1.9515295891E-4);
_PS_CONST(sincof_p1, 8.3321608736E-3);
_PS_CONST(sincof_p2, -1.6666654611E-1);
_PS_CONST(coscof_p0, 2.443315711809948E-005);
_PS_CONST(coscof_p1, -1.388731625493765E-003);
_PS_CONST(coscof_p2, 4.166664568298827E-002);
_PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
/* evaluation of 4 sines at onces, using only SSE1+MMX intrinsics so
it runs also on old athlons XPs and the pentium III of your grand
mother.
The code is the exact rewriting of the cephes sinf function.
Precision is excellent as long as x < 8192 (I did not bother to
take into account the special handling they have for greater values
-- it does not return garbage for arguments over 8192, though, but
the extra precision is missing).
Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
surprising but correct result.
Performance is also surprisingly good, 1.33 times faster than the
macos vsinf SSE2 function, and 1.5 times faster than the
__vrs4_sinf of amd's ACML (which is only available in 64 bits). Not
too bad for an SSE1 function (with no special tuning) !
However the latter libraries probably have a much better handling of NaN,
Inf, denormalized and other special arguments..
On my core 1 duo, the execution of this function takes approximately 95 cycles.
From what I have observed on the experiments with Intel AMath lib, switching to an
SSE2 version would improve the perf by only 10%.
Since it is based on SSE intrinsics, it has to be compiled at -O2 to
deliver full speed.
*/
inline v4sf sin_ps(v4sf x) { // any x
v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
#ifdef USE_SSE2
v4si emm0, emm2;
#else
v2si mm0, mm1, mm2, mm3;
#endif
sign_bit = x;
/* take the absolute value */
x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
/* extract the sign bit (upper one) */
sign_bit = _mm_and_ps(sign_bit, *(v4sf*)_ps_sign_mask);
/* scale by 4/Pi */
y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
//printf("plop:"); print4(y);
#ifdef USE_SSE2
/* store the integer part of y in mm0 */
emm2 = _mm_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */
emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
y = _mm_cvtepi32_ps(emm2);
/* get the swap sign flag */
emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
emm0 = _mm_slli_epi32(emm0, 29);
/* get the polynom selection mask
there is one polynom for 0 <= x <= Pi/4
and another one for Pi/4<x<=Pi/2
Both branches will be computed.
*/
emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
v4sf swap_sign_bit = _mm_castsi128_ps(emm0);
v4sf poly_mask = _mm_castsi128_ps(emm2);
sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
#else
/* store the integer part of y in mm0:mm1 */
xmm2 = _mm_movehl_ps(xmm2, y);
mm2 = _mm_cvttps_pi32(y);
mm3 = _mm_cvttps_pi32(xmm2);
/* j=(j+1) & (~1) (see the cephes sources) */
mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
y = _mm_cvtpi32x2_ps(mm2, mm3);
/* get the swap sign flag */
mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
mm0 = _mm_slli_pi32(mm0, 29);
mm1 = _mm_slli_pi32(mm1, 29);
/* get the polynom selection mask */
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
v4sf swap_sign_bit, poly_mask;
COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit);
COPY_MM_TO_XMM(mm2, mm3, poly_mask);
sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
_mm_empty(); /* good-bye mmx */
#endif
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
xmm1 = _mm_mul_ps(y, xmm1);
xmm2 = _mm_mul_ps(y, xmm2);
xmm3 = _mm_mul_ps(y, xmm3);
x = _mm_add_ps(x, xmm1);
x = _mm_add_ps(x, xmm2);
x = _mm_add_ps(x, xmm3);
/* Evaluate the first polynom (0 <= x <= Pi/4) */
y = *(v4sf*)_ps_coscof_p0;
v4sf z = _mm_mul_ps(x,x);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
y = _mm_mul_ps(y, z);
y = _mm_mul_ps(y, z);
v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
y = _mm_sub_ps(y, tmp);
y = _mm_add_ps(y, *(v4sf*)_ps_1);
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
v4sf y2 = *(v4sf*)_ps_sincof_p0;
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_mul_ps(y2, x);
y2 = _mm_add_ps(y2, x);
/* select the correct result from the two polynoms */
xmm3 = poly_mask;
y2 = _mm_and_ps(xmm3, y2); //, xmm3);
y = _mm_andnot_ps(xmm3, y);
y = _mm_add_ps(y,y2);
/* update the sign */
y = _mm_xor_ps(y, sign_bit);
return y;
}
/* almost the same as sin_ps */
inline v4sf cos_ps(v4sf x) { // any x
v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
#ifdef USE_SSE2
v4si emm0, emm2;
#else
v2si mm0, mm1, mm2, mm3;
#endif
/* take the absolute value */
x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
/* scale by 4/Pi */
y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
#ifdef USE_SSE2
/* store the integer part of y in mm0 */
emm2 = _mm_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */
emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
y = _mm_cvtepi32_ps(emm2);
emm2 = _mm_sub_epi32(emm2, *(v4si*)_pi32_2);
/* get the swap sign flag */
emm0 = _mm_andnot_si128(emm2, *(v4si*)_pi32_4);
emm0 = _mm_slli_epi32(emm0, 29);
/* get the polynom selection mask */
emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
v4sf sign_bit = _mm_castsi128_ps(emm0);
v4sf poly_mask = _mm_castsi128_ps(emm2);
#else
/* store the integer part of y in mm0:mm1 */
xmm2 = _mm_movehl_ps(xmm2, y);
mm2 = _mm_cvttps_pi32(y);
mm3 = _mm_cvttps_pi32(xmm2);
/* j=(j+1) & (~1) (see the cephes sources) */
mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
y = _mm_cvtpi32x2_ps(mm2, mm3);
mm2 = _mm_sub_pi32(mm2, *(v2si*)_pi32_2);
mm3 = _mm_sub_pi32(mm3, *(v2si*)_pi32_2);
/* get the swap sign flag in mm0:mm1 and the
polynom selection mask in mm2:mm3 */
mm0 = _mm_andnot_si64(mm2, *(v2si*)_pi32_4);
mm1 = _mm_andnot_si64(mm3, *(v2si*)_pi32_4);
mm0 = _mm_slli_pi32(mm0, 29);
mm1 = _mm_slli_pi32(mm1, 29);
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
v4sf sign_bit, poly_mask;
COPY_MM_TO_XMM(mm0, mm1, sign_bit);
COPY_MM_TO_XMM(mm2, mm3, poly_mask);
_mm_empty(); /* good-bye mmx */
#endif
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
xmm1 = _mm_mul_ps(y, xmm1);
xmm2 = _mm_mul_ps(y, xmm2);
xmm3 = _mm_mul_ps(y, xmm3);
x = _mm_add_ps(x, xmm1);
x = _mm_add_ps(x, xmm2);
x = _mm_add_ps(x, xmm3);
/* Evaluate the first polynom (0 <= x <= Pi/4) */
y = *(v4sf*)_ps_coscof_p0;
v4sf z = _mm_mul_ps(x,x);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
y = _mm_mul_ps(y, z);
y = _mm_mul_ps(y, z);
v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
y = _mm_sub_ps(y, tmp);
y = _mm_add_ps(y, *(v4sf*)_ps_1);
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
v4sf y2 = *(v4sf*)_ps_sincof_p0;
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_mul_ps(y2, x);
y2 = _mm_add_ps(y2, x);
/* select the correct result from the two polynoms */
xmm3 = poly_mask;
y2 = _mm_and_ps(xmm3, y2); //, xmm3);
y = _mm_andnot_ps(xmm3, y);
y = _mm_add_ps(y,y2);
/* update the sign */
y = _mm_xor_ps(y, sign_bit);
return y;
}
/* since sin_ps and cos_ps are almost identical, sincos_ps could replace both of them..
it is almost as fast, and gives you a free cosine with your sine */
inline void sincos_ps(v4sf x, v4sf *s, v4sf *c) {
v4sf xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y;
#ifdef USE_SSE2
v4si emm0, emm2, emm4;
#else
v2si mm0, mm1, mm2, mm3, mm4, mm5;
#endif
sign_bit_sin = x;
/* take the absolute value */
x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
/* extract the sign bit (upper one) */
sign_bit_sin = _mm_and_ps(sign_bit_sin, *(v4sf*)_ps_sign_mask);
/* scale by 4/Pi */
y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
#ifdef USE_SSE2
/* store the integer part of y in emm2 */
emm2 = _mm_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */
emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
y = _mm_cvtepi32_ps(emm2);
emm4 = emm2;
/* get the swap sign flag for the sine */
emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
emm0 = _mm_slli_epi32(emm0, 29);
v4sf swap_sign_bit_sin = _mm_castsi128_ps(emm0);
/* get the polynom selection mask for the sine*/
emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
v4sf poly_mask = _mm_castsi128_ps(emm2);
#else
/* store the integer part of y in mm2:mm3 */
xmm3 = _mm_movehl_ps(xmm3, y);
mm2 = _mm_cvttps_pi32(y);
mm3 = _mm_cvttps_pi32(xmm3);
/* j=(j+1) & (~1) (see the cephes sources) */
mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
y = _mm_cvtpi32x2_ps(mm2, mm3);
mm4 = mm2;
mm5 = mm3;
/* get the swap sign flag for the sine */
mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
mm0 = _mm_slli_pi32(mm0, 29);
mm1 = _mm_slli_pi32(mm1, 29);
v4sf swap_sign_bit_sin;
COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit_sin);
/* get the polynom selection mask for the sine */
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
v4sf poly_mask;
COPY_MM_TO_XMM(mm2, mm3, poly_mask);
#endif
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
xmm1 = _mm_mul_ps(y, xmm1);
xmm2 = _mm_mul_ps(y, xmm2);
xmm3 = _mm_mul_ps(y, xmm3);
x = _mm_add_ps(x, xmm1);
x = _mm_add_ps(x, xmm2);
x = _mm_add_ps(x, xmm3);
#ifdef USE_SSE2
emm4 = _mm_sub_epi32(emm4, *(v4si*)_pi32_2);
emm4 = _mm_andnot_si128(emm4, *(v4si*)_pi32_4);
emm4 = _mm_slli_epi32(emm4, 29);
v4sf sign_bit_cos = _mm_castsi128_ps(emm4);
#else
/* get the sign flag for the cosine */
mm4 = _mm_sub_pi32(mm4, *(v2si*)_pi32_2);
mm5 = _mm_sub_pi32(mm5, *(v2si*)_pi32_2);
mm4 = _mm_andnot_si64(mm4, *(v2si*)_pi32_4);
mm5 = _mm_andnot_si64(mm5, *(v2si*)_pi32_4);
mm4 = _mm_slli_pi32(mm4, 29);
mm5 = _mm_slli_pi32(mm5, 29);
v4sf sign_bit_cos;
COPY_MM_TO_XMM(mm4, mm5, sign_bit_cos);
_mm_empty(); /* good-bye mmx */
#endif
sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin);
/* Evaluate the first polynom (0 <= x <= Pi/4) */
v4sf z = _mm_mul_ps(x,x);
y = *(v4sf*)_ps_coscof_p0;
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
y = _mm_mul_ps(y, z);
y = _mm_mul_ps(y, z);
v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
y = _mm_sub_ps(y, tmp);
y = _mm_add_ps(y, *(v4sf*)_ps_1);
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
v4sf y2 = *(v4sf*)_ps_sincof_p0;
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_mul_ps(y2, x);
y2 = _mm_add_ps(y2, x);
/* select the correct result from the two polynoms */
xmm3 = poly_mask;
v4sf ysin2 = _mm_and_ps(xmm3, y2);
v4sf ysin1 = _mm_andnot_ps(xmm3, y);
y2 = _mm_sub_ps(y2,ysin2);
y = _mm_sub_ps(y, ysin1);
xmm1 = _mm_add_ps(ysin1,ysin2);
xmm2 = _mm_add_ps(y,y2);
/* update the sign */
*s = _mm_xor_ps(xmm1, sign_bit_sin);
*c = _mm_xor_ps(xmm2, sign_bit_cos);
}
#endif

View File

@ -40,16 +40,15 @@
#include "tracking_discriminators.h"
#include "lock_detectors.h"
#include "GPS_L1_CA.h"
#include "nco_lib.h"
#include "control_message_factory.h"
#include <boost/lexical_cast.hpp>
#include <iostream>
#include <sstream>
#include <cmath>
#include "math.h"
#include <gnuradio/gr_io_signature.h>
#include <glog/log_severity.h>
#include <glog/logging.h>
#include <gr_fxpt.h>
/*!
* \todo Include in definition header file
@ -59,7 +58,6 @@
#define MAXIMUM_LOCK_FAIL_COUNTER 50
#define CARRIER_LOCK_THRESHOLD 0.85
using google::LogMessage;
gps_l1_ca_dll_pll_optim_tracking_cc_sptr
@ -126,9 +124,6 @@ Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc(
d_ca_code = new gr_complex[(int)GPS_L1_CA_CODE_LENGTH_CHIPS + 2];
d_carr_sign = new gr_complex[d_vector_length*2];
/* If an array is partitioned for more than one thread to operate on,
* having the sub-array boundaries unaligned to cache lines could lead
* to performance degradation. Here we allocate memory
@ -149,7 +144,7 @@ Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc(
//--- Perform initializations ------------------------------
// define initial code frequency basis of NCO
d_code_freq_hz = GPS_L1_CA_CODE_RATE_HZ;
d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ;
// define residual code phase (in chips)
d_rem_code_phase_samples = 0.0;
// define residual carrier phase
@ -204,12 +199,12 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::start_tracking()
float T_chip_mod_seconds;
float T_prn_mod_seconds;
float T_prn_mod_samples;
d_code_freq_hz = radial_velocity * GPS_L1_CA_CODE_RATE_HZ;
T_chip_mod_seconds = 1/d_code_freq_hz;
d_code_freq_chips = radial_velocity * GPS_L1_CA_CODE_RATE_HZ;
T_chip_mod_seconds = 1/d_code_freq_chips;
T_prn_mod_seconds = T_chip_mod_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
T_prn_mod_samples = T_prn_mod_seconds * (float)d_fs_in;
d_next_prn_length_samples = round(T_prn_mod_samples);
d_current_prn_length_samples = round(T_prn_mod_samples);
float T_prn_true_seconds = GPS_L1_CA_CODE_LENGTH_CHIPS / GPS_L1_CA_CODE_RATE_HZ;
float T_prn_true_samples = T_prn_true_seconds * (float)d_fs_in;
@ -242,22 +237,25 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::start_tracking()
// Experimental: pre-sampled local signal replica at nominal code frequency.
// No code doppler correction
// unified loop for E, P, L code vectors
double code_phase_step_chips;
code_phase_step_chips = (GPS_L1_CA_CODE_RATE_HZ / (double)d_fs_in);
double tcode_chips = 0;
// Alternative EPL code generation (40% of speed improvement!)
int early_late_spc_samples;
early_late_spc_samples=round(d_early_late_spc_chips/code_phase_step_chips);
double epl_loop_length_samples;
epl_loop_length_samples=d_current_prn_length_samples+early_late_spc_samples*2;
double tcode_chips;
int associated_chip_index;
int code_length_chips = (int)GPS_L1_CA_CODE_LENGTH_CHIPS;
double code_phase_step_chips;
int early_late_spc_samples;
int epl_loop_length_samples;
// unified loop for E, P, L code vectors
code_phase_step_chips = ((double)d_code_freq_chips) / ((double)d_fs_in);
tcode_chips = 0;
// Alternative EPL code generation (40% of speed improvement!)
early_late_spc_samples=round(d_early_late_spc_chips/code_phase_step_chips);
epl_loop_length_samples=d_current_prn_length_samples+early_late_spc_samples*2;
for (int i=0; i<epl_loop_length_samples; i++)
{
associated_chip_index = 1 + round(fmod(tcode_chips - d_early_late_spc_chips, code_length_chips));
d_early_code[i] = d_ca_code[associated_chip_index];
tcode_chips = tcode_chips + d_code_phase_step_chips;
tcode_chips = tcode_chips + code_phase_step_chips;
}
memcpy(d_prompt_code,&d_early_code[early_late_spc_samples],d_current_prn_length_samples* sizeof(gr_complex));
@ -268,8 +266,6 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::start_tracking()
d_carrier_lock_fail_counter = 0;
d_rem_code_phase_samples = 0;
d_rem_carr_phase_rad = 0;
d_rem_code_phase_samples = 0;
d_next_rem_code_phase_samples = 0;
d_acc_carrier_phase_rad = 0;
d_code_phase_samples = d_acq_code_phase_samples;
@ -291,6 +287,7 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::start_tracking()
}
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::update_local_code()
{
double tcode_chips;
@ -302,18 +299,18 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::update_local_code()
int epl_loop_length_samples;
// unified loop for E, P, L code vectors
code_phase_step_chips = ((double)d_code_freq_hz) / ((double)d_fs_in);
rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_hz / d_fs_in);
code_phase_step_chips = ((double)d_code_freq_chips) / ((double)d_fs_in);
rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in);
tcode_chips = -rem_code_phase_chips;
// Alternative EPL code generation (40% of speed improvement!)
//EPL code generation
early_late_spc_samples=round(d_early_late_spc_chips/code_phase_step_chips);
epl_loop_length_samples=d_current_prn_length_samples+early_late_spc_samples*2;
for (int i=0; i<epl_loop_length_samples; i++)
{
associated_chip_index = 1 + round(fmod(tcode_chips - d_early_late_spc_chips, code_length_chips));
d_early_code[i] = d_ca_code[associated_chip_index];
tcode_chips = tcode_chips + d_code_phase_step_chips;
tcode_chips = tcode_chips + code_phase_step_chips;
}
memcpy(d_prompt_code,&d_early_code[early_late_spc_samples],d_current_prn_length_samples* sizeof(gr_complex));
@ -321,43 +318,14 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::update_local_code()
}
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::update_local_carrier()
{
float phase_rad, phase_step_rad;
float phase_step_rad;
phase_step_rad = (float)GPS_TWO_PI*d_carrier_doppler_hz / (float)d_fs_in;
phase_rad = d_rem_carr_phase_rad;
int phase_rad_i;
phase_rad_i=gr_fxpt::float_to_fixed(phase_rad);
int phase_step_rad_i;
phase_step_rad_i=gr_fxpt::float_to_fixed(phase_step_rad);
float sin_f,cos_f;
for(int i = 0; i < d_current_prn_length_samples; i++)
{
//using temp variables
gr_fxpt::sincos(phase_rad_i,&sin_f,&cos_f);
d_carr_sign[i] = gr_complex(cos_f, -sin_f);
//using references (may be it can be a problem for c++11 standard
//gr_fxpt::sincos(phase_rad_i,&d_carr_sign[i].imag(),&d_carr_sign[i].real());
phase_rad_i += phase_step_rad_i;
// Using std::cos and std::sin
//d_carr_sign[i] = gr_complex(cos(phase_rad), -sin(phase_rad));
}
d_rem_carr_phase_rad = fmod(gr_fxpt::fixed_to_float(phase_rad_i), GPS_TWO_PI);
d_acc_carrier_phase_rad = d_acc_carrier_phase_rad + d_rem_carr_phase_rad;
fxp_nco(d_carr_sign, d_current_prn_length_samples,d_rem_carr_phase_rad, phase_step_rad);
//sse_nco(d_carr_sign, d_current_prn_length_samples,d_rem_carr_phase_rad, phase_step_rad);
}
Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::~Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc()
{
d_dump_file.close();
@ -384,51 +352,49 @@ int Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::general_work (int noutput_items, gr_vec
{
// process vars
float carr_error;
float carr_nco;
float code_error;
float code_nco;
float carr_error_hz;
float carr_error_filt_hz;
float code_error_chips;
float code_error_filt_chips;
if (d_enable_tracking == true)
{
/*
* Receiver signal alignment
*/
if (d_pull_in == true)
{
int samples_offset;
/*
* Receiver signal alignment
*/
if (d_pull_in == true)
{
int samples_offset;
// 28/11/2011 ACQ to TRK transition BUG CORRECTION
float acq_trk_shif_correction_samples;
int acq_to_trk_delay_samples;
acq_to_trk_delay_samples = d_sample_counter - d_acq_sample_stamp;
acq_trk_shif_correction_samples = d_next_prn_length_samples - fmod((float)acq_to_trk_delay_samples, (float)d_next_prn_length_samples);
//std::cout<<"acq_trk_shif_correction="<<acq_trk_shif_correction_samples<<"\r\n";
samples_offset = round(d_acq_code_phase_samples + acq_trk_shif_correction_samples);
// /todo: Check if the sample counter sent to the next block as a time reference should be incremented AFTER sended or BEFORE
//d_sample_counter_seconds = d_sample_counter_seconds + (((double)samples_offset) / (double)d_fs_in);
d_sample_counter = d_sample_counter + samples_offset; //count for the processed samples
d_pull_in = false;
//std::cout<<" samples_offset="<<samples_offset<<"\r\n";
consume_each(samples_offset); //shift input to perform alignement with local replica
return 1;
}
// 28/11/2011 ACQ to TRK transition BUG CORRECTION
float acq_trk_shif_correction_samples;
int acq_to_trk_delay_samples;
acq_to_trk_delay_samples = d_sample_counter - d_acq_sample_stamp;
acq_trk_shif_correction_samples = d_current_prn_length_samples - fmod((float)acq_to_trk_delay_samples, (float)d_current_prn_length_samples);
//std::cout<<"acq_trk_shif_correction="<<acq_trk_shif_correction_samples<<"\r\n";
samples_offset = round(d_acq_code_phase_samples + acq_trk_shif_correction_samples);
// /todo: Check if the sample counter sent to the next block as a time reference should be incremented AFTER sended or BEFORE
//d_sample_counter_seconds = d_sample_counter_seconds + (((double)samples_offset) / (double)d_fs_in);
d_sample_counter = d_sample_counter + samples_offset; //count for the processed samples
d_pull_in = false;
//std::cout<<" samples_offset="<<samples_offset<<"\r\n";
consume_each(samples_offset); //shift input to perform alignement with local replica
return 1;
}
// GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder
Gnss_Synchro current_synchro_data;
// Fill the acquisition data
current_synchro_data = *d_acquisition_gnss_synchro;
// GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder
Gnss_Synchro current_synchro_data;
// Fill the acquisition data
current_synchro_data = *d_acquisition_gnss_synchro;
// Block input data and block output stream pointers
const gr_complex* in = (gr_complex*) input_items[0]; //PRN start block alignement
Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0];
const gr_complex* in = (gr_complex*) input_items[0]; //PRN start block alignement
Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0];
// Generate local code and carrier replicas (using \hat{f}_d(k-1))
// Update the prn length based on code freq (variable) and
// sampling frequency (fixed)
// variable code PRN sample block size
d_current_prn_length_samples = d_next_prn_length_samples;
//update_local_code(); //disabled in the speed optimized tracking!
update_local_code();
update_local_carrier();
update_local_carrier();
// perform Early, Prompt and Late correlation
d_correlator.Carrier_wipeoff_and_EPL_volk(d_current_prn_length_samples,
@ -442,111 +408,91 @@ int Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::general_work (int noutput_items, gr_vec
d_Late,
is_unaligned());
// check for samples consistency (this should be done before in the receiver / here only if the source is a file)
if (std::isnan((*d_Prompt).real()) == true or std::isnan((*d_Prompt).imag()) == true )// or std::isinf(in[i].real())==true or std::isinf(in[i].imag())==true)
{
const int samples_available = ninput_items[0];
d_sample_counter = d_sample_counter + samples_available;
LOG_AT_LEVEL(WARNING) << "Detected NaN samples at sample number " << d_sample_counter;
consume_each(samples_available);
// ################## PLL ##########################################################
// PLL discriminator
carr_error_hz = pll_cloop_two_quadrant_atan(*d_Prompt) / (float)GPS_TWO_PI;
// Carrier discriminator filter
carr_error_filt_hz = d_carrier_loop_filter.get_carrier_nco(carr_error_hz);
// New carrier Doppler frequency estimation
d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_error_filt_hz;
// New code Doppler frequency estimation
d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ + ((d_carrier_doppler_hz * GPS_L1_CA_CODE_RATE_HZ) / GPS_L1_FREQ_HZ);
//carrier phase accumulator for (K) doppler estimation
d_acc_carrier_phase_rad=d_acc_carrier_phase_rad+GPS_TWO_PI*d_carrier_doppler_hz*GPS_L1_CA_CODE_PERIOD;
//remanent carrier phase to prevent overflow in the code NCO
d_rem_carr_phase_rad=d_rem_carr_phase_rad+GPS_TWO_PI*d_carrier_doppler_hz*GPS_L1_CA_CODE_PERIOD;
d_rem_carr_phase_rad=fmod(d_rem_carr_phase_rad,GPS_TWO_PI);
// make an output to not stop the rest of the processing blocks
current_synchro_data.Prompt_I=0.0;
current_synchro_data.Prompt_Q=0.0;
current_synchro_data.Tracking_timestamp_secs=(double)d_sample_counter/(double)d_fs_in;
current_synchro_data.Carrier_phase_rads=0.0;
current_synchro_data.Code_phase_secs=0.0;
current_synchro_data.CN0_dB_hz=0.0;
current_synchro_data.Flag_valid_tracking=false;
// ################## DLL ##########################################################
// DLL discriminator
code_error_chips =dll_nc_e_minus_l_normalized(*d_Early, *d_Late); //[chips/Ti]
// Code discriminator filter
code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips); //[chips/second]
//Code phase accumulator
float code_error_filt_secs;
code_error_filt_secs=(GPS_L1_CA_CODE_PERIOD*code_error_filt_chips)/GPS_L1_CA_CODE_RATE_HZ; //[seconds]
d_acc_code_phase_secs=d_acc_code_phase_secs+code_error_filt_secs;
*out[0] =current_synchro_data;
return 1;
}
// Compute PLL error and update carrier NCO -
carr_error = pll_cloop_two_quadrant_atan(*d_Prompt) / (float)GPS_TWO_PI;
// Implement carrier loop filter and generate NCO command
carr_nco = d_carrier_loop_filter.get_carrier_nco(carr_error);
// Modify carrier freq based on NCO command
d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_nco;
// Compute DLL error and update code NCO
code_error = dll_nc_e_minus_l_normalized(*d_Early, *d_Late);
// Implement code loop filter and generate NCO command
code_nco = d_code_loop_filter.get_code_nco(code_error);
// Modify code freq based on NCO command
d_code_freq_hz = GPS_L1_CA_CODE_RATE_HZ - code_nco;
// Update the phasestep based on code freq (variable) and
// sampling frequency (fixed)
d_code_phase_step_chips = d_code_freq_hz / (float)d_fs_in; //[chips]
// variable code PRN sample block size
// ################## CARRIER AND CODE NCO BUFFER ALIGNEMENT #######################
// keep alignment parameters for the next input buffer
float T_chip_seconds;
float T_prn_seconds;
float T_prn_samples;
float K_blk_samples;
T_chip_seconds = 1 / d_code_freq_hz;
// Compute the next buffer lenght based in the new period of the PRN sequence and the code phase error estimation
T_chip_seconds = 1 / d_code_freq_chips;
T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
T_prn_samples = T_prn_seconds * d_fs_in;
d_rem_code_phase_samples = d_next_rem_code_phase_samples;
K_blk_samples = T_prn_samples + d_rem_code_phase_samples;
d_next_prn_length_samples = round(K_blk_samples); //round to a discrete samples
d_next_rem_code_phase_samples = K_blk_samples - d_next_prn_length_samples; //rounding error
T_prn_samples = T_prn_seconds * (float)d_fs_in;
K_blk_samples = T_prn_samples + d_rem_code_phase_samples + code_error_filt_secs*(float)d_fs_in;
d_current_prn_length_samples = round(K_blk_samples); //round to a discrete samples
d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
/*!
* \todo Improve the lock detection algorithm!
*/
// ####### CN0 ESTIMATION AND LOCK DETECTORS ######
if (d_cn0_estimation_counter < CN0_ESTIMATION_SAMPLES)
{
// fill buffer with prompt correlator output values
d_Prompt_buffer[d_cn0_estimation_counter] = *d_Prompt;
d_cn0_estimation_counter++;
}
else
{
d_cn0_estimation_counter = 0;
d_CN0_SNV_dB_Hz = cn0_svn_estimator(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES, d_fs_in, GPS_L1_CA_CODE_LENGTH_CHIPS);
d_carrier_lock_test = carrier_lock_detector(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES);
// ###### TRACKING UNLOCK NOTIFICATION #####
if (d_carrier_lock_test < d_carrier_lock_threshold or d_CN0_SNV_dB_Hz < MINIMUM_VALID_CN0)
{
d_carrier_lock_fail_counter++;
}
else
{
if (d_carrier_lock_fail_counter > 0) d_carrier_lock_fail_counter--;
}
if (d_carrier_lock_fail_counter > MAXIMUM_LOCK_FAIL_COUNTER)
{
std::cout << "Channel " << d_channel << " loss of lock!" << std::endl ;
//tracking_message = 3; //loss of lock
//d_channel_internal_queue->push(tracking_message);
ControlMessageFactory* cmf = new ControlMessageFactory();
if (d_queue != gr_msg_queue_sptr())
{
d_queue->handle(cmf->GetQueueMessage(d_channel, 2));
}
delete cmf;
d_carrier_lock_fail_counter = 0;
d_enable_tracking = false; // TODO: check if disabling tracking is consistent with the channel state machine
}
//std::cout<<"d_carrier_lock_fail_counter"<<d_carrier_lock_fail_counter<<"\r\n";
}
if (d_cn0_estimation_counter < CN0_ESTIMATION_SAMPLES)
{
// fill buffer with prompt correlator output values
d_Prompt_buffer[d_cn0_estimation_counter] = *d_Prompt;
d_cn0_estimation_counter++;
}
else
{
d_cn0_estimation_counter = 0;
// Code lock indicator
d_CN0_SNV_dB_Hz = cn0_svn_estimator(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES, d_fs_in, GPS_L1_CA_CODE_LENGTH_CHIPS);
// Carrier lock indicator
d_carrier_lock_test = carrier_lock_detector(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES);
// Loss of lock detection
if (d_carrier_lock_test < d_carrier_lock_threshold or d_CN0_SNV_dB_Hz < MINIMUM_VALID_CN0)
{
d_carrier_lock_fail_counter++;
}
else
{
if (d_carrier_lock_fail_counter > 0) d_carrier_lock_fail_counter--;
}
if (d_carrier_lock_fail_counter > MAXIMUM_LOCK_FAIL_COUNTER)
{
std::cout << "Channel " << d_channel << " loss of lock!" << std::endl ;
ControlMessageFactory* cmf = new ControlMessageFactory();
if (d_queue != gr_msg_queue_sptr())
{
d_queue->handle(cmf->GetQueueMessage(d_channel, 2));
}
delete cmf;
d_carrier_lock_fail_counter = 0;
d_enable_tracking = false; // TODO: check if disabling tracking is consistent with the channel state machine
}
}
// ########### Output the tracking data to navigation and PVT ##########
current_synchro_data.Prompt_I = (double)(*d_Prompt).real();
current_synchro_data.Prompt_Q = (double)(*d_Prompt).imag();
// Tracking_timestamp_secs is aligned with the PRN start sample
current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter +
(double)d_next_prn_length_samples + (double)d_next_rem_code_phase_samples) / (double)d_fs_in;
// This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN,
// thus, Code_phase_secs = 0
current_synchro_data.Code_phase_secs = 0;
current_synchro_data.Tracking_timestamp_secs=((double)d_sample_counter+(double)d_current_prn_length_samples+(double)d_rem_code_phase_samples)/(double)d_fs_in;
// This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0
current_synchro_data.Code_phase_secs=0;
current_synchro_data.Carrier_phase_rads = (double)d_acc_carrier_phase_rad;
current_synchro_data.CN0_dB_hz = (double)d_CN0_SNV_dB_Hz;
*out[0] = current_synchro_data;
@ -585,7 +531,6 @@ int Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::general_work (int noutput_items, gr_vec
*d_Prompt = gr_complex(0,0);
*d_Late = gr_complex(0,0);
Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0]; //block output streams pointer
//std::cout<<output_items.size()<<std::endl;
// GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder
Gnss_Synchro current_synchro_data;
*out[0] = current_synchro_data;
@ -621,15 +566,15 @@ int Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::general_work (int noutput_items, gr_vec
// carrier and code frequency
d_dump_file.write((char*)&d_carrier_doppler_hz, sizeof(float));
d_dump_file.write((char*)&d_code_freq_hz, sizeof(float));
d_dump_file.write((char*)&d_code_freq_chips, sizeof(float));
//PLL commands
d_dump_file.write((char*)&carr_error, sizeof(float));
d_dump_file.write((char*)&carr_nco, sizeof(float));
d_dump_file.write((char*)&carr_error_hz, sizeof(float));
d_dump_file.write((char*)&carr_error_filt_hz, sizeof(float));
//DLL commands
d_dump_file.write((char*)&code_error, sizeof(float));
d_dump_file.write((char*)&code_nco, sizeof(float));
d_dump_file.write((char*)&code_error_chips, sizeof(float));
d_dump_file.write((char*)&code_error_filt_chips, sizeof(float));
// CN0 and carrier lock test
d_dump_file.write((char*)&d_CN0_SNV_dB_Hz, sizeof(float));
@ -641,14 +586,13 @@ int Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::general_work (int noutput_items, gr_vec
tmp_double=(double)(d_sample_counter+d_current_prn_length_samples);
d_dump_file.write((char*)&tmp_double, sizeof(double));
}
catch (std::ifstream::failure e)
catch (std::ifstream::failure& e)
{
std::cout << "Exception writing trk dump file " << e.what() << std::endl;
}
}
consume_each(d_current_prn_length_samples); // this is necesary in gr_block derivates
//d_sample_counter_seconds = d_sample_counter_seconds + ( ((double)d_current_prn_length_samples) / (double)d_fs_in );
d_sample_counter += d_current_prn_length_samples; //count for the processed samples
return 1; //output tracking result ALWAYS even in the case of d_enable_tracking==false
}
@ -672,7 +616,7 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_channel(unsigned int channel)
d_dump_file.open(d_dump_filename.c_str(), std::ios::out | std::ios::binary);
std::cout << "Tracking dump enabled on channel " << d_channel << " Log file: " << d_dump_filename.c_str() << std::endl;
}
catch (std::ifstream::failure e)
catch (std::ifstream::failure& e)
{
std::cout << "channel " << d_channel << " Exception opening trk dump file " << e.what() << std::endl;
}
@ -681,7 +625,6 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_channel(unsigned int channel)
}
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_channel_queue(concurrent_queue<int> *channel_internal_queue)
{
d_channel_internal_queue = channel_internal_queue;

View File

@ -35,7 +35,7 @@
*/
#ifndef GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_CC_H
#define GNSS_SDR_GPS_L1_CA_DLL_PLL_TRACKING_CC_H
#define GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_CC_H
#include <fstream>
#include <queue>
@ -56,7 +56,7 @@
class Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc;
typedef boost::shared_ptr<Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc>
gps_l1_ca_dll_pll_optim_tracking_cc_sptr;
gps_l1_ca_dll_pll_optim_tracking_cc_sptr;
gps_l1_ca_dll_pll_optim_tracking_cc_sptr
gps_l1_ca_dll_pll_make_optim_tracking_cc(long if_freq,
@ -135,7 +135,6 @@ private:
long d_fs_in;
double d_early_late_spc_chips;
double d_code_phase_step_chips;
gr_complex* d_ca_code;
@ -150,7 +149,6 @@ private:
// remaining code phase and carrier phase between tracking loops
float d_rem_code_phase_samples;
float d_next_rem_code_phase_samples;
float d_rem_carr_phase_rad;
// PLL and DLL filter library
@ -164,15 +162,14 @@ private:
Correlator d_correlator;
// tracking vars
float d_code_freq_hz;
float d_code_freq_chips;
float d_carrier_doppler_hz;
float d_acc_carrier_phase_rad;
float d_code_phase_samples;
float d_acc_code_phase_secs;
//PRN period in samples
int d_current_prn_length_samples;
int d_next_prn_length_samples;
//double d_sample_counter_seconds;
//processing samples counters
unsigned long int d_sample_counter;
@ -187,9 +184,9 @@ private:
int d_carrier_lock_fail_counter;
// control vars
int d_gnuradio_forecast_samples;
bool d_enable_tracking;
bool d_pull_in;
int d_gnuradio_forecast_samples;
// file dump
std::string d_dump_filename;

View File

@ -28,6 +28,7 @@ exe gnss-sdr : main.cc
../algorithms/data_type_adapter/adapters//ishort_to_complex
../algorithms/input_filter/adapters//fir_filter
../algorithms/input_filter/adapters//freq_xlating_fir_filter
../algorithms/libs//nco_lib
../algorithms/libs//gnss_signal_processing
../algorithms/libs//gps_sdr_signal_processing
../algorithms/libs//galileo_e1_signal_processing

View File

@ -2,6 +2,7 @@
* \file cordic_test.cc
* \brief Test of the CORDIC (COordinate Rotation DIgital Computer) algorithm
* \author Carles Fernandez-Prades, 2012. cfernandez(at)cttc.es
* Javier Arribas, 2012. jarribas(at)cttc.es
*
*
* -------------------------------------------------------------------------
@ -30,79 +31,105 @@
*/
#include "GPS_L1_CA.h"
#include <iostream>
#include <math.h>
#include <cmath>
#include <gtest/gtest.h>
#include "cordic.h"
#include <sys/time.h>
#include <algorithm>
#include <cstdlib> // for RAND_MAX
#include <gnuradio/gr_fxpt_nco.h>
#include "nco_lib.h"
TEST(Cordic_Test, StandardCIsFasterThanCordic)
{
int largest_k = 10;
Cordic* cordicPtr;
cordicPtr = new Cordic(largest_k);
double phase = rand();
phase = (phase/RAND_MAX) * 3.141592;
double cos_phase1 = 0;
double sin_phase1 = 0;
double cos_phase2 = 0;
double sin_phase2 = 0;
double cos_phase3 = 0;
double sin_phase3 = 0;
gr_fxpt_nco* nco;
nco = new gr_fxpt_nco();
std::complex<float> *d_carr_sign;
// carrier parameters
int d_vector_length=4000;
float phase_rad;
float phase_step_rad;
float carrier_freq=2000;
float d_fs_in=4000000;
phase_step_rad = (float)GPS_TWO_PI*carrier_freq / (float)d_fs_in;
double niter = 10000000;
// space for carrier wipeoff and signal baseband vectors
if (posix_memalign((void**)&d_carr_sign, 16, d_vector_length * sizeof(std::complex<float>) * 2) == 0){};
double sin_d,cos_d;
double sin_f,cos_f;
double niter = 10000;
struct timeval tv;
//*** NON-OPTIMIZED CORDIC *****
gettimeofday(&tv, NULL);
long long int begin1 = tv.tv_sec * 1000000 + tv.tv_usec;
for(int i=0; i<niter; i++)
{
cordicPtr->cordic_get_cos_sin(phase, cos_phase1, sin_phase1);
phase_rad=0;
for(int j=0;j<d_vector_length;j++)
{
cordicPtr->cordic_get_cos_sin(phase_rad, cos_d, sin_d);
d_carr_sign[j]=std::complex<float>(cos_d,-sin_d);
phase_rad=phase_rad+phase_step_rad;
}
}
gettimeofday(&tv, NULL);
long long int end1 = tv.tv_sec *1000000 + tv.tv_usec;
//*** STD COS, SIN standalone *****
gettimeofday(&tv, NULL);
long long int begin2 = tv.tv_sec * 1000000 + tv.tv_usec;
for(int i=0; i<niter; i++)
{
cos_phase2 = cos(phase);
sin_phase2 = sin(phase);
for(int j=0;j<d_vector_length;j++)
{
cos_f = std::cos(phase_rad);
sin_f = std::sin(phase_rad);
d_carr_sign[j]=std::complex<float>(cos_f,-sin_f);
phase_rad=phase_rad+phase_step_rad;
}
}
gettimeofday(&tv, NULL);
long long int end2 = tv.tv_sec *1000000 + tv.tv_usec;
float phase_f;
phase_f = (float)phase;
//*** GNU RADIO FIXED POINT ARITHMETIC ********
gettimeofday(&tv, NULL);
long long int begin3 = tv.tv_sec * 1000000 + tv.tv_usec;
for(int i=0; i<niter; i++)
{
nco->set_phase(phase_f);
cos_phase3 = nco->cos();
sin_phase3 = nco->sin();
fxp_nco(d_carr_sign, d_vector_length,0, phase_step_rad);
}
gettimeofday(&tv, NULL);
long long int end3 = tv.tv_sec *1000000 + tv.tv_usec;
delete nco;
//*** SSE2 NCO ****************
gettimeofday(&tv, NULL);
long long int begin4 = tv.tv_sec * 1000000 + tv.tv_usec;
for(int i=0; i<niter; i++)
{
sse_nco(d_carr_sign, d_vector_length,0, phase_step_rad);
}
gettimeofday(&tv, NULL);
long long int end4 = tv.tv_sec *1000000 + tv.tv_usec;
delete cordicPtr;
std::cout << "CORDIC sin =" << sin_phase1 << ", cos =" << cos_phase1 << " computed " << niter << " times in " << (end1-begin1) << " microseconds" << std::endl;
std::cout << "STD sin =" << sin_phase2 << ", cos =" << cos_phase2 << " computed " << niter << " times in " << (end2-begin2) << " microseconds" << std::endl;
std::cout << "FXPT sin =" << sin_phase3 << ", cos =" << cos_phase3 << " computed " << niter << " times in " << (end3-begin3) << " microseconds" << std::endl;
std::cout << "NON-OPTIMIZED CORDIC computed " << niter << " times in " << (end1-begin1) << " microseconds" << std::endl;
std::cout << "STD LIB ARITHM computed " << niter << " times in " << (end2-begin2) << " microseconds" << std::endl;
std::cout << "FXPT CORDIC computed " << niter << " times in " << (end3-begin3) << " microseconds" << std::endl;
std::cout << "SSE CORDIC computed " << niter << " times in " << (end4-begin4) << " microseconds" << std::endl;
EXPECT_TRUE((end2-begin2) < (end1-begin1)); // if true, standard C++ is faster than the cordic implementation
}

View File

@ -25,6 +25,7 @@ exe run_tests : test_main.cc
../algorithms/data_type_adapter/adapters//ishort_to_complex
../algorithms/input_filter/adapters//fir_filter
../algorithms/input_filter/adapters//freq_xlating_fir_filter
../algorithms/libs//nco_lib
../algorithms/libs//gnss_signal_processing
../algorithms/libs//gps_sdr_signal_processing
../algorithms/libs//galileo_e1_signal_processing