- New NCO library for carrier signal generation: Provides a fixed point optimized wrapper for GNU Radio fxp CORDIC and SSE2 floating point implementation ( sse_mathfunc.h implementation). The library is available as nco_lib.h

- Updated Unit Test to benchmark all the current NCO implementations (./run_tests --gtest_filter=Cordic_Test.StandardCIsFasterThanCordic) -Gps_L1_Ca_Dll_Pll_Optim_Tracking algorithm updated with new NCO library for carrier wipeoff and some other optimizations. git-svn-id: https://svn.code.sf.net/p/gnss-sdr/code/trunk@281 64b25241-fba3-4117-9849-534c7e92360d
2025-07-03 02:22:54 +00:00 · 2012-11-22 17:43:24 +00:00 · 2012-11-22 17:43:24 +00:00 · 0760fa0f3e
commit 0760fa0f3e
parent 54df5928ab
11 changed files with 1581 additions and 234 deletions
--- a/conf/gnss-sdr_optim_tracking.conf
+++ b/conf/gnss-sdr_optim_tracking.conf
@ -0,0 +1,433 @@
 ; Default configuration file
 ; You can define your own receiver and invoke it by doing
 ; gnss-sdr --config_file=my_GNSS_SDR_configuration.conf
 ;
 [GNSS-SDR]
 ;######### GLOBAL OPTIONS ##################
 ;internal_fs_hz: Internal signal sampling frequency after the signal conditioning stage [Hz].
 GNSS-SDR.internal_fs_hz=4000000
 ;######### CONTROL_THREAD CONFIG ############
 ControlThread.wait_for_flowgraph=false
 ;######### SIGNAL_SOURCE CONFIG ############
 ;#implementation: Use [File_Signal_Source] or [UHD_Signal_Source] or [GN3S_Signal_Source] (experimental)
 SignalSource.implementation=File_Signal_Source
 ;#filename: path to file with the captured GNSS signal samples to be processed
 SignalSource.filename=/media/DATALOGGER_/signals/Agilent GPS Generator/cap2/agilent_cap2.dat
 ;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
 SignalSource.item_type=gr_complex
 ;#sampling_frequency: Original Signal sampling frequency in [Hz] 
 SignalSource.sampling_frequency=4000000
 ;#freq: RF front-end center frequency in [Hz] 
 SignalSource.freq=1575420000
 ;#gain: Front-end Gain in [dB] 
 SignalSource.gain=60
 ;#subdevice: UHD subdevice specification (for USRP1 use A:0 or B:0)
 SignalSource.subdevice=B:0
 ;#samples: Number of samples to be processed. Notice that 0 indicates the entire file.
 SignalSource.samples=250000000
 ;#repeat: Repeat the processing file. Disable this option in this version
 SignalSource.repeat=false
 ;#dump: Dump the Signal source data to a file. Disable this option in this version
 SignalSource.dump=false
 SignalSource.dump_filename=../data/signal_source.dat
 ;#enable_throttle_control: Enabling this option tells the signal source to keep the delay between samples in post processing.
 ; it helps to not overload the CPU, but the processing time will be longer. 
 SignalSource.enable_throttle_control=false
 ;######### SIGNAL_CONDITIONER CONFIG ############
 ;## It holds blocks to change data type, filter and resample input data. 
 ;#implementation: Use [Pass_Through] or [Signal_Conditioner]
 ;#[Pass_Through] disables this block and the [DataTypeAdapter], [InputFilter] and [Resampler] blocks
 ;#[Signal_Conditioner] enables this block. Then you have to configure [DataTypeAdapter], [InputFilter] and [Resampler] blocks
 ;SignalConditioner.implementation=Signal_Conditioner
 SignalConditioner.implementation=Pass_Through
 ;######### DATA_TYPE_ADAPTER CONFIG ############
 ;## Changes the type of input data. Please disable it in this version.
 ;#implementation: [Pass_Through] disables this block
 DataTypeAdapter.implementation=Pass_Through
 ;######### INPUT_FILTER CONFIG ############
 ;## Filter the input data. Can be combined with frequency translation for IF signals
 ;#implementation: Use [Pass_Through] or [Fir_Filter] or [Freq_Xlating_Fir_Filter]
 ;#[Pass_Through] disables this block
 ;#[Fir_Filter] enables a FIR Filter
 ;#[Freq_Xlating_Fir_Filter] enables FIR filter and a composite frequency translation that shifts IF down to zero Hz.
 ;InputFilter.implementation=Fir_Filter
 ;InputFilter.implementation=Freq_Xlating_Fir_Filter
 InputFilter.implementation=Pass_Through
 ;#dump: Dump the filtered data to a file.
 InputFilter.dump=false
 ;#dump_filename: Log path and filename.
 InputFilter.dump_filename=../data/input_filter.dat
 ;#The following options are used in the filter design of Fir_Filter and Freq_Xlating_Fir_Filter implementation. 
 ;#These options are based on parameters of gnuradio's function: gr_remez.
 ;#These function calculates the optimal (in the Chebyshev/minimax sense) FIR filter inpulse reponse given a set of band edges, the desired reponse on those bands, and the weight given to the error in those bands.
 ;#input_item_type: Type and resolution for input signal samples. Use only gr_complex in this version.
 InputFilter.input_item_type=gr_complex
 ;#outut_item_type: Type and resolution for output filtered signal samples. Use only gr_complex in this version.
 InputFilter.output_item_type=gr_complex
 ;#taps_item_type: Type and resolution for the taps of the filter. Use only float in this version.
 InputFilter.taps_item_type=float
 ;#number_of_taps: Number of taps in the filter. Increasing this parameter increases the processing time
 InputFilter.number_of_taps=5
 ;#number_of _bands: Number of frequency bands in the filter.
 InputFilter.number_of_bands=2
 ;#bands: frequency at the band edges [ b1 e1 b2 e2 b3 e3 ...].
 ;#Frequency is in the range [0, 1], with 1 being the Nyquist frequency (Fs/2)
 ;#The number of band_begin and band_end elements must match the number of bands
 InputFilter.band1_begin=0.0
 InputFilter.band1_end=0.45
 InputFilter.band2_begin=0.55
 InputFilter.band2_end=1.0
 ;#ampl: desired amplitude at the band edges [ a(b1) a(e1) a(b2) a(e2) ...].
 ;#The number of ampl_begin and ampl_end elements must match the number of bands
 InputFilter.ampl1_begin=1.0
 InputFilter.ampl1_end=1.0
 InputFilter.ampl2_begin=0.0
 InputFilter.ampl2_end=0.0
 ;#band_error: weighting applied to each band (usually 1).
 ;#The number of band_error elements must match the number of bands
 InputFilter.band1_error=1.0
 InputFilter.band2_error=1.0
 ;#filter_type: one of "bandpass", "hilbert" or "differentiator" 
 InputFilter.filter_type=bandpass
 ;#grid_density: determines how accurately the filter will be constructed.
 ;The minimum value is 16; higher values are slower to compute the filter.
 InputFilter.grid_density=16
 ;#The following options are used only in Freq_Xlating_Fir_Filter implementation.
 ;#InputFilter.IF is the intermediate frequency (in Hz) shifted down to zero Hz
 InputFilter.sampling_frequency=4000000
 InputFilter.IF=0
 ;######### RESAMPLER CONFIG ############
 ;## Resamples the input data. 
 ;#implementation: Use [Pass_Through] or [Direct_Resampler]
 ;#[Pass_Through] disables this block
 ;#[Direct_Resampler] enables a resampler that implements a nearest neigbourhood interpolation
 ;Resampler.implementation=Direct_Resampler
 Resampler.implementation=Pass_Through
 ;#dump: Dump the resamplered data to a file.
 Resampler.dump=false
 ;#dump_filename: Log path and filename.
 Resampler.dump_filename=../data/resampler.dat
 ;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
 Resampler.item_type=gr_complex
 ;#sample_freq_in: the sample frequency of the input signal
 Resampler.sample_freq_in=8000000
 ;#sample_freq_out: the desired sample frequency of the output signal
 Resampler.sample_freq_out=4000000
 ;######### CHANNELS GLOBAL CONFIG ############
 ;#count: Number of available satellite channels.
 Channels.count=5
 ;#in_acquisition: Number of channels simultaneously acquiring
 Channels.in_acquisition=1
 ;######### CHANNEL 0 CONFIG ############
 ;#system: GPS, GLONASS, GALILEO, SBAS or COMPASS
 ;#if the option is disabled by default is assigned GPS
 Channel0.system=GPS
 ;#signal: 
 ;# "1C" GPS L1 C/A
 ;# "1P" GPS L1 P
 ;# "1W" GPS L1 Z-tracking and similar (AS on)
 ;# "1Y" GPS L1 Y
 ;# "1M" GPS L1 M
 ;# "1N" GPS L1 codeless
 ;# "2C" GPS L2 C/A
 ;# "2D" GPS L2 L1(C/A)+(P2-P1) semi-codeless
 ;# "2S" GPS L2 L2C (M)
 ;# "2L" GPS L2 L2C (L)
 ;# "2X" GPS L2 L2C (M+L)
 ;# "2P" GPS L2 P
 ;# "2W" GPS L2 Z-tracking and similar (AS on)
 ;# "2Y" GPS L2 Y
 ;# "2M" GPS GPS L2 M
 ;# "2N" GPS L2 codeless
 ;# "5I" GPS L5 I
 ;# "5Q" GPS L5 Q
 ;# "5X" GPS L5 I+Q
 ;# "1C" GLONASS G1 C/A
 ;# "1P" GLONASS G1 P
 ;# "2C" GLONASS G2 C/A  (Glonass M)
 ;# "2P" GLONASS G2 P
 ;# "1A" GALILEO E1 A (PRS)
 ;# "1B" GALILEO E1 B (I/NAV OS/CS/SoL)
 ;# "1C" GALILEO E1 C (no data)
 ;# "1X" GALILEO E1 B+C
 ;# "1Z" GALILEO E1 A+B+C
 ;# "5I" GALILEO E5a I (F/NAV OS)
 ;# "5Q" GALILEO E5a Q  (no data)
 ;# "5X" GALILEO E5a I+Q
 ;# "7I" GALILEO E5b I
 ;# "7Q" GALILEO E5b Q
 ;# "7X" GALILEO E5b I+Q
 ;# "8I" GALILEO E5 I
 ;# "8Q" GALILEO E5 Q
 ;# "8X" GALILEO E5 I+Q
 ;# "6A" GALILEO E6 A
 ;# "6B" GALILEO E6 B
 ;# "6C" GALILEO E6 C
 ;# "6X" GALILEO E6 B+C
 ;# "6Z" GALILEO E6 A+B+C
 ;# "1C" SBAS L1 C/A
 ;# "5I" SBAS L5 I
 ;# "5Q" SBAS L5 Q
 ;# "5X" SBAS L5 I+Q
 ;# "2I" COMPASS E2 I
 ;# "2Q" COMPASS E2 Q
 ;# "2X" COMPASS E2 IQ
 ;# "7I" COMPASS E5b I
 ;# "7Q" COMPASS E5b Q
 ;# "7X" COMPASS E5b IQ
 ;# "6I" COMPASS E6 I
 ;# "6Q" COMPASS E6 Q
 ;# "6X" COMPASS E6 IQ
 ;#if the option is disabled by default is assigned "1C" GPS L1 C/A
 Channel0.signal=1C
 ;#satellite: Satellite PRN ID for this channel. Disable this option to random search
 Channel0.satellite=15
 Channel0.repeat_satellite=false
 ;######### CHANNEL 1 CONFIG ############
 Channel1.system=GPS
 Channel1.signal=1C
 Channel1.satellite=18
 Channel1.repeat_satellite=false
 ;######### CHANNEL 2 CONFIG ############
 Channel2.system=GPS
 Channel2.signal=1C
 Channel2.satellite=16
 Channel2.repeat_satellite=false
 ;######### CHANNEL 3 CONFIG ############
 Channel3.system=GPS
 Channel3.signal=1C
 Channel3.satellite=21
 Channel3.repeat_satellite=false
 ;######### CHANNEL 4 CONFIG ############
 Channel4.system=GPS
 Channel4.signal=1C
 Channel4.satellite=3
 Channel4.repeat_satellite=false
 ;######### CHANNEL 5 CONFIG ############
 Channel5.system=GPS
 Channel5.signal=1C
 ;Channel5.satellite=21
 ;Channel5.repeat_satellite=false
 ;######### ACQUISITION GLOBAL CONFIG ############
 ;#dump: Enable or disable the acquisition internal data file logging [true] or [false] 
 Acquisition.dump=false
 ;#filename: Log path and filename
 Acquisition.dump_filename=./acq_dump.dat
 ;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
 Acquisition.item_type=gr_complex
 ;#if: Signal intermediate frequency in [Hz] 
 Acquisition.if=0
 ;#sampled_ms: Signal block duration for the acquisition signal detection [ms]
 Acquisition.sampled_ms=1
 ;######### ACQUISITION CHANNELS CONFIG ######
 ;######### ACQUISITION CH 0 CONFIG ############
 ;#implementation: Acquisition algorithm selection for this channel: [GPS_L1_CA_PCPS_Acquisition]
 Acquisition0.implementation=GPS_L1_CA_PCPS_Acquisition
 ;#threshold: Acquisition threshold
 Acquisition0.threshold=70
 ;#doppler_max: Maximum expected Doppler shift [Hz]
 Acquisition0.doppler_max=10000
 ;#doppler_max: Doppler step in the grid search [Hz]
 Acquisition0.doppler_step=250
 ;#repeat_satellite: Use only jointly with the satellte PRN ID option. 
 ;######### ACQUISITION CH 1 CONFIG ############
 Acquisition1.implementation=GPS_L1_CA_PCPS_Acquisition
 Acquisition1.threshold=70
 Acquisition1.doppler_max=10000
 Acquisition1.doppler_step=250
 ;######### ACQUISITION CH 2 CONFIG ############
 Acquisition2.implementation=GPS_L1_CA_PCPS_Acquisition
 Acquisition2.threshold=70
 Acquisition2.doppler_max=10000
 Acquisition2.doppler_step=250
 ;######### ACQUISITION CH 3 CONFIG ############
 Acquisition3.implementation=GPS_L1_CA_PCPS_Acquisition
 Acquisition3.threshold=70
 Acquisition3.doppler_max=10000
 Acquisition3.doppler_step=250
 ;######### ACQUISITION CH 4 CONFIG ############
 Acquisition4.implementation=GPS_L1_CA_PCPS_Acquisition
 Acquisition4.threshold=70
 Acquisition4.doppler_max=10000
 Acquisition4.doppler_step=250
 ;######### ACQUISITION CH 5 CONFIG ############
 Acquisition5.implementation=GPS_L1_CA_PCPS_Acquisition
 Acquisition5.threshold=50
 Acquisition5.doppler_max=10000
 Acquisition5.doppler_step=250
 ;######### ACQUISITION CH 6 CONFIG ############
 Acquisition6.implementation=GPS_L1_CA_PCPS_Acquisition
 Acquisition6.threshold=70
 Acquisition6.doppler_max=10000
 Acquisition6.doppler_step=250
 ;######### ACQUISITION CH 7 CONFIG ############
 Acquisition7.implementation=GPS_L1_CA_PCPS_Acquisition
 Acquisition7.threshold=70
 Acquisition7.doppler_max=10000
 Acquisition7.doppler_step=250
 ;######### ACQUISITION CH 8 CONFIG ############
 Acquisition8.implementation=GPS_L1_CA_PCPS_Acquisition
 Acquisition8.threshold=70
 Acquisition8.doppler_max=10000
 Acquisition8.doppler_step=250
 ;######### TRACKING GLOBAL CONFIG ############
 ;#implementation: Selected tracking algorithm: [GPS_L1_CA_DLL_PLL_Tracking] or [GPS_L1_CA_DLL_FLL_PLL_Tracking]
 Tracking.implementation=GPS_L1_CA_DLL_PLL_Optim_Tracking
 ;#item_type: Type and resolution for each of the signal samples. Use only [gr_complex] in this version.
 Tracking.item_type=gr_complex
 ;#sampling_frequency: Signal Intermediate Frequency in [Hz] 
 Tracking.if=0
 ;#dump: Enable or disable the Tracking internal binary data file logging [true] or [false] 
 Tracking.dump=false
 ;#dump_filename: Log path and filename. Notice that the tracking channel will add "x.dat" where x is the channel number.
 Tracking.dump_filename=./tracking_ch_
 ;#pll_bw_hz: PLL loop filter bandwidth [Hz]
 Tracking.pll_bw_hz=50.0;
 ;#dll_bw_hz: DLL loop filter bandwidth [Hz]
 Tracking.dll_bw_hz=2.0;
 ;#fll_bw_hz: FLL loop filter bandwidth [Hz]
 Tracking.fll_bw_hz=10.0;
 ;#order: PLL/DLL loop filter order [2] or [3]
 Tracking.order=3;
 ;#early_late_space_chips: correlator early-late space [chips]. Use [0.5]
 Tracking.early_late_space_chips=0.5;
 ;######### TELEMETRY DECODER CONFIG ############
 ;#implementation: Use [GPS_L1_CA_Telemetry_Decoder] for GPS L1 C/A.
 TelemetryDecoder.implementation=GPS_L1_CA_Telemetry_Decoder
 TelemetryDecoder.dump=false
 ;######### OBSERVABLES CONFIG ############
 ;#implementation: Use [GPS_L1_CA_Observables] for GPS L1 C/A.
 Observables.implementation=GPS_L1_CA_Observables
 ;#dump: Enable or disable the Observables internal binary data file logging [true] or [false] 
 Observables.dump=false
 ;#dump_filename: Log path and filename.
 Observables.dump_filename=./observables.dat
 ;######### PVT CONFIG ############
 ;#implementation: Position Velocity and Time (PVT) implementation algorithm: Use [GPS_L1_CA_PVT] in this version.
 PVT.implementation=GPS_L1_CA_PVT
 ;#averaging_depth: Number of PVT observations in the moving average algorithm
 PVT.averaging_depth=10
 ;#flag_average: Enables the PVT averaging between output intervals (arithmetic mean) [true] or [false] 
 PVT.flag_averaging=true
 ;#output_rate_ms: Period between two PVT outputs. Notice that the minimum period is equal to the tracking integration time (for GPS CA L1 is 1ms) [ms]
 PVT.output_rate_ms=100;
 ;#display_rate_ms: Position console print (std::out) interval [ms]. Notice that output_rate_ms<=display_rate_ms.
 PVT.display_rate_ms=500;
 ;#dump: Enable or disable the PVT internal binary data file logging [true] or [false] 
 PVT.dump=false
 ;#dump_filename: Log path and filename without extension. Notice that PVT will add ".dat" to the binary dump and ".kml" to GoogleEarth dump.
 PVT.dump_filename=./PVT
 ;######### OUTPUT_FILTER CONFIG ############
 ;# Receiver output filter: Leave this block disabled in this version
 OutputFilter.implementation=Null_Sink_Output_Filter
 OutputFilter.filename=data/gnss-sdr.dat
 OutputFilter.item_type=gr_complex
--- a/jamroot.jam
+++ b/jamroot.jam
@ -45,7 +45,7 @@ project
 project : requirements
 <define>OMNITHREAD_POSIX
-<cxxflags>"-std=c++0x -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free"
+<cxxflags>"-msse2 -mfpmath=sse -std=c++0x -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free"
 <linkflags>"-larmadillo -lboost_system -lboost_filesystem -lboost_thread -lboost_date_time -llapack -lblas -lprofiler -ltcmalloc -lvolk"
 <include>src/algorithms/acquisition/adapters
 <include>src/algorithms/acquisition/gnuradio_blocks
--- a/src/algorithms/libs/jamfile.jam
+++ b/src/algorithms/libs/jamfile.jam
@ -5,6 +5,8 @@ obj gps_sdr_signal_processing : gps_sdr_signal_processing.cc ;
 obj galileo_e1_signal_processing : galileo_e1_signal_processing.cc ;
 obj gnss_sdr_valve : gnss_sdr_valve.cc ;
 obj pass_through : pass_through.cc ;
 obj nco_lib : nco_lib.cc ;
 #obj gps_sdr_fft : gps_sdr_fft.cc : <toolset>darwin:<define>NO_SIMD <toolset>gcc:<define>USE_SIMD ;
 #obj gps_sdr_simd : gps_sdr_simd.cc : <toolset>darwin:<define>NO_SIMD <toolset>gcc:<define>USE_SIMD ;
 #obj gps_sdr_x86 : gps_sdr_x86.cc ;
--- a/src/algorithms/libs/nco_lib.cc
+++ b/src/algorithms/libs/nco_lib.cc
@ -0,0 +1,122 @@
 /*!
 * \file nco_lib.cc
 * \brief A set of Numeric Controlled Oscillator (NCO) functions to generate the carrier wipeoff signal,
 *  regardless of system used
 *
 * \author Javier Arribas 2012, jarribas(at)cttc.es
 *
 * Detailed description of the file here if needed.
 *
 * -------------------------------------------------------------------------
 *
 * Copyright (C) 2010-2012  (see AUTHORS file for a list of contributors)
 *
 * GNSS-SDR is a software defined Global Navigation
 *          Satellite Systems receiver
 *
 * This file is part of GNSS-SDR.
 *
 * GNSS-SDR is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * at your option) any later version.
 *
 * GNSS-SDR is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
 *
 * -------------------------------------------------------------------------
 */
 #include "nco_lib.h"
 typedef ALIGN16_BEG union {
  float f[4];
  int i[4];
  v4sf  v;
 } ALIGN16_END V4SF;
 void sse_nco(std::complex<float> *dest, int n_samples,float start_phase_rad, float phase_step_rad)
 {
    //SSE NCO
    int sse_loops_four_op;
    int remnant_ops;
    sse_loops_four_op=(int)n_samples/4;
    remnant_ops=n_samples%4;
    V4SF vx, sin4, cos4;
    float phase_rad;
    phase_rad=start_phase_rad;
    int index=0;
    for(int i=0;i<sse_loops_four_op;i++)
    {
    	vx.f[0]=phase_rad;
    	phase_rad=phase_rad+phase_step_rad;
    	vx.f[1]=phase_rad;
    	phase_rad=phase_rad+phase_step_rad;
    	vx.f[2]=phase_rad;
    	phase_rad=phase_rad+phase_step_rad;
    	vx.f[3]=phase_rad;
    	phase_rad=phase_rad+phase_step_rad;
    	sincos_ps(vx.v, &sin4.v, &cos4.v);
    	dest[index] = std::complex<float>(cos4.f[0], -sin4.f[0]);
    	index++;
    	dest[index] = std::complex<float>(cos4.f[1], -sin4.f[1]);
    	index++;
    	dest[index] = std::complex<float>(cos4.f[2], -sin4.f[2]);
    	index++;
    	dest[index] = std::complex<float>(cos4.f[3], -sin4.f[3]);
    	index++;
    }
    for(int i=0;i<remnant_ops;i++)
    {
    	vx.f[i]=phase_rad;
    	phase_rad=phase_rad+phase_step_rad;
    }
    sincos_ps(vx.v, &sin4.v, &cos4.v);
    for(int i=0;i<remnant_ops;i++)
    {
    	dest[index] = std::complex<float>(cos4.f[i], -sin4.f[i]);
    	index++;
    }
 }
 void fxp_nco(std::complex<float> *dest, int n_samples,float start_phase_rad, float phase_step_rad)
 {
 	int phase_rad_i;
 	phase_rad_i=gr_fxpt::float_to_fixed(start_phase_rad);
 	int phase_step_rad_i;
 	phase_step_rad_i=gr_fxpt::float_to_fixed(phase_step_rad);
 	float sin_f,cos_f;
 	for(int i = 0; i < n_samples; i++)
 		{
 			//using temp variables
 			gr_fxpt::sincos(phase_rad_i,&sin_f,&cos_f);
 			dest[i] = gr_complex(cos_f, -sin_f);
 			//using references (may be it can be a problem for c++11 standard
 			//gr_fxpt::sincos(phase_rad_i,&d_carr_sign[i].imag(),&d_carr_sign[i].real());
 			phase_rad_i += phase_step_rad_i;
 		}
 }
 void std_nco(std::complex<float> *dest, int n_samples,float start_phase_rad, float phase_step_rad)
 {
 	float phase_rad;
 	phase_rad=start_phase_rad;
 	for(int i = 0; i < n_samples; i++)
 		{
 			// Using std::cos and std::sin
 			dest[i] = gr_complex(std::cos(phase_rad), -std::sin(phase_rad));
 	    	phase_rad=phase_rad+phase_step_rad;
 		}
 }
--- a/src/algorithms/libs/nco_lib.h
+++ b/src/algorithms/libs/nco_lib.h
@ -0,0 +1,55 @@
 /*!
 * \file nco_lib.h
 * \brief A set of Numeric Controlled Oscillator (NCO) functions to generate the carrier wipeoff signal,
 *  regardless of system used
 *
 * \author Javier Arribas 2012, jarribas(at)cttc.es
 *
 * Detailed description of the file here if needed.
 *
 * -------------------------------------------------------------------------
 *
 * Copyright (C) 2010-2012  (see AUTHORS file for a list of contributors)
 *
 * GNSS-SDR is a software defined Global Navigation
 *          Satellite Systems receiver
 *
 * This file is part of GNSS-SDR.
 *
 * GNSS-SDR is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * at your option) any later version.
 *
 * GNSS-SDR is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
 *
 * -------------------------------------------------------------------------
 */
 /*!
 * \brief Implements a conjugate complex exponential vector in std::complex<float> *d_carr_sign
 * containing int n_samples, with the starting phase .
 *
 */
 #ifndef NCO_LIB_CC_H
 #define	 NCO_LIB_CC_H
 #include <gr_fxpt.h>
 #include <xmmintrin.h>
 #include <sse_mathfun.h>
 #include <cmath>
 void sse_nco(std::complex<float> *dest, int n_samples,float start_phase_rad, float phase_step_rad);
 void fxp_nco(std::complex<float> *dest, int n_samples,float start_phase_rad, float phase_step_rad);
 void std_nco(std::complex<float> *dest, int n_samples,float start_phase_rad, float phase_step_rad);
 #endif //NCO_LIB_CC_H
--- a/src/algorithms/libs/sse_mathfun.h
+++ b/src/algorithms/libs/sse_mathfun.h
@ -0,0 +1,766 @@
 /* SIMD (SSE1+MMX or SSE2) implementation of sin, cos, exp and log
   Inspired by Intel Approximate Math library, and based on the
   corresponding algorithms of the cephes math library
   The default is to use the SSE1 version. If you define USE_SSE2 the
   the SSE2 intrinsics will be used in place of the MMX intrinsics. Do
   not expect any significant performance improvement with SSE2.
 */
 /* Copyright (C) 2007  Julien Pommier
  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
  arising from the use of this software.
  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:
  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
  (this is the zlib license)
 */
 #ifndef SSE_MATHFUN_H
 #define	 SSE_MATHFUN_H
 #include <xmmintrin.h>
 #define USE_SSE2
 /* yes I know, the top of this file is quite ugly */
 #ifdef _MSC_VER /* visual c++ */
 # define ALIGN16_BEG __declspec(align(16))
 # define ALIGN16_END
 #else /* gcc or icc */
 # define ALIGN16_BEG
 # define ALIGN16_END __attribute__((aligned(16)))
 #endif
 /* __m128 is ugly to write */
 typedef __m128 v4sf;  // vector of 4 float (sse1)
 #ifdef USE_SSE2
 # include <emmintrin.h>
 typedef __m128i v4si; // vector of 4 int (sse2)
 #else
 typedef __m64 v2si;   // vector of 2 int (mmx)
 #endif
 /* declare some SSE constants -- why can't I figure a better way to do that? */
 #define _PS_CONST(Name, Val)                                            \
  static const ALIGN16_BEG float _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
 #define _PI32_CONST(Name, Val)                                            \
  static const ALIGN16_BEG int _pi32_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
 #define _PS_CONST_TYPE(Name, Type, Val)                                 \
  static const ALIGN16_BEG Type _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
 _PS_CONST(1  , 1.0f);
 _PS_CONST(0p5, 0.5f);
 /* the smallest non denormalized float number */
 _PS_CONST_TYPE(min_norm_pos, int, 0x00800000);
 _PS_CONST_TYPE(mant_mask, int, 0x7f800000);
 _PS_CONST_TYPE(inv_mant_mask, int, ~0x7f800000);
 _PS_CONST_TYPE(sign_mask, int, 0x80000000);
 _PS_CONST_TYPE(inv_sign_mask, int, ~0x80000000);
 _PI32_CONST(1, 1);
 _PI32_CONST(inv1, ~1);
 _PI32_CONST(2, 2);
 _PI32_CONST(4, 4);
 _PI32_CONST(0x7f, 0x7f);
 _PS_CONST(cephes_SQRTHF, 0.707106781186547524);
 _PS_CONST(cephes_log_p0, 7.0376836292E-2);
 _PS_CONST(cephes_log_p1, - 1.1514610310E-1);
 _PS_CONST(cephes_log_p2, 1.1676998740E-1);
 _PS_CONST(cephes_log_p3, - 1.2420140846E-1);
 _PS_CONST(cephes_log_p4, + 1.4249322787E-1);
 _PS_CONST(cephes_log_p5, - 1.6668057665E-1);
 _PS_CONST(cephes_log_p6, + 2.0000714765E-1);
 _PS_CONST(cephes_log_p7, - 2.4999993993E-1);
 _PS_CONST(cephes_log_p8, + 3.3333331174E-1);
 _PS_CONST(cephes_log_q1, -2.12194440e-4);
 _PS_CONST(cephes_log_q2, 0.693359375);
 #if defined (__MINGW32__)
 /* the ugly part below: many versions of gcc used to be completely buggy with respect to some intrinsics
   The movehl_ps is fixed in mingw 3.4.5, but I found out that all the _mm_cmp* intrinsics were completely
   broken on my mingw gcc 3.4.5 ...
   Note that the bug on _mm_cmp* does occur only at -O0 optimization level
 */
 inline __m128 my_movehl_ps(__m128 a, const __m128 b) {
 	asm (
 			"movhlps %2,%0\n\t"
 			: "=x" (a)
 			: "0" (a), "x"(b)
 	    );
 	return a;                                 }
 #warning "redefined _mm_movehl_ps (see gcc bug 21179)"
 #define _mm_movehl_ps my_movehl_ps
 inline __m128 my_cmplt_ps(__m128 a, const __m128 b) {
 	asm (
 			"cmpltps %2,%0\n\t"
 			: "=x" (a)
 			: "0" (a), "x"(b)
 	    );
 	return a;
                  }
 inline __m128 my_cmpgt_ps(__m128 a, const __m128 b) {
 	asm (
 			"cmpnleps %2,%0\n\t"
 			: "=x" (a)
 			: "0" (a), "x"(b)
 	    );
 	return a;
 }
 inline __m128 my_cmpeq_ps(__m128 a, const __m128 b) {
 	asm (
 			"cmpeqps %2,%0\n\t"
 			: "=x" (a)
 			: "0" (a), "x"(b)
 	    );
 	return a;
 }
 #warning "redefined _mm_cmpxx_ps functions..."
 #define _mm_cmplt_ps my_cmplt_ps
 #define _mm_cmpgt_ps my_cmpgt_ps
 #define _mm_cmpeq_ps my_cmpeq_ps
 #endif
 #ifndef USE_SSE2
 typedef union xmm_mm_union {
  __m128 xmm;
  __m64 mm[2];
 } xmm_mm_union;
 #define COPY_XMM_TO_MM(xmm_, mm0_, mm1_) {          \
    xmm_mm_union u; u.xmm = xmm_;                   \
    mm0_ = u.mm[0];                                 \
    mm1_ = u.mm[1];                                 \
 }
 #define COPY_MM_TO_XMM(mm0_, mm1_, xmm_) {                         \
    xmm_mm_union u; u.mm[0]=mm0_; u.mm[1]=mm1_; xmm_ = u.xmm;      \
  }
 #endif // USE_SSE2
 /* natural logarithm computed for 4 simultaneous float
   return NaN for x <= 0
 */
 inline v4sf log_ps(v4sf x) {
 #ifdef USE_SSE2
  v4si emm0;
 #else
  v2si mm0, mm1;
 #endif
  v4sf one = *(v4sf*)_ps_1;
  v4sf invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
  x = _mm_max_ps(x, *(v4sf*)_ps_min_norm_pos);  /* cut off denormalized stuff */
 #ifndef USE_SSE2
  /* part 1: x = frexpf(x, &e); */
  COPY_XMM_TO_MM(x, mm0, mm1);
  mm0 = _mm_srli_pi32(mm0, 23);
  mm1 = _mm_srli_pi32(mm1, 23);
 #else
  emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
 #endif
  /* keep only the fractional part */
  x = _mm_and_ps(x, *(v4sf*)_ps_inv_mant_mask);
  x = _mm_or_ps(x, *(v4sf*)_ps_0p5);
 #ifndef USE_SSE2
  /* now e=mm0:mm1 contain the really base-2 exponent */
  mm0 = _mm_sub_pi32(mm0, *(v2si*)_pi32_0x7f);
  mm1 = _mm_sub_pi32(mm1, *(v2si*)_pi32_0x7f);
  v4sf e = _mm_cvtpi32x2_ps(mm0, mm1);
  _mm_empty(); /* bye bye mmx */
 #else
  emm0 = _mm_sub_epi32(emm0, *(v4si*)_pi32_0x7f);
  v4sf e = _mm_cvtepi32_ps(emm0);
 #endif
  e = _mm_add_ps(e, one);
  /* part2:
     if( x < SQRTHF ) {
       e -= 1;
       x = x + x - 1.0;
     } else { x = x - 1.0; }
  */
  v4sf mask = _mm_cmplt_ps(x, *(v4sf*)_ps_cephes_SQRTHF);
  v4sf tmp = _mm_and_ps(x, mask);
  x = _mm_sub_ps(x, one);
  e = _mm_sub_ps(e, _mm_and_ps(one, mask));
  x = _mm_add_ps(x, tmp);
  v4sf z = _mm_mul_ps(x,x);
  v4sf y = *(v4sf*)_ps_cephes_log_p0;
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p1);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p2);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p3);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p4);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p5);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p6);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p7);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p8);
  y = _mm_mul_ps(y, x);
  y = _mm_mul_ps(y, z);
  tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q1);
  y = _mm_add_ps(y, tmp);
  tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
  y = _mm_sub_ps(y, tmp);
  tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q2);
  x = _mm_add_ps(x, y);
  x = _mm_add_ps(x, tmp);
  x = _mm_or_ps(x, invalid_mask); // negative arg will be NAN
  return x;
 }
 _PS_CONST(exp_hi,	88.3762626647949f);
 _PS_CONST(exp_lo,	-88.3762626647949f);
 _PS_CONST(cephes_LOG2EF, 1.44269504088896341);
 _PS_CONST(cephes_exp_C1, 0.693359375);
 _PS_CONST(cephes_exp_C2, -2.12194440e-4);
 _PS_CONST(cephes_exp_p0, 1.9875691500E-4);
 _PS_CONST(cephes_exp_p1, 1.3981999507E-3);
 _PS_CONST(cephes_exp_p2, 8.3334519073E-3);
 _PS_CONST(cephes_exp_p3, 4.1665795894E-2);
 _PS_CONST(cephes_exp_p4, 1.6666665459E-1);
 _PS_CONST(cephes_exp_p5, 5.0000001201E-1);
 inline v4sf exp_ps(v4sf x) {
  v4sf tmp = _mm_setzero_ps(), fx;
 #ifdef USE_SSE2
  v4si emm0;
 #else
  v2si mm0, mm1;
 #endif
  v4sf one = *(v4sf*)_ps_1;
  x = _mm_min_ps(x, *(v4sf*)_ps_exp_hi);
  x = _mm_max_ps(x, *(v4sf*)_ps_exp_lo);
  /* express exp(x) as exp(g + n*log(2)) */
  fx = _mm_mul_ps(x, *(v4sf*)_ps_cephes_LOG2EF);
  fx = _mm_add_ps(fx, *(v4sf*)_ps_0p5);
  /* how to perform a floorf with SSE: just below */
 #ifndef USE_SSE2
  /* step 1 : cast to int */
  tmp = _mm_movehl_ps(tmp, fx);
  mm0 = _mm_cvttps_pi32(fx);
  mm1 = _mm_cvttps_pi32(tmp);
  /* step 2 : cast back to float */
  tmp = _mm_cvtpi32x2_ps(mm0, mm1);
 #else
  emm0 = _mm_cvttps_epi32(fx);
  tmp  = _mm_cvtepi32_ps(emm0);
 #endif
  /* if greater, substract 1 */
  v4sf mask = _mm_cmpgt_ps(tmp, fx);
  mask = _mm_and_ps(mask, one);
  fx = _mm_sub_ps(tmp, mask);
  tmp = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C1);
  v4sf z = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C2);
  x = _mm_sub_ps(x, tmp);
  x = _mm_sub_ps(x, z);
  z = _mm_mul_ps(x,x);
  v4sf y = *(v4sf*)_ps_cephes_exp_p0;
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p1);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p2);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p3);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p4);
  y = _mm_mul_ps(y, x);
  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p5);
  y = _mm_mul_ps(y, z);
  y = _mm_add_ps(y, x);
  y = _mm_add_ps(y, one);
  /* build 2^n */
 #ifndef USE_SSE2
  z = _mm_movehl_ps(z, fx);
  mm0 = _mm_cvttps_pi32(fx);
  mm1 = _mm_cvttps_pi32(z);
  mm0 = _mm_add_pi32(mm0, *(v2si*)_pi32_0x7f);
  mm1 = _mm_add_pi32(mm1, *(v2si*)_pi32_0x7f);
  mm0 = _mm_slli_pi32(mm0, 23);
  mm1 = _mm_slli_pi32(mm1, 23);
  v4sf pow2n;
  COPY_MM_TO_XMM(mm0, mm1, pow2n);
  _mm_empty();
 #else
  emm0 = _mm_cvttps_epi32(fx);
  emm0 = _mm_add_epi32(emm0, *(v4si*)_pi32_0x7f);
  emm0 = _mm_slli_epi32(emm0, 23);
  v4sf pow2n = _mm_castsi128_ps(emm0);
 #endif
  y = _mm_mul_ps(y, pow2n);
  return y;
 }
 _PS_CONST(minus_cephes_DP1, -0.78515625);
 _PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4);
 _PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8);
 _PS_CONST(sincof_p0, -1.9515295891E-4);
 _PS_CONST(sincof_p1,  8.3321608736E-3);
 _PS_CONST(sincof_p2, -1.6666654611E-1);
 _PS_CONST(coscof_p0,  2.443315711809948E-005);
 _PS_CONST(coscof_p1, -1.388731625493765E-003);
 _PS_CONST(coscof_p2,  4.166664568298827E-002);
 _PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
 /* evaluation of 4 sines at onces, using only SSE1+MMX intrinsics so
   it runs also on old athlons XPs and the pentium III of your grand
   mother.
   The code is the exact rewriting of the cephes sinf function.
   Precision is excellent as long as x < 8192 (I did not bother to
   take into account the special handling they have for greater values
   -- it does not return garbage for arguments over 8192, though, but
   the extra precision is missing).
   Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
   surprising but correct result.
   Performance is also surprisingly good, 1.33 times faster than the
   macos vsinf SSE2 function, and 1.5 times faster than the
   __vrs4_sinf of amd's ACML (which is only available in 64 bits). Not
   too bad for an SSE1 function (with no special tuning) !
   However the latter libraries probably have a much better handling of NaN,
   Inf, denormalized and other special arguments..
   On my core 1 duo, the execution of this function takes approximately 95 cycles.
   From what I have observed on the experiments with Intel AMath lib, switching to an
   SSE2 version would improve the perf by only 10%.
   Since it is based on SSE intrinsics, it has to be compiled at -O2 to
   deliver full speed.
 */
 inline v4sf sin_ps(v4sf x) { // any x
  v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
 #ifdef USE_SSE2
  v4si emm0, emm2;
 #else
  v2si mm0, mm1, mm2, mm3;
 #endif
  sign_bit = x;
  /* take the absolute value */
  x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
  /* extract the sign bit (upper one) */
  sign_bit = _mm_and_ps(sign_bit, *(v4sf*)_ps_sign_mask);
  /* scale by 4/Pi */
  y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
  //printf("plop:"); print4(y);
 #ifdef USE_SSE2
  /* store the integer part of y in mm0 */
  emm2 = _mm_cvttps_epi32(y);
  /* j=(j+1) & (~1) (see the cephes sources) */
  emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
  y = _mm_cvtepi32_ps(emm2);
  /* get the swap sign flag */
  emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
  emm0 = _mm_slli_epi32(emm0, 29);
  /* get the polynom selection mask
     there is one polynom for 0 <= x <= Pi/4
     and another one for Pi/4<x<=Pi/2
     Both branches will be computed.
  */
  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
  v4sf swap_sign_bit = _mm_castsi128_ps(emm0);
  v4sf poly_mask = _mm_castsi128_ps(emm2);
  sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
 #else
  /* store the integer part of y in mm0:mm1 */
  xmm2 = _mm_movehl_ps(xmm2, y);
  mm2 = _mm_cvttps_pi32(y);
  mm3 = _mm_cvttps_pi32(xmm2);
  /* j=(j+1) & (~1) (see the cephes sources) */
  mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
  mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
  y = _mm_cvtpi32x2_ps(mm2, mm3);
  /* get the swap sign flag */
  mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
  mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
  mm0 = _mm_slli_pi32(mm0, 29);
  mm1 = _mm_slli_pi32(mm1, 29);
  /* get the polynom selection mask */
  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
  mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
  mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
  v4sf swap_sign_bit, poly_mask;
  COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit);
  COPY_MM_TO_XMM(mm2, mm3, poly_mask);
  sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
  _mm_empty(); /* good-bye mmx */
 #endif
  /* The magic pass: "Extended precision modular arithmetic"
     x = ((x - y * DP1) - y * DP2) - y * DP3; */
  xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
  xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
  xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
  xmm1 = _mm_mul_ps(y, xmm1);
  xmm2 = _mm_mul_ps(y, xmm2);
  xmm3 = _mm_mul_ps(y, xmm3);
  x = _mm_add_ps(x, xmm1);
  x = _mm_add_ps(x, xmm2);
  x = _mm_add_ps(x, xmm3);
  /* Evaluate the first polynom  (0 <= x <= Pi/4) */
  y = *(v4sf*)_ps_coscof_p0;
  v4sf z = _mm_mul_ps(x,x);
  y = _mm_mul_ps(y, z);
  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
  y = _mm_mul_ps(y, z);
  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
  y = _mm_mul_ps(y, z);
  y = _mm_mul_ps(y, z);
  v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
  y = _mm_sub_ps(y, tmp);
  y = _mm_add_ps(y, *(v4sf*)_ps_1);
  /* Evaluate the second polynom  (Pi/4 <= x <= 0) */
  v4sf y2 = *(v4sf*)_ps_sincof_p0;
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_mul_ps(y2, x);
  y2 = _mm_add_ps(y2, x);
  /* select the correct result from the two polynoms */
  xmm3 = poly_mask;
  y2 = _mm_and_ps(xmm3, y2); //, xmm3);
  y = _mm_andnot_ps(xmm3, y);
  y = _mm_add_ps(y,y2);
  /* update the sign */
  y = _mm_xor_ps(y, sign_bit);
  return y;
 }
 /* almost the same as sin_ps */
 inline v4sf cos_ps(v4sf x) { // any x
  v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
 #ifdef USE_SSE2
  v4si emm0, emm2;
 #else
  v2si mm0, mm1, mm2, mm3;
 #endif
  /* take the absolute value */
  x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
  /* scale by 4/Pi */
  y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
 #ifdef USE_SSE2
  /* store the integer part of y in mm0 */
  emm2 = _mm_cvttps_epi32(y);
  /* j=(j+1) & (~1) (see the cephes sources) */
  emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
  y = _mm_cvtepi32_ps(emm2);
  emm2 = _mm_sub_epi32(emm2, *(v4si*)_pi32_2);
  /* get the swap sign flag */
  emm0 = _mm_andnot_si128(emm2, *(v4si*)_pi32_4);
  emm0 = _mm_slli_epi32(emm0, 29);
  /* get the polynom selection mask */
  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
  v4sf sign_bit = _mm_castsi128_ps(emm0);
  v4sf poly_mask = _mm_castsi128_ps(emm2);
 #else
  /* store the integer part of y in mm0:mm1 */
  xmm2 = _mm_movehl_ps(xmm2, y);
  mm2 = _mm_cvttps_pi32(y);
  mm3 = _mm_cvttps_pi32(xmm2);
  /* j=(j+1) & (~1) (see the cephes sources) */
  mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
  mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
  y = _mm_cvtpi32x2_ps(mm2, mm3);
  mm2 = _mm_sub_pi32(mm2, *(v2si*)_pi32_2);
  mm3 = _mm_sub_pi32(mm3, *(v2si*)_pi32_2);
  /* get the swap sign flag in mm0:mm1 and the
     polynom selection mask in mm2:mm3 */
  mm0 = _mm_andnot_si64(mm2, *(v2si*)_pi32_4);
  mm1 = _mm_andnot_si64(mm3, *(v2si*)_pi32_4);
  mm0 = _mm_slli_pi32(mm0, 29);
  mm1 = _mm_slli_pi32(mm1, 29);
  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
  mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
  mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
  v4sf sign_bit, poly_mask;
  COPY_MM_TO_XMM(mm0, mm1, sign_bit);
  COPY_MM_TO_XMM(mm2, mm3, poly_mask);
  _mm_empty(); /* good-bye mmx */
 #endif
  /* The magic pass: "Extended precision modular arithmetic"
     x = ((x - y * DP1) - y * DP2) - y * DP3; */
  xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
  xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
  xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
  xmm1 = _mm_mul_ps(y, xmm1);
  xmm2 = _mm_mul_ps(y, xmm2);
  xmm3 = _mm_mul_ps(y, xmm3);
  x = _mm_add_ps(x, xmm1);
  x = _mm_add_ps(x, xmm2);
  x = _mm_add_ps(x, xmm3);
  /* Evaluate the first polynom  (0 <= x <= Pi/4) */
  y = *(v4sf*)_ps_coscof_p0;
  v4sf z = _mm_mul_ps(x,x);
  y = _mm_mul_ps(y, z);
  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
  y = _mm_mul_ps(y, z);
  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
  y = _mm_mul_ps(y, z);
  y = _mm_mul_ps(y, z);
  v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
  y = _mm_sub_ps(y, tmp);
  y = _mm_add_ps(y, *(v4sf*)_ps_1);
  /* Evaluate the second polynom  (Pi/4 <= x <= 0) */
  v4sf y2 = *(v4sf*)_ps_sincof_p0;
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_mul_ps(y2, x);
  y2 = _mm_add_ps(y2, x);
  /* select the correct result from the two polynoms */
  xmm3 = poly_mask;
  y2 = _mm_and_ps(xmm3, y2); //, xmm3);
  y = _mm_andnot_ps(xmm3, y);
  y = _mm_add_ps(y,y2);
  /* update the sign */
  y = _mm_xor_ps(y, sign_bit);
  return y;
 }
 /* since sin_ps and cos_ps are almost identical, sincos_ps could replace both of them..
   it is almost as fast, and gives you a free cosine with your sine */
 inline void sincos_ps(v4sf x, v4sf *s, v4sf *c) {
  v4sf xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y;
 #ifdef USE_SSE2
  v4si emm0, emm2, emm4;
 #else
  v2si mm0, mm1, mm2, mm3, mm4, mm5;
 #endif
  sign_bit_sin = x;
  /* take the absolute value */
  x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
  /* extract the sign bit (upper one) */
  sign_bit_sin = _mm_and_ps(sign_bit_sin, *(v4sf*)_ps_sign_mask);
  /* scale by 4/Pi */
  y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
 #ifdef USE_SSE2
  /* store the integer part of y in emm2 */
  emm2 = _mm_cvttps_epi32(y);
  /* j=(j+1) & (~1) (see the cephes sources) */
  emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
  y = _mm_cvtepi32_ps(emm2);
  emm4 = emm2;
  /* get the swap sign flag for the sine */
  emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
  emm0 = _mm_slli_epi32(emm0, 29);
  v4sf swap_sign_bit_sin = _mm_castsi128_ps(emm0);
  /* get the polynom selection mask for the sine*/
  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
  v4sf poly_mask = _mm_castsi128_ps(emm2);
 #else
  /* store the integer part of y in mm2:mm3 */
  xmm3 = _mm_movehl_ps(xmm3, y);
  mm2 = _mm_cvttps_pi32(y);
  mm3 = _mm_cvttps_pi32(xmm3);
  /* j=(j+1) & (~1) (see the cephes sources) */
  mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
  mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
  y = _mm_cvtpi32x2_ps(mm2, mm3);
  mm4 = mm2;
  mm5 = mm3;
  /* get the swap sign flag for the sine */
  mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
  mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
  mm0 = _mm_slli_pi32(mm0, 29);
  mm1 = _mm_slli_pi32(mm1, 29);
  v4sf swap_sign_bit_sin;
  COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit_sin);
  /* get the polynom selection mask for the sine */
  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
  mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
  mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
  v4sf poly_mask;
  COPY_MM_TO_XMM(mm2, mm3, poly_mask);
 #endif
  /* The magic pass: "Extended precision modular arithmetic"
     x = ((x - y * DP1) - y * DP2) - y * DP3; */
  xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
  xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
  xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
  xmm1 = _mm_mul_ps(y, xmm1);
  xmm2 = _mm_mul_ps(y, xmm2);
  xmm3 = _mm_mul_ps(y, xmm3);
  x = _mm_add_ps(x, xmm1);
  x = _mm_add_ps(x, xmm2);
  x = _mm_add_ps(x, xmm3);
 #ifdef USE_SSE2
  emm4 = _mm_sub_epi32(emm4, *(v4si*)_pi32_2);
  emm4 = _mm_andnot_si128(emm4, *(v4si*)_pi32_4);
  emm4 = _mm_slli_epi32(emm4, 29);
  v4sf sign_bit_cos = _mm_castsi128_ps(emm4);
 #else
  /* get the sign flag for the cosine */
  mm4 = _mm_sub_pi32(mm4, *(v2si*)_pi32_2);
  mm5 = _mm_sub_pi32(mm5, *(v2si*)_pi32_2);
  mm4 = _mm_andnot_si64(mm4, *(v2si*)_pi32_4);
  mm5 = _mm_andnot_si64(mm5, *(v2si*)_pi32_4);
  mm4 = _mm_slli_pi32(mm4, 29);
  mm5 = _mm_slli_pi32(mm5, 29);
  v4sf sign_bit_cos;
  COPY_MM_TO_XMM(mm4, mm5, sign_bit_cos);
  _mm_empty(); /* good-bye mmx */
 #endif
  sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin);
  /* Evaluate the first polynom  (0 <= x <= Pi/4) */
  v4sf z = _mm_mul_ps(x,x);
  y = *(v4sf*)_ps_coscof_p0;
  y = _mm_mul_ps(y, z);
  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
  y = _mm_mul_ps(y, z);
  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
  y = _mm_mul_ps(y, z);
  y = _mm_mul_ps(y, z);
  v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
  y = _mm_sub_ps(y, tmp);
  y = _mm_add_ps(y, *(v4sf*)_ps_1);
  /* Evaluate the second polynom  (Pi/4 <= x <= 0) */
  v4sf y2 = *(v4sf*)_ps_sincof_p0;
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
  y2 = _mm_mul_ps(y2, z);
  y2 = _mm_mul_ps(y2, x);
  y2 = _mm_add_ps(y2, x);
  /* select the correct result from the two polynoms */
  xmm3 = poly_mask;
  v4sf ysin2 = _mm_and_ps(xmm3, y2);
  v4sf ysin1 = _mm_andnot_ps(xmm3, y);
  y2 = _mm_sub_ps(y2,ysin2);
  y = _mm_sub_ps(y, ysin1);
  xmm1 = _mm_add_ps(ysin1,ysin2);
  xmm2 = _mm_add_ps(y,y2);
  /* update the sign */
  *s = _mm_xor_ps(xmm1, sign_bit_sin);
  *c = _mm_xor_ps(xmm2, sign_bit_cos);
 }
 #endif
--- a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_optim_tracking_cc.cc
+++ b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_optim_tracking_cc.cc
@ -40,16 +40,15 @@
 #include "tracking_discriminators.h"
 #include "lock_detectors.h"
 #include "GPS_L1_CA.h"
 #include "nco_lib.h"
 #include "control_message_factory.h"
 #include <boost/lexical_cast.hpp>
 #include <iostream>
 #include <sstream>
 #include <cmath>
 #include "math.h"
 #include <gnuradio/gr_io_signature.h>
 #include <glog/log_severity.h>
 #include <glog/logging.h>
-#include <gr_fxpt.h>
+
 /*!
 * \todo Include in definition header file
@ -59,7 +58,6 @@
 #define MAXIMUM_LOCK_FAIL_COUNTER 50
 #define CARRIER_LOCK_THRESHOLD 0.85
 using google::LogMessage;
 gps_l1_ca_dll_pll_optim_tracking_cc_sptr
@ -126,9 +124,6 @@ Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc(
    d_ca_code = new gr_complex[(int)GPS_L1_CA_CODE_LENGTH_CHIPS + 2];
    d_carr_sign = new gr_complex[d_vector_length*2];
    /* If an array is partitioned for more than one thread to operate on,
     * having the sub-array boundaries unaligned to cache lines could lead
     * to performance degradation. Here we allocate memory
@ -149,7 +144,7 @@ Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc(
    //--- Perform initializations ------------------------------
    // define initial code frequency basis of NCO
-    d_code_freq_hz = GPS_L1_CA_CODE_RATE_HZ;
+    d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ;
    // define residual code phase (in chips)
    d_rem_code_phase_samples = 0.0;
    // define residual carrier phase
@ -204,12 +199,12 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::start_tracking()
    float T_chip_mod_seconds;
    float T_prn_mod_seconds;
    float T_prn_mod_samples;
-    d_code_freq_hz = radial_velocity * GPS_L1_CA_CODE_RATE_HZ;
+    d_code_freq_chips = radial_velocity * GPS_L1_CA_CODE_RATE_HZ;
-    T_chip_mod_seconds = 1/d_code_freq_hz;
+    T_chip_mod_seconds = 1/d_code_freq_chips;
    T_prn_mod_seconds = T_chip_mod_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
    T_prn_mod_samples = T_prn_mod_seconds * (float)d_fs_in;
-    d_next_prn_length_samples = round(T_prn_mod_samples);
+    d_current_prn_length_samples = round(T_prn_mod_samples);
    float T_prn_true_seconds = GPS_L1_CA_CODE_LENGTH_CHIPS / GPS_L1_CA_CODE_RATE_HZ;
    float T_prn_true_samples = T_prn_true_seconds * (float)d_fs_in;
@ -242,22 +237,25 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::start_tracking()
    // Experimental: pre-sampled local signal replica at nominal code frequency.
    // No code doppler correction
-    // unified loop for E, P, L code vectors
+    double tcode_chips;
    double code_phase_step_chips;
    code_phase_step_chips = (GPS_L1_CA_CODE_RATE_HZ / (double)d_fs_in);
    double tcode_chips = 0;
    // Alternative EPL code generation (40% of speed improvement!)
    int early_late_spc_samples;
    early_late_spc_samples=round(d_early_late_spc_chips/code_phase_step_chips);
    double epl_loop_length_samples;
    epl_loop_length_samples=d_current_prn_length_samples+early_late_spc_samples*2;
    int associated_chip_index;
    int code_length_chips = (int)GPS_L1_CA_CODE_LENGTH_CHIPS;
    double code_phase_step_chips;
 	int early_late_spc_samples;
 	int epl_loop_length_samples;
    // unified loop for E, P, L code vectors
    code_phase_step_chips = ((double)d_code_freq_chips) / ((double)d_fs_in);
    tcode_chips = 0;
    // Alternative EPL code generation (40% of speed improvement!)
    early_late_spc_samples=round(d_early_late_spc_chips/code_phase_step_chips);
    epl_loop_length_samples=d_current_prn_length_samples+early_late_spc_samples*2;
    for (int i=0; i<epl_loop_length_samples; i++)
 		{
    	    associated_chip_index = 1 + round(fmod(tcode_chips - d_early_late_spc_chips, code_length_chips));
            d_early_code[i] = d_ca_code[associated_chip_index];
-            tcode_chips = tcode_chips + d_code_phase_step_chips;
+            tcode_chips = tcode_chips + code_phase_step_chips;
 		}
    memcpy(d_prompt_code,&d_early_code[early_late_spc_samples],d_current_prn_length_samples* sizeof(gr_complex));
@ -268,8 +266,6 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::start_tracking()
    d_carrier_lock_fail_counter = 0;
    d_rem_code_phase_samples = 0;
    d_rem_carr_phase_rad = 0;
    d_rem_code_phase_samples = 0;
    d_next_rem_code_phase_samples = 0;
    d_acc_carrier_phase_rad = 0;
    d_code_phase_samples = d_acq_code_phase_samples;
@ -291,6 +287,7 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::start_tracking()
 }
 void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::update_local_code()
 {
    double tcode_chips;
@ -302,18 +299,18 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::update_local_code()
 	int epl_loop_length_samples;
    // unified loop for E, P, L code vectors
-    code_phase_step_chips = ((double)d_code_freq_hz) / ((double)d_fs_in);
+    code_phase_step_chips = ((double)d_code_freq_chips) / ((double)d_fs_in);
-    rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_hz / d_fs_in);
+    rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in);
    tcode_chips = -rem_code_phase_chips;
-    // Alternative EPL code generation (40% of speed improvement!)
+    //EPL code generation
    early_late_spc_samples=round(d_early_late_spc_chips/code_phase_step_chips);
    epl_loop_length_samples=d_current_prn_length_samples+early_late_spc_samples*2;
    for (int i=0; i<epl_loop_length_samples; i++)
 		{
    	    associated_chip_index = 1 + round(fmod(tcode_chips - d_early_late_spc_chips, code_length_chips));
            d_early_code[i] = d_ca_code[associated_chip_index];
-            tcode_chips = tcode_chips + d_code_phase_step_chips;
+            tcode_chips = tcode_chips + code_phase_step_chips;
 		}
    memcpy(d_prompt_code,&d_early_code[early_late_spc_samples],d_current_prn_length_samples* sizeof(gr_complex));
@ -321,43 +318,14 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::update_local_code()
 }
 void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::update_local_carrier()
 {
-    float phase_rad, phase_step_rad;
+    float phase_step_rad;
    phase_step_rad = (float)GPS_TWO_PI*d_carrier_doppler_hz / (float)d_fs_in;
-    phase_rad = d_rem_carr_phase_rad;
+    fxp_nco(d_carr_sign, d_current_prn_length_samples,d_rem_carr_phase_rad, phase_step_rad);
-
+    //sse_nco(d_carr_sign, d_current_prn_length_samples,d_rem_carr_phase_rad, phase_step_rad);
    int phase_rad_i;
    phase_rad_i=gr_fxpt::float_to_fixed(phase_rad);
    int phase_step_rad_i;
    phase_step_rad_i=gr_fxpt::float_to_fixed(phase_step_rad);
    float sin_f,cos_f;
    for(int i = 0; i < d_current_prn_length_samples; i++)
        {
    		//using temp variables
    		gr_fxpt::sincos(phase_rad_i,&sin_f,&cos_f);
    	    d_carr_sign[i] = gr_complex(cos_f, -sin_f);
 			//using references (may be it can be a problem for c++11 standard
 			//gr_fxpt::sincos(phase_rad_i,&d_carr_sign[i].imag(),&d_carr_sign[i].real());
            phase_rad_i += phase_step_rad_i;
            // Using std::cos and std::sin
    		//d_carr_sign[i] = gr_complex(cos(phase_rad), -sin(phase_rad));
        }
    d_rem_carr_phase_rad = fmod(gr_fxpt::fixed_to_float(phase_rad_i), GPS_TWO_PI);
    d_acc_carrier_phase_rad = d_acc_carrier_phase_rad + d_rem_carr_phase_rad;
 }
 Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::~Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc()
 {
    d_dump_file.close();
@ -384,51 +352,49 @@ int Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::general_work (int noutput_items, gr_vec
 {
    // process vars
-    float carr_error;
+    float carr_error_hz;
-    float carr_nco;
+    float carr_error_filt_hz;
-    float code_error;
+    float code_error_chips;
-    float code_nco;
+    float code_error_filt_chips;
    if (d_enable_tracking == true)
        {
-            /*
+        /*
-             * Receiver signal alignment
+         * Receiver signal alignment
-             */
+         */
-            if (d_pull_in == true)
+        if (d_pull_in == true)
-                {
+            {
-                    int samples_offset;
+                int samples_offset;
-                    // 28/11/2011 ACQ to TRK transition BUG CORRECTION
+                // 28/11/2011 ACQ to TRK transition BUG CORRECTION
-                    float acq_trk_shif_correction_samples;
+                float acq_trk_shif_correction_samples;
-                    int acq_to_trk_delay_samples;
+                int acq_to_trk_delay_samples;
-                    acq_to_trk_delay_samples = d_sample_counter - d_acq_sample_stamp;
+                acq_to_trk_delay_samples = d_sample_counter - d_acq_sample_stamp;
-                    acq_trk_shif_correction_samples = d_next_prn_length_samples - fmod((float)acq_to_trk_delay_samples, (float)d_next_prn_length_samples);
+                acq_trk_shif_correction_samples = d_current_prn_length_samples - fmod((float)acq_to_trk_delay_samples, (float)d_current_prn_length_samples);
-                    //std::cout<<"acq_trk_shif_correction="<<acq_trk_shif_correction_samples<<"\r\n";
+                //std::cout<<"acq_trk_shif_correction="<<acq_trk_shif_correction_samples<<"\r\n";
-                    samples_offset = round(d_acq_code_phase_samples + acq_trk_shif_correction_samples);
+                samples_offset = round(d_acq_code_phase_samples + acq_trk_shif_correction_samples);
-                    // /todo: Check if the sample counter sent to the next block as a time reference should be incremented AFTER sended or BEFORE
+                // /todo: Check if the sample counter sent to the next block as a time reference should be incremented AFTER sended or BEFORE
-                    //d_sample_counter_seconds = d_sample_counter_seconds + (((double)samples_offset) / (double)d_fs_in);
+                //d_sample_counter_seconds = d_sample_counter_seconds + (((double)samples_offset) / (double)d_fs_in);
-                    d_sample_counter = d_sample_counter + samples_offset; //count for the processed samples
+                d_sample_counter = d_sample_counter + samples_offset; //count for the processed samples
-                    d_pull_in = false;
+                d_pull_in = false;
-                    //std::cout<<" samples_offset="<<samples_offset<<"\r\n";
+                //std::cout<<" samples_offset="<<samples_offset<<"\r\n";
-                    consume_each(samples_offset); //shift input to perform alignement with local replica
+                consume_each(samples_offset); //shift input to perform alignement with local replica
-                    return 1;
+                return 1;
-                }
+            }
 			// GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder
 			Gnss_Synchro current_synchro_data;
 			// Fill the acquisition data
 			current_synchro_data = *d_acquisition_gnss_synchro;
-            // GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder
+			// Block input data and block output stream pointers
-            Gnss_Synchro current_synchro_data;
+			const gr_complex* in = (gr_complex*) input_items[0]; //PRN start block alignement
-            // Fill the acquisition data
+			Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0];
            current_synchro_data = *d_acquisition_gnss_synchro;
-            const gr_complex* in = (gr_complex*) input_items[0]; //PRN start block alignement
+			// Generate local code and carrier replicas (using \hat{f}_d(k-1))
            Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0];
-            // Update the prn length based on code freq (variable) and
+			//update_local_code(); //disabled in the speed optimized tracking!
            // sampling frequency (fixed)
            // variable code PRN sample block size
            d_current_prn_length_samples = d_next_prn_length_samples;
-            update_local_code();
+			update_local_carrier();
            update_local_carrier();
            // perform Early, Prompt and Late correlation
            d_correlator.Carrier_wipeoff_and_EPL_volk(d_current_prn_length_samples,
@ -442,111 +408,91 @@ int Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::general_work (int noutput_items, gr_vec
                    d_Late,
                    is_unaligned());
-            // check for samples consistency (this should be done before in the receiver / here only if the source is a file)
+            // ################## PLL ##########################################################
-            if (std::isnan((*d_Prompt).real()) == true or std::isnan((*d_Prompt).imag()) == true )// or std::isinf(in[i].real())==true or std::isinf(in[i].imag())==true)
+            // PLL discriminator
-			{
+            carr_error_hz = pll_cloop_two_quadrant_atan(*d_Prompt) / (float)GPS_TWO_PI;
-				const int samples_available = ninput_items[0];
+            // Carrier discriminator filter
-				d_sample_counter = d_sample_counter + samples_available;
+            carr_error_filt_hz = d_carrier_loop_filter.get_carrier_nco(carr_error_hz);
-				LOG_AT_LEVEL(WARNING) << "Detected NaN samples at sample number " << d_sample_counter;
+            // New carrier Doppler frequency estimation
-				consume_each(samples_available);
+            d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_error_filt_hz;
            // New code Doppler frequency estimation
            d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ + ((d_carrier_doppler_hz * GPS_L1_CA_CODE_RATE_HZ) / GPS_L1_FREQ_HZ);
            //carrier phase accumulator for (K) doppler estimation
            d_acc_carrier_phase_rad=d_acc_carrier_phase_rad+GPS_TWO_PI*d_carrier_doppler_hz*GPS_L1_CA_CODE_PERIOD;
            //remanent carrier phase to prevent overflow in the code NCO
            d_rem_carr_phase_rad=d_rem_carr_phase_rad+GPS_TWO_PI*d_carrier_doppler_hz*GPS_L1_CA_CODE_PERIOD;
            d_rem_carr_phase_rad=fmod(d_rem_carr_phase_rad,GPS_TWO_PI);
-				// make an output to not stop the rest of the processing blocks
+            // ################## DLL ##########################################################
-	            current_synchro_data.Prompt_I=0.0;
+            // DLL discriminator
-	            current_synchro_data.Prompt_Q=0.0;
+            code_error_chips =dll_nc_e_minus_l_normalized(*d_Early, *d_Late); //[chips/Ti]
-	            current_synchro_data.Tracking_timestamp_secs=(double)d_sample_counter/(double)d_fs_in;
+            // Code discriminator filter
-	            current_synchro_data.Carrier_phase_rads=0.0;
+            code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips); //[chips/second]
-	            current_synchro_data.Code_phase_secs=0.0;
+            //Code phase accumulator
-	            current_synchro_data.CN0_dB_hz=0.0;
+            float code_error_filt_secs;
-	            current_synchro_data.Flag_valid_tracking=false;
+            code_error_filt_secs=(GPS_L1_CA_CODE_PERIOD*code_error_filt_chips)/GPS_L1_CA_CODE_RATE_HZ; //[seconds]
            d_acc_code_phase_secs=d_acc_code_phase_secs+code_error_filt_secs;
 	            *out[0] =current_synchro_data;
-				return 1;
+            // ################## CARRIER AND CODE NCO BUFFER ALIGNEMENT #######################
-			}
+            // keep alignment parameters for the next input buffer
            // Compute PLL error and update carrier NCO -
            carr_error = pll_cloop_two_quadrant_atan(*d_Prompt) / (float)GPS_TWO_PI;
            // Implement carrier loop filter and generate NCO command
            carr_nco = d_carrier_loop_filter.get_carrier_nco(carr_error);
            // Modify carrier freq based on NCO command
            d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_nco;
            // Compute DLL error and update code NCO
            code_error = dll_nc_e_minus_l_normalized(*d_Early, *d_Late);
            // Implement code loop filter and generate NCO command
            code_nco = d_code_loop_filter.get_code_nco(code_error);
            // Modify code freq based on NCO command
            d_code_freq_hz = GPS_L1_CA_CODE_RATE_HZ - code_nco;
            // Update the phasestep based on code freq (variable) and
            // sampling frequency (fixed)
            d_code_phase_step_chips = d_code_freq_hz / (float)d_fs_in; //[chips]
            // variable code PRN sample block size
            float T_chip_seconds;
            float T_prn_seconds;
            float T_prn_samples;
            float K_blk_samples;
-            T_chip_seconds = 1 / d_code_freq_hz;
+            // Compute the next buffer lenght based in the new period of the PRN sequence and the code phase error estimation
            T_chip_seconds = 1 / d_code_freq_chips;
            T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
-            T_prn_samples = T_prn_seconds * d_fs_in;
+            T_prn_samples = T_prn_seconds * (float)d_fs_in;
-            d_rem_code_phase_samples = d_next_rem_code_phase_samples;
+            K_blk_samples = T_prn_samples + d_rem_code_phase_samples + code_error_filt_secs*(float)d_fs_in;
-            K_blk_samples = T_prn_samples + d_rem_code_phase_samples;
+            d_current_prn_length_samples = round(K_blk_samples); //round to a discrete samples
-            d_next_prn_length_samples = round(K_blk_samples); //round to a discrete samples
+            d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
-            d_next_rem_code_phase_samples = K_blk_samples - d_next_prn_length_samples; //rounding error
+
            /*!
             * \todo Improve the lock detection algorithm!
             */
            // ####### CN0 ESTIMATION AND LOCK DETECTORS ######
-            if (d_cn0_estimation_counter < CN0_ESTIMATION_SAMPLES)
+			if (d_cn0_estimation_counter < CN0_ESTIMATION_SAMPLES)
-                {
+				{
-                    // fill buffer with prompt correlator output values
+					// fill buffer with prompt correlator output values
-                    d_Prompt_buffer[d_cn0_estimation_counter] = *d_Prompt;
+					d_Prompt_buffer[d_cn0_estimation_counter] = *d_Prompt;
-                    d_cn0_estimation_counter++;
+					d_cn0_estimation_counter++;
-                }
+				}
-            else
+			else
-                {
+				{
-                    d_cn0_estimation_counter = 0;
+					d_cn0_estimation_counter = 0;
-                    d_CN0_SNV_dB_Hz = cn0_svn_estimator(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES, d_fs_in, GPS_L1_CA_CODE_LENGTH_CHIPS);
+					// Code lock indicator
-                    d_carrier_lock_test = carrier_lock_detector(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES);
+					d_CN0_SNV_dB_Hz = cn0_svn_estimator(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES, d_fs_in, GPS_L1_CA_CODE_LENGTH_CHIPS);
-
+					// Carrier lock indicator
-                    // ###### TRACKING UNLOCK NOTIFICATION #####
+					d_carrier_lock_test = carrier_lock_detector(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES);
-                    if (d_carrier_lock_test < d_carrier_lock_threshold or d_CN0_SNV_dB_Hz < MINIMUM_VALID_CN0)
+					// Loss of lock detection
-                        {
+					if (d_carrier_lock_test < d_carrier_lock_threshold or d_CN0_SNV_dB_Hz < MINIMUM_VALID_CN0)
-                            d_carrier_lock_fail_counter++;
+						{
-                        }
+							d_carrier_lock_fail_counter++;
-                    else
+						}
-                        {
+					else
-                            if (d_carrier_lock_fail_counter > 0) d_carrier_lock_fail_counter--;
+						{
-                        }
+							if (d_carrier_lock_fail_counter > 0) d_carrier_lock_fail_counter--;
-                    if (d_carrier_lock_fail_counter > MAXIMUM_LOCK_FAIL_COUNTER)
+						}
-                        {
+					if (d_carrier_lock_fail_counter > MAXIMUM_LOCK_FAIL_COUNTER)
-                            std::cout << "Channel " << d_channel << " loss of lock!" << std::endl ;
+						{
-                            //tracking_message = 3; //loss of lock
+							std::cout << "Channel " << d_channel << " loss of lock!" << std::endl ;
-                            //d_channel_internal_queue->push(tracking_message);
+							ControlMessageFactory* cmf = new ControlMessageFactory();
-                        	ControlMessageFactory* cmf = new ControlMessageFactory();
+							if (d_queue != gr_msg_queue_sptr())
-                        	if (d_queue != gr_msg_queue_sptr())
+								{
-                        	{
+									d_queue->handle(cmf->GetQueueMessage(d_channel, 2));
-                        		d_queue->handle(cmf->GetQueueMessage(d_channel, 2));
+								}
-                        	}
+							delete cmf;
-                        	delete cmf;
+							d_carrier_lock_fail_counter = 0;
-                            d_carrier_lock_fail_counter = 0;
+							d_enable_tracking = false; // TODO: check if disabling tracking is consistent with the channel state machine
-                            d_enable_tracking = false; // TODO: check if disabling tracking is consistent with the channel state machine
+						}
-
+				}
                        }
                    //std::cout<<"d_carrier_lock_fail_counter"<<d_carrier_lock_fail_counter<<"\r\n";
                }
            // ########### Output the tracking data to navigation and PVT ##########
            current_synchro_data.Prompt_I = (double)(*d_Prompt).real();
            current_synchro_data.Prompt_Q = (double)(*d_Prompt).imag();
            // Tracking_timestamp_secs is aligned with the PRN start sample
-            current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter +
+            current_synchro_data.Tracking_timestamp_secs=((double)d_sample_counter+(double)d_current_prn_length_samples+(double)d_rem_code_phase_samples)/(double)d_fs_in;
-            		(double)d_next_prn_length_samples + (double)d_next_rem_code_phase_samples) / (double)d_fs_in;
+            // This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0
-            // This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN,
+            current_synchro_data.Code_phase_secs=0;
            // thus, Code_phase_secs = 0
            current_synchro_data.Code_phase_secs = 0;
            current_synchro_data.Carrier_phase_rads = (double)d_acc_carrier_phase_rad;
            current_synchro_data.CN0_dB_hz = (double)d_CN0_SNV_dB_Hz;
            *out[0] = current_synchro_data;
@ -585,7 +531,6 @@ int Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::general_work (int noutput_items, gr_vec
            *d_Prompt = gr_complex(0,0);
            *d_Late = gr_complex(0,0);
            Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0]; //block output streams pointer
            //std::cout<<output_items.size()<<std::endl;
            // GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder
            Gnss_Synchro current_synchro_data;
            *out[0] = current_synchro_data;
@ -621,15 +566,15 @@ int Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::general_work (int noutput_items, gr_vec
                    // carrier and code frequency
                    d_dump_file.write((char*)&d_carrier_doppler_hz, sizeof(float));
-                    d_dump_file.write((char*)&d_code_freq_hz, sizeof(float));
+                    d_dump_file.write((char*)&d_code_freq_chips, sizeof(float));
                    //PLL commands
-                    d_dump_file.write((char*)&carr_error, sizeof(float));
+                    d_dump_file.write((char*)&carr_error_hz, sizeof(float));
-                    d_dump_file.write((char*)&carr_nco, sizeof(float));
+                    d_dump_file.write((char*)&carr_error_filt_hz, sizeof(float));
                    //DLL commands
-                    d_dump_file.write((char*)&code_error, sizeof(float));
+                    d_dump_file.write((char*)&code_error_chips, sizeof(float));
-                    d_dump_file.write((char*)&code_nco, sizeof(float));
+                    d_dump_file.write((char*)&code_error_filt_chips, sizeof(float));
                    // CN0 and carrier lock test
                    d_dump_file.write((char*)&d_CN0_SNV_dB_Hz, sizeof(float));
@ -641,14 +586,13 @@ int Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::general_work (int noutput_items, gr_vec
                    tmp_double=(double)(d_sample_counter+d_current_prn_length_samples);
                    d_dump_file.write((char*)&tmp_double, sizeof(double));
            }
-            catch (std::ifstream::failure e)
+            catch (std::ifstream::failure& e)
            {
                    std::cout << "Exception writing trk dump file " << e.what() << std::endl;
            }
        }
    consume_each(d_current_prn_length_samples); // this is necesary in gr_block derivates
    //d_sample_counter_seconds = d_sample_counter_seconds + ( ((double)d_current_prn_length_samples) / (double)d_fs_in );
    d_sample_counter += d_current_prn_length_samples; //count for the processed samples
    return 1; //output tracking result ALWAYS even in the case of d_enable_tracking==false
 }
@ -672,7 +616,7 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_channel(unsigned int channel)
                            d_dump_file.open(d_dump_filename.c_str(), std::ios::out | std::ios::binary);
                            std::cout << "Tracking dump enabled on channel " << d_channel << " Log file: " << d_dump_filename.c_str() << std::endl;
                    }
-                    catch (std::ifstream::failure e)
+                    catch (std::ifstream::failure& e)
                    {
                            std::cout << "channel " << d_channel << " Exception opening trk dump file " << e.what() << std::endl;
                    }
@ -681,7 +625,6 @@ void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_channel(unsigned int channel)
 }
 void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_channel_queue(concurrent_queue<int> *channel_internal_queue)
 {
    d_channel_internal_queue = channel_internal_queue;
--- a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_optim_tracking_cc.h
+++ b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_optim_tracking_cc.h
@ -35,7 +35,7 @@
 */
 #ifndef GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_CC_H
-#define	GNSS_SDR_GPS_L1_CA_DLL_PLL_TRACKING_CC_H
+#define	 GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_CC_H
 #include <fstream>
 #include <queue>
@ -56,7 +56,7 @@
 class Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc;
 typedef boost::shared_ptr<Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc>
-        gps_l1_ca_dll_pll_optim_tracking_cc_sptr;
+	gps_l1_ca_dll_pll_optim_tracking_cc_sptr;
 gps_l1_ca_dll_pll_optim_tracking_cc_sptr
 gps_l1_ca_dll_pll_make_optim_tracking_cc(long if_freq,
@ -135,7 +135,6 @@ private:
    long d_fs_in;
    double d_early_late_spc_chips;
    double d_code_phase_step_chips;
    gr_complex* d_ca_code;
@ -150,7 +149,6 @@ private:
    // remaining code phase and carrier phase between tracking loops
    float d_rem_code_phase_samples;
    float d_next_rem_code_phase_samples;
    float d_rem_carr_phase_rad;
    // PLL and DLL filter library
@ -164,15 +162,14 @@ private:
    Correlator d_correlator;
    // tracking vars
-    float d_code_freq_hz;
+    float d_code_freq_chips;
    float d_carrier_doppler_hz;
    float d_acc_carrier_phase_rad;
    float d_code_phase_samples;
    float d_acc_code_phase_secs;
    //PRN period in samples
    int d_current_prn_length_samples;
    int d_next_prn_length_samples;
    //double d_sample_counter_seconds;
    //processing samples counters
    unsigned long int d_sample_counter;
@ -187,9 +184,9 @@ private:
    int d_carrier_lock_fail_counter;
    // control vars
    int d_gnuradio_forecast_samples;
    bool d_enable_tracking;
    bool d_pull_in;
    int d_gnuradio_forecast_samples;
    // file dump
    std::string d_dump_filename;
--- a/src/main/jamfile.jam
+++ b/src/main/jamfile.jam
@ -28,6 +28,7 @@ exe gnss-sdr : main.cc
 ../algorithms/data_type_adapter/adapters//ishort_to_complex
 ../algorithms/input_filter/adapters//fir_filter
 ../algorithms/input_filter/adapters//freq_xlating_fir_filter
 ../algorithms/libs//nco_lib
 ../algorithms/libs//gnss_signal_processing
 ../algorithms/libs//gps_sdr_signal_processing
 ../algorithms/libs//galileo_e1_signal_processing
--- a/src/tests/arithmetic/cordic_test.cc
+++ b/src/tests/arithmetic/cordic_test.cc
@ -2,6 +2,7 @@
 * \file cordic_test.cc
 * \brief Test of the CORDIC (COordinate Rotation DIgital Computer) algorithm
 * \author Carles Fernandez-Prades, 2012. cfernandez(at)cttc.es
 * 		   Javier Arribas, 2012. jarribas(at)cttc.es
 *
 *
 * -------------------------------------------------------------------------
@ -30,79 +31,105 @@
 */
 #include "GPS_L1_CA.h"
 #include <iostream>
-#include <math.h>
+#include <cmath>
 #include <gtest/gtest.h>
 #include "cordic.h"
 #include <sys/time.h>
 #include <algorithm>
 #include <cstdlib> // for RAND_MAX
 #include <gnuradio/gr_fxpt_nco.h>
-
+#include "nco_lib.h"
 TEST(Cordic_Test, StandardCIsFasterThanCordic)
 {
    int largest_k = 10;
    Cordic* cordicPtr;
    cordicPtr = new Cordic(largest_k);
    double phase = rand();
    phase = (phase/RAND_MAX) * 3.141592;
    double  cos_phase1 = 0;
    double  sin_phase1 = 0;
    double  cos_phase2 = 0;
    double  sin_phase2 = 0;
    double  cos_phase3 = 0;
    double  sin_phase3 = 0;
-    gr_fxpt_nco* nco;
+    std::complex<float> *d_carr_sign;
-    nco = new gr_fxpt_nco();
+    // carrier parameters
    int d_vector_length=4000;
    float phase_rad;
    float phase_step_rad;
    float carrier_freq=2000;
    float d_fs_in=4000000;
    phase_step_rad = (float)GPS_TWO_PI*carrier_freq / (float)d_fs_in;
-    double niter = 10000000;
+    // space for carrier wipeoff and signal baseband vectors
    if (posix_memalign((void**)&d_carr_sign, 16, d_vector_length * sizeof(std::complex<float>) * 2) == 0){};
    double sin_d,cos_d;
    double sin_f,cos_f;
    double niter = 10000;
    struct timeval tv;
-
+    //*** NON-OPTIMIZED CORDIC *****
    gettimeofday(&tv, NULL);
    long long int begin1 = tv.tv_sec * 1000000 + tv.tv_usec;
    for(int i=0; i<niter; i++)
        {
-            cordicPtr->cordic_get_cos_sin(phase, cos_phase1, sin_phase1);
+    	phase_rad=0;
            for(int j=0;j<d_vector_length;j++)
            {
            	cordicPtr->cordic_get_cos_sin(phase_rad, cos_d, sin_d);
            	d_carr_sign[j]=std::complex<float>(cos_d,-sin_d);
            	phase_rad=phase_rad+phase_step_rad;
            }
        }
    gettimeofday(&tv, NULL);
    long long int end1 = tv.tv_sec *1000000 + tv.tv_usec;
-
+    //*** STD COS, SIN standalone *****
    gettimeofday(&tv, NULL);
    long long int begin2 = tv.tv_sec * 1000000 + tv.tv_usec;
    for(int i=0; i<niter; i++)
        {
-            cos_phase2 = cos(phase);
+			for(int j=0;j<d_vector_length;j++)
-            sin_phase2 = sin(phase);
+			{
 				cos_f = std::cos(phase_rad);
 				sin_f = std::sin(phase_rad);
 				d_carr_sign[j]=std::complex<float>(cos_f,-sin_f);
 				phase_rad=phase_rad+phase_step_rad;
 			}
        }
    gettimeofday(&tv, NULL);
    long long int end2 = tv.tv_sec *1000000 + tv.tv_usec;
-
+    //*** GNU RADIO FIXED POINT ARITHMETIC ********
    float phase_f;
    phase_f = (float)phase;
    gettimeofday(&tv, NULL);
    long long int begin3 = tv.tv_sec * 1000000 + tv.tv_usec;
    for(int i=0; i<niter; i++)
        {
-            nco->set_phase(phase_f);
+        	fxp_nco(d_carr_sign, d_vector_length,0, phase_step_rad);
            cos_phase3 = nco->cos();
            sin_phase3 = nco->sin();
        }
    gettimeofday(&tv, NULL);
    long long int end3 = tv.tv_sec *1000000 + tv.tv_usec;
-    delete nco;
+
    //*** SSE2 NCO ****************
     gettimeofday(&tv, NULL);
    long long int begin4 = tv.tv_sec * 1000000 + tv.tv_usec;
    for(int i=0; i<niter; i++)
        {
        	sse_nco(d_carr_sign, d_vector_length,0, phase_step_rad);
        }
    gettimeofday(&tv, NULL);
    long long int end4 = tv.tv_sec *1000000 + tv.tv_usec;
    delete cordicPtr;
-    std::cout << "CORDIC sin =" << sin_phase1 << ", cos =" << cos_phase1 << " computed " << niter << " times in " << (end1-begin1) << " microseconds" << std::endl;
+    std::cout << "NON-OPTIMIZED CORDIC computed " << niter << " times in " << (end1-begin1) << " microseconds" << std::endl;
-    std::cout << "STD    sin =" << sin_phase2 << ", cos =" << cos_phase2 << " computed " << niter << " times in " << (end2-begin2) << " microseconds" << std::endl;
+    std::cout << "STD LIB ARITHM computed " << niter << " times in " << (end2-begin2) << " microseconds" << std::endl;
-    std::cout << "FXPT   sin =" << sin_phase3 << ", cos =" << cos_phase3 << " computed " << niter << " times in " << (end3-begin3) << " microseconds" << std::endl;
+    std::cout << "FXPT CORDIC computed " << niter << " times in " << (end3-begin3) << " microseconds" << std::endl;
    std::cout << "SSE CORDIC computed " << niter << " times in " << (end4-begin4) << " microseconds" << std::endl;
    EXPECT_TRUE((end2-begin2) < (end1-begin1)); // if true, standard C++ is faster than the cordic implementation
 }
--- a/src/tests/jamfile.jam
+++ b/src/tests/jamfile.jam
@ -25,6 +25,7 @@ exe run_tests : test_main.cc
 ../algorithms/data_type_adapter/adapters//ishort_to_complex
 ../algorithms/input_filter/adapters//fir_filter
 ../algorithms/input_filter/adapters//freq_xlating_fir_filter
 ../algorithms/libs//nco_lib
 ../algorithms/libs//gnss_signal_processing
 ../algorithms/libs//gps_sdr_signal_processing
 ../algorithms/libs//galileo_e1_signal_processing