Merge branch 'next' of https://github.com/gnss-sdr/gnss-sdr into next

2025-10-29 22:42:59 +00:00 · 2019-03-19 20:45:31 +01:00
parent f605f4e081 10d73da839
commit a0ed4c1500
14 changed files with 295 additions and 298 deletions
--- a/src/algorithms/observables/gnuradio_blocks/hybrid_observables_gs.cc
+++ b/src/algorithms/observables/gnuradio_blocks/hybrid_observables_gs.cc
@@ -512,7 +512,6 @@ int hybrid_observables_gs::general_work(int noutput_items __attribute__((unused)
                {
                    T_rx_clock_step_samples = std::round(static_cast<double>(in[d_nchannels_in - 1][0].fs) * 1e-3);  // 1 ms
                    LOG(INFO) << "Observables clock step samples set to " << T_rx_clock_step_samples;
-                    usleep(1000000);
                }

            // Consume one item from the clock channel (last of the input channels)
--- a/src/algorithms/tracking/libs/cpu_multicorrelator.cc
+++ b/src/algorithms/tracking/libs/cpu_multicorrelator.cc
@@ -1,11 +1,11 @@
 /*!
 * \file cpu_multicorrelator.cc
- * \brief High optimized CPU vector multiTAP correlator class
+ * \brief Highly optimized CPU vector multiTAP correlator class
 * \authors <ul>
 *          <li> Javier Arribas, 2015. jarribas(at)cttc.es
 *          </ul>
 *
- * Class that implements a high optimized vector multiTAP correlator class for CPUs
+ * Class that implements a highly optimized vector multiTAP correlator class for CPUs
 *
 * -------------------------------------------------------------------------
 *
--- a/src/algorithms/tracking/libs/cpu_multicorrelator.h
+++ b/src/algorithms/tracking/libs/cpu_multicorrelator.h
@@ -65,4 +65,4 @@ private:
 };


-#endif /* CPU_MULTICORRELATOR_H_ */
+#endif /* GNSS_SDR_CPU_MULTICORRELATOR_H_ */
--- a/src/algorithms/tracking/libs/cpu_multicorrelator_16sc.cc
+++ b/src/algorithms/tracking/libs/cpu_multicorrelator_16sc.cc
@@ -1,11 +1,11 @@
 /*!
 * \file cpu_multicorrelator_16sc.cc
- * \brief High optimized CPU vector multiTAP correlator class
+ * \brief Highly optimized CPU vector multiTAP correlator class
 * \authors <ul>
 *          <li> Javier Arribas, 2015. jarribas(at)cttc.es
 *          </ul>
 *
- * Class that implements a high optimized vector multiTAP correlator class for CPUs
+ * Class that implements a highly optimized vector multiTAP correlator class for CPUs
 *
 * -------------------------------------------------------------------------
 *
--- a/src/algorithms/tracking/libs/cpu_multicorrelator_16sc.h
+++ b/src/algorithms/tracking/libs/cpu_multicorrelator_16sc.h
@@ -1,11 +1,11 @@
 /*!
 * \file cpu_multicorrelator_16sc.h
- * \brief High optimized CPU vector multiTAP correlator class for lv_16sc_t (short int complex)
+ * \brief Highly optimized CPU vector multiTAP correlator class for lv_16sc_t (short int complex)
 * \authors <ul>
 *          <li> Javier Arribas, 2016. jarribas(at)cttc.es
 *          </ul>
 *
- * Class that implements a high optimized vector multiTAP correlator class for CPUs
+ * Class that implements a highly optimized vector multiTAP correlator class for CPUs
 *
 * -------------------------------------------------------------------------
 *
--- a/src/algorithms/tracking/libs/cpu_multicorrelator_real_codes.cc
+++ b/src/algorithms/tracking/libs/cpu_multicorrelator_real_codes.cc
@@ -6,7 +6,7 @@
 *          <li> Cillian O'Driscoll, 2017. cillian.odriscoll(at)gmail.com
 *          </ul>
 *
- * Class that implements a high optimized vector multiTAP correlator class for CPUs
+ * Class that implements a highly optimized vector multiTAP correlator class for CPUs
 *
 * -------------------------------------------------------------------------
 *
@@ -125,7 +125,7 @@ void Cpu_Multicorrelator_Real_Codes::update_local_code(int correlator_length_sam
        }
 }

-// Overload Carrier_wipeoff_multicorrelator_resampler to ensure back compatibility
+
 bool Cpu_Multicorrelator_Real_Codes::Carrier_wipeoff_multicorrelator_resampler(
    float rem_carrier_phase_in_rad,
    float phase_step_rad,
@@ -150,7 +150,8 @@ bool Cpu_Multicorrelator_Real_Codes::Carrier_wipeoff_multicorrelator_resampler(
        }
    return true;
 }
-// Overload Carrier_wipeoff_multicorrelator_resampler to ensure back compatibility
+
+
 bool Cpu_Multicorrelator_Real_Codes::Carrier_wipeoff_multicorrelator_resampler(
    float rem_carrier_phase_in_rad,
    float phase_step_rad,
--- a/src/algorithms/tracking/libs/cpu_multicorrelator_real_codes.h
+++ b/src/algorithms/tracking/libs/cpu_multicorrelator_real_codes.h
@@ -6,7 +6,7 @@
 *          <li> Cillian O'Driscoll, 2017, cillian.odriscoll(at)gmail.com
 *          </ul>
 *
- * Class that implements a high optimized vector multiTAP correlator class for CPUs
+ * Class that implements a highly optimized vector multiTAP correlator class for CPUs
 *
 * -------------------------------------------------------------------------
 *
@@ -52,7 +52,6 @@ public:
    bool set_local_code_and_taps(int code_length_chips, const float *local_code_in, float *shifts_chips);
    bool set_input_output_vectors(std::complex<float> *corr_out, const std::complex<float> *sig_in);
    void update_local_code(int correlator_length_samples, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips = 0.0);
-    // Overload Carrier_wipeoff_multicorrelator_resampler to ensure back compatibility
    bool Carrier_wipeoff_multicorrelator_resampler(float rem_carrier_phase_in_rad, float phase_step_rad, float phase_rate_step_rad, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, int signal_length_samples);
    bool Carrier_wipeoff_multicorrelator_resampler(float rem_carrier_phase_in_rad, float phase_step_rad, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, int signal_length_samples);
    bool free();
@@ -70,4 +69,4 @@ private:
 };


-#endif /* CPU_MULTICORRELATOR_REAL_CODES_H_ */
+#endif /* GNSS_SDR_CPU_MULTICORRELATOR_REAL_CODES_H_ */
--- a/src/algorithms/tracking/libs/cuda_multicorrelator.cu
+++ b/src/algorithms/tracking/libs/cuda_multicorrelator.cu
@@ -1,11 +1,11 @@
 /*!
 * \file cuda_multicorrelator.cu
- * \brief High optimized CUDA GPU vector multiTAP correlator class
+ * \brief Highly optimized CUDA GPU vector multiTAP correlator class
 * \authors <ul>
 *          <li> Javier Arribas, 2015. jarribas(at)cttc.es
 *          </ul>
 *
- * Class that implements a high optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
+ * Class that implements a highly optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
 *
 * -------------------------------------------------------------------------
 *
@@ -33,9 +33,8 @@
 */

 #include "cuda_multicorrelator.h"
-
-#include <stdio.h>
 #include <iostream>
+#include <stdio.h>
 // For the CUDA runtime routines (prefixed with "cuda_")
 #include <cuda_runtime.h>

@@ -53,8 +52,7 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
    int vectorN,
    int elementN,
    float rem_carrier_phase_in_rad,
-    float phase_step_rad
-)
+    float phase_step_rad)
 {
    //Accumulators cache
    __shared__ GPU_Complex accumResult[ACCUM_N];
@@ -66,8 +64,8 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
         i < elementN;
         i += blockDim.x * gridDim.x)
        {
-    	__sincosf(rem_carrier_phase_in_rad + i*phase_step_rad, &sin, &cos);
-    	d_sig_wiped[i] =  d_sig_in[i] * GPU_Complex(cos,-sin);
+            __sincosf(rem_carrier_phase_in_rad + i * phase_step_rad, &sin, &cos);
+            d_sig_wiped[i] = d_sig_in[i] * GPU_Complex(cos, -sin);
        }

    __syncthreads();
@@ -89,8 +87,9 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
            ////////////////////////////////////////////////////////////////////////
            for (int iAccum = threadIdx.x; iAccum < ACCUM_N; iAccum += blockDim.x)
                {
-        	GPU_Complex sum = GPU_Complex(0,0);
-            float local_code_chip_index=0.0;;
+                    GPU_Complex sum = GPU_Complex(0, 0);
+                    float local_code_chip_index = 0.0;
+                    ;
                    //float code_phase;
                    for (int pos = iAccum; pos < elementN; pos += ACCUM_N)
                        {
@@ -102,14 +101,13 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
                            //custom code for multitap correlator
                            // 1.resample local code for the current shift

-            	local_code_chip_index= fmodf(code_phase_step_chips*__int2float_rd(pos)+ d_shifts_chips[vec] - rem_code_phase_chips, code_length_chips);
+                            local_code_chip_index = fmodf(code_phase_step_chips * __int2float_rd(pos) + d_shifts_chips[vec] - rem_code_phase_chips, code_length_chips);

                            //Take into account that in multitap correlators, the shifts can be negative!
-            	if (local_code_chip_index<0.0) local_code_chip_index+=(code_length_chips-1);
+                            if (local_code_chip_index < 0.0) local_code_chip_index += (code_length_chips - 1);
                            //printf("vec= %i, pos %i, chip_idx=%i chip_shift=%f \r\n",vec, pos,__float2int_rd(local_code_chip_index),local_code_chip_index);
                            // 2.correlate
-            	sum.multiply_acc(d_sig_wiped[pos],d_local_code_in[__float2int_rd(local_code_chip_index)]);
-
+                            sum.multiply_acc(d_sig_wiped[pos], d_local_code_in[__float2int_rd(local_code_chip_index)]);
                        }
                    accumResult[iAccum] = sum;
                }
@@ -135,59 +133,66 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
        }
 }

+
 bool cuda_multicorrelator::init_cuda_integrated_resampler(
    int signal_length_samples,
    int code_length_chips,
-		int n_correlators
-		)
+    int n_correlators)
 {
    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
-//	findCudaDevice(argc, (const char **)argv);
+    //	findCudaDevice(argc, (const char **)argv);
    cudaDeviceProp prop;
    int num_devices, device;
    cudaGetDeviceCount(&num_devices);
-    if (num_devices > 1) {
+    if (num_devices > 1)
+        {
            int max_multiprocessors = 0, max_device = 0;
-          for (device = 0; device < num_devices; device++) {
+            for (device = 0; device < num_devices; device++)
+                {
                    cudaDeviceProp properties;
                    cudaGetDeviceProperties(&properties, device);
-                  if (max_multiprocessors < properties.multiProcessorCount) {
+                    if (max_multiprocessors < properties.multiProcessorCount)
+                        {
                            max_multiprocessors = properties.multiProcessorCount;
                            max_device = device;
                        }
-                  printf("Found GPU device # %i\n",device);
+                    printf("Found GPU device # %i\n", device);
                }
            //cudaSetDevice(max_device);

            //set random device!
-	  selected_gps_device=rand() % num_devices;//generates a random number between 0 and num_devices to split the threads between GPUs
+            selected_gps_device = rand() % num_devices;  //generates a random number between 0 and num_devices to split the threads between GPUs
            cudaSetDevice(selected_gps_device);

-          cudaGetDeviceProperties( &prop, max_device );
+            cudaGetDeviceProperties(&prop, max_device);
            //debug code
-          if (prop.canMapHostMemory != 1) {
-              printf( "Device can not map memory.\n" );
+            if (prop.canMapHostMemory != 1)
+                {
+                    printf("Device can not map memory.\n");
                }
-          printf("L2 Cache size= %u \n",prop.l2CacheSize);
-          printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
-          printf("maxGridSize= %i \n",prop.maxGridSize[0]);
-          printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
-          printf("deviceOverlap= %i \n",prop.deviceOverlap);
-  	    printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
-    }else{
-    	    cudaGetDevice( &selected_gps_device);
-    	    cudaGetDeviceProperties( &prop, selected_gps_device );
+            printf("L2 Cache size= %u \n", prop.l2CacheSize);
+            printf("maxThreadsPerBlock= %u \n", prop.maxThreadsPerBlock);
+            printf("maxGridSize= %i \n", prop.maxGridSize[0]);
+            printf("sharedMemPerBlock= %lu \n", prop.sharedMemPerBlock);
+            printf("deviceOverlap= %i \n", prop.deviceOverlap);
+            printf("multiProcessorCount= %i \n", prop.multiProcessorCount);
+        }
+    else
+        {
+            cudaGetDevice(&selected_gps_device);
+            cudaGetDeviceProperties(&prop, selected_gps_device);
            //debug code
-    	    if (prop.canMapHostMemory != 1) {
-    	        printf( "Device can not map memory.\n" );
+            if (prop.canMapHostMemory != 1)
+                {
+                    printf("Device can not map memory.\n");
                }

-    	    printf("L2 Cache size= %u \n",prop.l2CacheSize);
-    	    printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
-    	    printf("maxGridSize= %i \n",prop.maxGridSize[0]);
-    	    printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
-    	    printf("deviceOverlap= %i \n",prop.deviceOverlap);
-    	    printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
+            printf("L2 Cache size= %u \n", prop.l2CacheSize);
+            printf("maxThreadsPerBlock= %u \n", prop.maxThreadsPerBlock);
+            printf("maxGridSize= %i \n", prop.maxGridSize[0]);
+            printf("sharedMemPerBlock= %lu \n", prop.sharedMemPerBlock);
+            printf("deviceOverlap= %i \n", prop.deviceOverlap);
+            printf("multiProcessorCount= %i \n", prop.multiProcessorCount);
        }

    // (cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
@@ -208,18 +213,18 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler(

    // Doppler-free signal (internal GPU memory)
    cudaMalloc((void **)&d_sig_doppler_wiped, size);
-	cudaMemset(d_sig_doppler_wiped,0,size);
+    cudaMemset(d_sig_doppler_wiped, 0, size);

    // Local code GPU memory (can be mapped to CPU memory in shared memory devices!)
-	cudaMalloc((void **)&d_local_codes_in, sizeof(std::complex<float>)*code_length_chips);
-	cudaMemset(d_local_codes_in,0,sizeof(std::complex<float>)*code_length_chips);
+    cudaMalloc((void **)&d_local_codes_in, sizeof(std::complex<float>) * code_length_chips);
+    cudaMemset(d_local_codes_in, 0, sizeof(std::complex<float>) * code_length_chips);

-    d_code_length_chips=code_length_chips;
+    d_code_length_chips = code_length_chips;

    // Vector with the chip shifts for each correlator tap
    //GPU memory (can be mapped to CPU memory in shared memory devices!)
-	cudaMalloc((void **)&d_shifts_chips, sizeof(float)*n_correlators);
-	cudaMemset(d_shifts_chips,0,sizeof(float)*n_correlators);
+    cudaMalloc((void **)&d_shifts_chips, sizeof(float) * n_correlators);
+    cudaMemset(d_shifts_chips, 0, sizeof(float) * n_correlators);

    //scalars
    //cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators);
@@ -228,84 +233,85 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler(
    // Launch the Vector Add CUDA Kernel
    // TODO: write a smart load balance using device info!
    threadsPerBlock = 64;
-    blocksPerGrid = 128;//(int)(signal_length_samples+threadsPerBlock-1)/threadsPerBlock;
+    blocksPerGrid = 128;  //(int)(signal_length_samples+threadsPerBlock-1)/threadsPerBlock;

-	cudaStreamCreate (&stream1) ;
+    cudaStreamCreate(&stream1);
    //cudaStreamCreate (&stream2) ;
    return true;
 }

+
 bool cuda_multicorrelator::set_local_code_and_taps(
    int code_length_chips,
-		const std::complex<float>* local_codes_in,
+    const std::complex<float> *local_codes_in,
    float *shifts_chips,
-		int n_correlators
-		)
+    int n_correlators)
 {
-
    cudaSetDevice(selected_gps_device);
    //********* ZERO COPY VERSION ************
-//	// Get device pointer from host memory. No allocation or memcpy
-//	cudaError_t code;
-//	// local code CPU -> GPU copy memory
-//	code=cudaHostGetDevicePointer((void **)&d_local_codes_in,  (void *) local_codes_in, 0);
-//	if (code!=cudaSuccess)
-//	{
-//		printf("cuda cudaHostGetDevicePointer error in set_local_code_and_taps \r\n");
-//	}
-//	// Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!)
-//	code=cudaHostGetDevicePointer((void **)&d_shifts_chips,  (void *) shifts_chips, 0);
-//	if (code!=cudaSuccess)
-//	{
-//		printf("cuda cudaHostGetDevicePointer error in set_local_code_and_taps \r\n");
-//	}
+    //	// Get device pointer from host memory. No allocation or memcpy
+    //	cudaError_t code;
+    //	// local code CPU -> GPU copy memory
+    //	code=cudaHostGetDevicePointer((void **)&d_local_codes_in,  (void *) local_codes_in, 0);
+    //	if (code!=cudaSuccess)
+    //	{
+    //		printf("cuda cudaHostGetDevicePointer error in set_local_code_and_taps \r\n");
+    //	}
+    //	// Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!)
+    //	code=cudaHostGetDevicePointer((void **)&d_shifts_chips,  (void *) shifts_chips, 0);
+    //	if (code!=cudaSuccess)
+    //	{
+    //		printf("cuda cudaHostGetDevicePointer error in set_local_code_and_taps \r\n");
+    //	}

    //******** CudaMalloc version ***********
    //local code CPU -> GPU copy memory
-    cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex)*code_length_chips, cudaMemcpyHostToDevice,stream1);
-    d_code_length_chips=code_length_chips;
+    cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex) * code_length_chips, cudaMemcpyHostToDevice, stream1);
+    d_code_length_chips = code_length_chips;

    //Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!)
-    cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float)*n_correlators,
-                                    cudaMemcpyHostToDevice,stream1);
+    cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float) * n_correlators,
+        cudaMemcpyHostToDevice, stream1);

    return true;
 }

-bool cuda_multicorrelator::set_input_output_vectors(
-		std::complex<float>* corr_out,
-		std::complex<float>* sig_in
-		)
-{

+bool cuda_multicorrelator::set_input_output_vectors(
+    std::complex<float> *corr_out,
+    std::complex<float> *sig_in)
+{
    cudaSetDevice(selected_gps_device);
    // Save CPU pointers
-	d_sig_in_cpu =sig_in;
+    d_sig_in_cpu = sig_in;
    d_corr_out_cpu = corr_out;

    // Zero Copy version
    // Get device pointer from host memory. No allocation or memcpy
    cudaError_t code;
-	code=cudaHostGetDevicePointer((void **)&d_sig_in,  (void *) sig_in, 0);
-	code=cudaHostGetDevicePointer((void **)&d_corr_out,  (void *) corr_out, 0);
-	if (code!=cudaSuccess)
+    code = cudaHostGetDevicePointer((void **)&d_sig_in, (void *)sig_in, 0);
+    code = cudaHostGetDevicePointer((void **)&d_corr_out, (void *)corr_out, 0);
+    if (code != cudaSuccess)
        {
            printf("cuda cudaHostGetDevicePointer error \r\n");
        }
    return true;
-
 }

-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
+#define gpuErrchk(ans)                        \
+    {                                         \
+        gpuAssert((ans), __FILE__, __LINE__); \
+    }
+inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
 {
    if (code != cudaSuccess)
        {
-      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
+            fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
            if (abort) exit(code);
        }
 }

+
 bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
    float rem_carrier_phase_in_rad,
    float phase_step_rad,
@@ -313,8 +319,7 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
    float rem_code_phase_chips,
    int signal_length_samples,
    int n_correlators)
-	{
-
+{
    cudaSetDevice(selected_gps_device);
    // cudaMemCpy version
    //size_t memSize = signal_length_samples * sizeof(std::complex<float>);
@@ -325,7 +330,7 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(

    //launch the multitap correlator with integrated local code resampler!

-    Doppler_wippe_scalarProdGPUCPXxN_shifts_chips<<<blocksPerGrid, threadsPerBlock,0 ,stream1>>>(
+    Doppler_wippe_scalarProdGPUCPXxN_shifts_chips<<<blocksPerGrid, threadsPerBlock, 0, stream1>>>(
        d_corr_out,
        d_sig_in,
        d_sig_doppler_wiped,
@@ -337,11 +342,10 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
        n_correlators,
        signal_length_samples,
        rem_carrier_phase_in_rad,
-			phase_step_rad
-			);
+        phase_step_rad);

-    gpuErrchk( cudaPeekAtLastError() );
-    gpuErrchk( cudaStreamSynchronize(stream1));
+    gpuErrchk(cudaPeekAtLastError());
+    gpuErrchk(cudaStreamSynchronize(stream1));

    // cudaMemCpy version
    // Copy the device result vector in device memory to the host result vector
@@ -352,30 +356,32 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
    return true;
 }

+
 cuda_multicorrelator::cuda_multicorrelator()
 {
-	d_sig_in=NULL;
-	d_nco_in=NULL;
-	d_sig_doppler_wiped=NULL;
-	d_local_codes_in=NULL;
-	d_shifts_samples=NULL;
-	d_shifts_chips=NULL;
-	d_corr_out=NULL;
-	threadsPerBlock=0;
-	blocksPerGrid=0;
-	d_code_length_chips=0;
+    d_sig_in = NULL;
+    d_nco_in = NULL;
+    d_sig_doppler_wiped = NULL;
+    d_local_codes_in = NULL;
+    d_shifts_samples = NULL;
+    d_shifts_chips = NULL;
+    d_corr_out = NULL;
+    threadsPerBlock = 0;
+    blocksPerGrid = 0;
+    d_code_length_chips = 0;
 }

+
 bool cuda_multicorrelator::free_cuda()
 {
    // Free device global memory
-	if (d_sig_in!=NULL) cudaFree(d_sig_in);
-	if (d_nco_in!=NULL) cudaFree(d_nco_in);
-	if (d_sig_doppler_wiped!=NULL) cudaFree(d_sig_doppler_wiped);
-	if (d_local_codes_in!=NULL) cudaFree(d_local_codes_in);
-	if (d_corr_out!=NULL) cudaFree(d_corr_out);
-	if (d_shifts_samples!=NULL) cudaFree(d_shifts_samples);
-	if (d_shifts_chips!=NULL) cudaFree(d_shifts_chips);
+    if (d_sig_in != NULL) cudaFree(d_sig_in);
+    if (d_nco_in != NULL) cudaFree(d_nco_in);
+    if (d_sig_doppler_wiped != NULL) cudaFree(d_sig_doppler_wiped);
+    if (d_local_codes_in != NULL) cudaFree(d_local_codes_in);
+    if (d_corr_out != NULL) cudaFree(d_corr_out);
+    if (d_shifts_samples != NULL) cudaFree(d_shifts_samples);
+    if (d_shifts_chips != NULL) cudaFree(d_shifts_chips);
    // Reset the device and exit
    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
@@ -385,4 +391,3 @@ bool cuda_multicorrelator::free_cuda()
    cudaDeviceReset();
    return true;
 }
-
--- a/src/algorithms/tracking/libs/cuda_multicorrelator.h
+++ b/src/algorithms/tracking/libs/cuda_multicorrelator.h
@@ -1,11 +1,11 @@
 /*!
 * \file cuda_multicorrelator.h
- * \brief High optimized CUDA GPU vector multiTAP correlator class
+ * \brief Highly optimized CUDA GPU vector multiTAP correlator class
 * \authors <ul>
 *          <li> Javier Arribas, 2015. jarribas(at)cttc.es
 *          </ul>
 *
- * Class that implements a high optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
+ * Class that implements a highly optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
 *
 * -------------------------------------------------------------------------
 *
@@ -92,6 +92,7 @@ struct GPU_Complex
    }
 };

+
 struct GPU_Complex_Short
 {
    float r;
@@ -149,7 +150,6 @@ private:
    GPU_Complex* d_local_codes_in;
    GPU_Complex* d_corr_out;

-    //
    std::complex<float>* d_sig_in_cpu;
    std::complex<float>* d_corr_out_cpu;

--- a/src/algorithms/tracking/libs/tracking_FLL_PLL_filter.cc
+++ b/src/algorithms/tracking/libs/tracking_FLL_PLL_filter.cc
@@ -32,7 +32,26 @@
 */

 #include "tracking_FLL_PLL_filter.h"
-#include <iostream>
+
+
+Tracking_FLL_PLL_filter::Tracking_FLL_PLL_filter()
+{
+    d_order = 0;
+    d_pll_w = 0.0;
+    d_pll_w0p3 = 0.0;
+    d_pll_w0f2 = 0.0;
+    d_pll_x = 0.0;
+    d_pll_a2 = 0.0;
+    d_pll_w0f = 0.0;
+    d_pll_a3 = 0.0;
+    d_pll_w0p2 = 0.0;
+    d_pll_b3 = 0.0;
+    d_pll_w0p = 0.0;
+}
+
+
+Tracking_FLL_PLL_filter::~Tracking_FLL_PLL_filter() = default;
+

 void Tracking_FLL_PLL_filter::set_params(float fll_bw_hz, float pll_bw_hz, int order)
 {
@@ -104,31 +123,11 @@ float Tracking_FLL_PLL_filter::get_carrier_error(float FLL_discriminator, float
            pll_w_new = d_pll_w + PLL_discriminator * d_pll_w0p2 * correlation_time_s + FLL_discriminator * d_pll_w0f * correlation_time_s;
            carrier_error_hz = 0.5 * (pll_w_new + d_pll_w) + d_pll_a2 * d_pll_w0p * PLL_discriminator;
            d_pll_w = pll_w_new;
-            /*std::cout<<" d_pll_w = "<<carrier_error_hz<<
-               ", pll_w_new = "<<pll_w_new
-               <<", PLL_discriminator=" <<PLL_discriminator
-               <<" FLL_discriminator ="<<FLL_discriminator
-               <<" correlation_time_s = "<<correlation_time_s<<"\r\n";*/
+            /* std::cout << " d_pll_w = " << carrier_error_hz << ", pll_w_new = " << pll_w_new
+                      << ", PLL_discriminator=" << PLL_discriminator
+                      << " FLL_discriminator =" << FLL_discriminator
+                      << " correlation_time_s = " << correlation_time_s << "\r\n"; */
        }

    return carrier_error_hz;
 }
-
-
-Tracking_FLL_PLL_filter::Tracking_FLL_PLL_filter()
-{
-    d_order = 0;
-    d_pll_w = 0;
-    d_pll_w0p3 = 0;
-    d_pll_w0f2 = 0;
-    d_pll_x = 0;
-    d_pll_a2 = 0;
-    d_pll_w0f = 0;
-    d_pll_a3 = 0;
-    d_pll_w0p2 = 0;
-    d_pll_b3 = 0;
-    d_pll_w0p = 0;
-}
-
-
-Tracking_FLL_PLL_filter::~Tracking_FLL_PLL_filter() = default;
--- a/src/algorithms/tracking/libs/tracking_discriminators.cc
+++ b/src/algorithms/tracking/libs/tracking_discriminators.cc
@@ -36,6 +36,7 @@
 #include <cmath>

 //  All the outputs are in RADIANS
+
 /*
 * FLL four quadrant arctan discriminator:
 * \f{equation}
@@ -45,7 +46,6 @@
 * \f$I_{PS1},Q_{PS1}\f$ are the inphase and quadrature prompt correlator outputs respectively at sample time \f$t_1\f$, and
 * \f$I_{PS2},Q_{PS2}\f$ are the inphase and quadrature prompt correlator outputs respectively at sample time \f$t_2\f$. The output is in [radians/second].
 */
-
 double fll_four_quadrant_atan(gr_complex prompt_s1, gr_complex prompt_s2, double t1, double t2)
 {
    double cross, dot;
@@ -105,6 +105,7 @@ double dll_nc_e_minus_l_normalized(gr_complex early_s1, gr_complex late_s1)
    return 0.5 * (P_early - P_late) / (P_early + P_late);
 }

+
 /*
 * DLL Noncoherent Very Early Minus Late Power (VEMLP) normalized discriminator, using the outputs
 * of four correlators, Very Early (VE), Early (E), Late (L) and Very Late (VL):
--- a/src/algorithms/tracking/libs/tracking_loop_filter.cc
+++ b/src/algorithms/tracking/libs/tracking_loop_filter.cc
@@ -4,7 +4,7 @@
 * \author Cillian O'Driscoll, 2015. cillian.odriscoll(at)gmail.com
 *
 * Class implementing a generic 1st, 2nd or 3rd order loop filter. Based
- * on the bilinear transform of the standard Weiner filter.
+ * on the bilinear transform of the standard Wiener filter.
 *
 * -------------------------------------------------------------------------
 *
@@ -36,6 +36,8 @@
 #include <glog/logging.h>
 #include <cmath>

+const int MAX_LOOP_ORDER = 3;
+const int MAX_LOOP_HISTORY_LENGTH = 4;

 Tracking_loop_filter::Tracking_loop_filter(float update_interval,
    float noise_bandwidth,
@@ -74,7 +76,7 @@ float Tracking_loop_filter::apply(float current_input)
    // Now apply the filter coefficients:
    float result = 0.0;

-    // Hanlde the old outputs first:
+    // Handle the old outputs first:
    for (unsigned int ii = 0; ii < d_output_coefficients.size(); ++ii)
        {
            result += d_output_coefficients[ii] * d_outputs[(d_current_index + ii) % MAX_LOOP_HISTORY_LENGTH];
@@ -95,16 +97,13 @@ float Tracking_loop_filter::apply(float current_input)

    d_inputs[d_current_index] = current_input;

-
    for (unsigned int ii = 0; ii < d_input_coefficients.size(); ++ii)
        {
            result += d_input_coefficients[ii] * d_inputs[(d_current_index + ii) % MAX_LOOP_HISTORY_LENGTH];
        }

-
    d_outputs[d_current_index] = result;

-
    return result;
 }

@@ -179,7 +178,6 @@ void Tracking_loop_filter::update_coefficients(void)
                    d_output_coefficients[0] = 1.0;
                }
            break;
-
        case 3:
            wn = d_noise_bandwidth / 0.7845;  // From Kaplan
            float a3 = 1.1;
@@ -208,7 +206,6 @@ void Tracking_loop_filter::update_coefficients(void)
                    d_input_coefficients[1] = g1 * T * T / 2.0 - 2.0 * g3;
                    d_input_coefficients[2] = g3 + T / 2.0 * (-g2 + T / 2.0 * g1);

-
                    d_output_coefficients.resize(2);
                    d_output_coefficients[0] = 2.0;
                    d_output_coefficients[1] = -1.0;
@@ -260,10 +257,9 @@ void Tracking_loop_filter::set_order(int loop_order)
 {
    if (loop_order < 1 or loop_order > MAX_LOOP_ORDER)
        {
-            LOG(ERROR) << "Ignoring attempt to set loop order to " << loop_order
+            LOG(WARNING) << "Ignoring attempt to set loop order to " << loop_order
                         << ". Maximum allowed order is: " << MAX_LOOP_ORDER
                         << ". Not changing current value of " << d_loop_order;
-
            return;
        }

--- a/src/algorithms/tracking/libs/tracking_loop_filter.h
+++ b/src/algorithms/tracking/libs/tracking_loop_filter.h
@@ -4,7 +4,7 @@
 * \author Cillian O'Driscoll, 2015. cillian.odriscoll(at)gmail.com
 *
 * Class implementing a generic 1st, 2nd or 3rd order loop filter. Based
- * on the bilinear transform of the standard Weiner filter.
+ * on the bilinear transform of the standard Wiener filter.
 *
 * -------------------------------------------------------------------------
 *
@@ -33,8 +33,6 @@

 #ifndef GNSS_SDR_TRACKING_LOOP_FILTER_H_
 #define GNSS_SDR_TRACKING_LOOP_FILTER_H_
-#define MAX_LOOP_ORDER 3
-#define MAX_LOOP_HISTORY_LENGTH 4

 #include <vector>

@@ -74,7 +72,6 @@ private:
    // Compute the filter coefficients:
    void update_coefficients(void);

-
 public:
    float get_noise_bandwidth(void) const;
    float get_update_interval(void) const;
--- a/src/core/system_parameters/beidou_dnav_navigation_message.cc
+++ b/src/core/system_parameters/beidou_dnav_navigation_message.cc
@@ -830,7 +830,7 @@ int32_t Beidou_Dnav_Navigation_Message::d2_subframe_decoder(std::string const& s
                    d_eccentricity_msb = static_cast<double>(read_navigation_unsigned(subframe_bits, D2_E_MSB));
                    d_eccentricity_msb_bits = (read_navigation_unsigned(subframe_bits, D2_E_MSB));
                    // Adjust for lsb in next page (shift number of lsb to the left)
-                    d_eccentricity_msb = static_cast<uint64_t>((static_cast<int>(d_eccentricity_msb) << 22));
+                    d_eccentricity_msb = static_cast<uint64_t>((static_cast<uint64_t>(d_eccentricity_msb) << 22));
                    d_eccentricity_msb_bits = d_eccentricity_msb_bits << 22;

                    // Set system flags for message reception