mirror of
				https://github.com/gnss-sdr/gnss-sdr
				synced 2025-10-30 23:03:05 +00:00 
			
		
		
		
	Merge branch 'next' of https://github.com/gnss-sdr/gnss-sdr into next
This commit is contained in:
		| @@ -512,7 +512,6 @@ int hybrid_observables_gs::general_work(int noutput_items __attribute__((unused) | |||||||
|                 { |                 { | ||||||
|                     T_rx_clock_step_samples = std::round(static_cast<double>(in[d_nchannels_in - 1][0].fs) * 1e-3);  // 1 ms |                     T_rx_clock_step_samples = std::round(static_cast<double>(in[d_nchannels_in - 1][0].fs) * 1e-3);  // 1 ms | ||||||
|                     LOG(INFO) << "Observables clock step samples set to " << T_rx_clock_step_samples; |                     LOG(INFO) << "Observables clock step samples set to " << T_rx_clock_step_samples; | ||||||
|                     usleep(1000000); |  | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|             // Consume one item from the clock channel (last of the input channels) |             // Consume one item from the clock channel (last of the input channels) | ||||||
|   | |||||||
| @@ -1,11 +1,11 @@ | |||||||
| /*! | /*! | ||||||
|  * \file cpu_multicorrelator.cc |  * \file cpu_multicorrelator.cc | ||||||
|  * \brief High optimized CPU vector multiTAP correlator class |  * \brief Highly optimized CPU vector multiTAP correlator class | ||||||
|  * \authors <ul> |  * \authors <ul> | ||||||
|  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es |  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es | ||||||
|  *          </ul> |  *          </ul> | ||||||
|  * |  * | ||||||
|  * Class that implements a high optimized vector multiTAP correlator class for CPUs |  * Class that implements a highly optimized vector multiTAP correlator class for CPUs | ||||||
|  * |  * | ||||||
|  * ------------------------------------------------------------------------- |  * ------------------------------------------------------------------------- | ||||||
|  * |  * | ||||||
|   | |||||||
| @@ -65,4 +65,4 @@ private: | |||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| #endif /* CPU_MULTICORRELATOR_H_ */ | #endif /* GNSS_SDR_CPU_MULTICORRELATOR_H_ */ | ||||||
|   | |||||||
| @@ -1,11 +1,11 @@ | |||||||
| /*! | /*! | ||||||
|  * \file cpu_multicorrelator_16sc.cc |  * \file cpu_multicorrelator_16sc.cc | ||||||
|  * \brief High optimized CPU vector multiTAP correlator class |  * \brief Highly optimized CPU vector multiTAP correlator class | ||||||
|  * \authors <ul> |  * \authors <ul> | ||||||
|  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es |  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es | ||||||
|  *          </ul> |  *          </ul> | ||||||
|  * |  * | ||||||
|  * Class that implements a high optimized vector multiTAP correlator class for CPUs |  * Class that implements a highly optimized vector multiTAP correlator class for CPUs | ||||||
|  * |  * | ||||||
|  * ------------------------------------------------------------------------- |  * ------------------------------------------------------------------------- | ||||||
|  * |  * | ||||||
|   | |||||||
| @@ -1,11 +1,11 @@ | |||||||
| /*! | /*! | ||||||
|  * \file cpu_multicorrelator_16sc.h |  * \file cpu_multicorrelator_16sc.h | ||||||
|  * \brief High optimized CPU vector multiTAP correlator class for lv_16sc_t (short int complex) |  * \brief Highly optimized CPU vector multiTAP correlator class for lv_16sc_t (short int complex) | ||||||
|  * \authors <ul> |  * \authors <ul> | ||||||
|  *          <li> Javier Arribas, 2016. jarribas(at)cttc.es |  *          <li> Javier Arribas, 2016. jarribas(at)cttc.es | ||||||
|  *          </ul> |  *          </ul> | ||||||
|  * |  * | ||||||
|  * Class that implements a high optimized vector multiTAP correlator class for CPUs |  * Class that implements a highly optimized vector multiTAP correlator class for CPUs | ||||||
|  * |  * | ||||||
|  * ------------------------------------------------------------------------- |  * ------------------------------------------------------------------------- | ||||||
|  * |  * | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ | |||||||
|  *          <li> Cillian O'Driscoll, 2017. cillian.odriscoll(at)gmail.com |  *          <li> Cillian O'Driscoll, 2017. cillian.odriscoll(at)gmail.com | ||||||
|  *          </ul> |  *          </ul> | ||||||
|  * |  * | ||||||
|  * Class that implements a high optimized vector multiTAP correlator class for CPUs |  * Class that implements a highly optimized vector multiTAP correlator class for CPUs | ||||||
|  * |  * | ||||||
|  * ------------------------------------------------------------------------- |  * ------------------------------------------------------------------------- | ||||||
|  * |  * | ||||||
| @@ -125,7 +125,7 @@ void Cpu_Multicorrelator_Real_Codes::update_local_code(int correlator_length_sam | |||||||
|         } |         } | ||||||
| } | } | ||||||
|  |  | ||||||
| // Overload Carrier_wipeoff_multicorrelator_resampler to ensure back compatibility |  | ||||||
| bool Cpu_Multicorrelator_Real_Codes::Carrier_wipeoff_multicorrelator_resampler( | bool Cpu_Multicorrelator_Real_Codes::Carrier_wipeoff_multicorrelator_resampler( | ||||||
|     float rem_carrier_phase_in_rad, |     float rem_carrier_phase_in_rad, | ||||||
|     float phase_step_rad, |     float phase_step_rad, | ||||||
| @@ -150,7 +150,8 @@ bool Cpu_Multicorrelator_Real_Codes::Carrier_wipeoff_multicorrelator_resampler( | |||||||
|         } |         } | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
| // Overload Carrier_wipeoff_multicorrelator_resampler to ensure back compatibility |  | ||||||
|  |  | ||||||
| bool Cpu_Multicorrelator_Real_Codes::Carrier_wipeoff_multicorrelator_resampler( | bool Cpu_Multicorrelator_Real_Codes::Carrier_wipeoff_multicorrelator_resampler( | ||||||
|     float rem_carrier_phase_in_rad, |     float rem_carrier_phase_in_rad, | ||||||
|     float phase_step_rad, |     float phase_step_rad, | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ | |||||||
|  *          <li> Cillian O'Driscoll, 2017, cillian.odriscoll(at)gmail.com |  *          <li> Cillian O'Driscoll, 2017, cillian.odriscoll(at)gmail.com | ||||||
|  *          </ul> |  *          </ul> | ||||||
|  * |  * | ||||||
|  * Class that implements a high optimized vector multiTAP correlator class for CPUs |  * Class that implements a highly optimized vector multiTAP correlator class for CPUs | ||||||
|  * |  * | ||||||
|  * ------------------------------------------------------------------------- |  * ------------------------------------------------------------------------- | ||||||
|  * |  * | ||||||
| @@ -52,7 +52,6 @@ public: | |||||||
|     bool set_local_code_and_taps(int code_length_chips, const float *local_code_in, float *shifts_chips); |     bool set_local_code_and_taps(int code_length_chips, const float *local_code_in, float *shifts_chips); | ||||||
|     bool set_input_output_vectors(std::complex<float> *corr_out, const std::complex<float> *sig_in); |     bool set_input_output_vectors(std::complex<float> *corr_out, const std::complex<float> *sig_in); | ||||||
|     void update_local_code(int correlator_length_samples, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips = 0.0); |     void update_local_code(int correlator_length_samples, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips = 0.0); | ||||||
|     // Overload Carrier_wipeoff_multicorrelator_resampler to ensure back compatibility |  | ||||||
|     bool Carrier_wipeoff_multicorrelator_resampler(float rem_carrier_phase_in_rad, float phase_step_rad, float phase_rate_step_rad, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, int signal_length_samples); |     bool Carrier_wipeoff_multicorrelator_resampler(float rem_carrier_phase_in_rad, float phase_step_rad, float phase_rate_step_rad, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, int signal_length_samples); | ||||||
|     bool Carrier_wipeoff_multicorrelator_resampler(float rem_carrier_phase_in_rad, float phase_step_rad, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, int signal_length_samples); |     bool Carrier_wipeoff_multicorrelator_resampler(float rem_carrier_phase_in_rad, float phase_step_rad, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, int signal_length_samples); | ||||||
|     bool free(); |     bool free(); | ||||||
| @@ -70,4 +69,4 @@ private: | |||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| #endif /* CPU_MULTICORRELATOR_REAL_CODES_H_ */ | #endif /* GNSS_SDR_CPU_MULTICORRELATOR_REAL_CODES_H_ */ | ||||||
|   | |||||||
| @@ -1,11 +1,11 @@ | |||||||
| /*! | /*! | ||||||
|  * \file cuda_multicorrelator.cu |  * \file cuda_multicorrelator.cu | ||||||
|  * \brief High optimized CUDA GPU vector multiTAP correlator class |  * \brief Highly optimized CUDA GPU vector multiTAP correlator class | ||||||
|  * \authors <ul> |  * \authors <ul> | ||||||
|  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es |  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es | ||||||
|  *          </ul> |  *          </ul> | ||||||
|  * |  * | ||||||
|  * Class that implements a high optimized vector multiTAP correlator class for NVIDIA CUDA GPUs |  * Class that implements a highly optimized vector multiTAP correlator class for NVIDIA CUDA GPUs | ||||||
|  * |  * | ||||||
|  * ------------------------------------------------------------------------- |  * ------------------------------------------------------------------------- | ||||||
|  * |  * | ||||||
| @@ -33,9 +33,8 @@ | |||||||
|  */ |  */ | ||||||
|  |  | ||||||
| #include "cuda_multicorrelator.h" | #include "cuda_multicorrelator.h" | ||||||
|  |  | ||||||
| #include <stdio.h> |  | ||||||
| #include <iostream> | #include <iostream> | ||||||
|  | #include <stdio.h> | ||||||
| // For the CUDA runtime routines (prefixed with "cuda_") | // For the CUDA runtime routines (prefixed with "cuda_") | ||||||
| #include <cuda_runtime.h> | #include <cuda_runtime.h> | ||||||
|  |  | ||||||
| @@ -53,22 +52,21 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips( | |||||||
|     int vectorN, |     int vectorN, | ||||||
|     int elementN, |     int elementN, | ||||||
|     float rem_carrier_phase_in_rad, |     float rem_carrier_phase_in_rad, | ||||||
|     float phase_step_rad |     float phase_step_rad) | ||||||
| ) |  | ||||||
| { | { | ||||||
|     //Accumulators cache |     //Accumulators cache | ||||||
|     __shared__ GPU_Complex accumResult[ACCUM_N]; |     __shared__ GPU_Complex accumResult[ACCUM_N]; | ||||||
|  |  | ||||||
| 	// CUDA version of floating point NCO and vector dot product integrated |     // CUDA version of floating point NCO and vector dot product integrated | ||||||
|     float sin; |     float sin; | ||||||
|     float cos; |     float cos; | ||||||
|     for (int i = blockIdx.x * blockDim.x + threadIdx.x; |     for (int i = blockIdx.x * blockDim.x + threadIdx.x; | ||||||
|          i < elementN; |          i < elementN; | ||||||
|          i += blockDim.x * gridDim.x) |          i += blockDim.x * gridDim.x) | ||||||
|     { |         { | ||||||
|     	__sincosf(rem_carrier_phase_in_rad + i*phase_step_rad, &sin, &cos); |             __sincosf(rem_carrier_phase_in_rad + i * phase_step_rad, &sin, &cos); | ||||||
|     	d_sig_wiped[i] =  d_sig_in[i] * GPU_Complex(cos,-sin); |             d_sig_wiped[i] = d_sig_in[i] * GPU_Complex(cos, -sin); | ||||||
|     } |         } | ||||||
|  |  | ||||||
|     __syncthreads(); |     __syncthreads(); | ||||||
|     //////////////////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -77,273 +75,279 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips( | |||||||
|     // from total number of thread blocks |     // from total number of thread blocks | ||||||
|     //////////////////////////////////////////////////////////////////////////// |     //////////////////////////////////////////////////////////////////////////// | ||||||
|     for (int vec = blockIdx.x; vec < vectorN; vec += gridDim.x) |     for (int vec = blockIdx.x; vec < vectorN; vec += gridDim.x) | ||||||
|     { |  | ||||||
|         //int vectorBase = IMUL(elementN, vec); |  | ||||||
|         //int vectorEnd  = elementN; |  | ||||||
|  |  | ||||||
|         //////////////////////////////////////////////////////////////////////// |  | ||||||
|         // Each accumulator cycles through vectors with |  | ||||||
|         // stride equal to number of total number of accumulators ACCUM_N |  | ||||||
|         // At this stage ACCUM_N is only preferred be a multiple of warp size |  | ||||||
|         // to meet memory coalescing alignment constraints. |  | ||||||
|         //////////////////////////////////////////////////////////////////////// |  | ||||||
|         for (int iAccum = threadIdx.x; iAccum < ACCUM_N; iAccum += blockDim.x) |  | ||||||
|         { |         { | ||||||
|         	GPU_Complex sum = GPU_Complex(0,0); |             //int vectorBase = IMUL(elementN, vec); | ||||||
|             float local_code_chip_index=0.0;; |             //int vectorEnd  = elementN; | ||||||
|             //float code_phase; |  | ||||||
|             for (int pos = iAccum; pos < elementN; pos += ACCUM_N) |  | ||||||
|             { |  | ||||||
|             	//original sample code |  | ||||||
|                 //sum = sum + d_sig_in[pos-vectorBase] * d_nco_in[pos-vectorBase] * d_local_codes_in[pos]; |  | ||||||
|             	//sum = sum + d_sig_in[pos-vectorBase] * d_local_codes_in[pos]; |  | ||||||
|             	//sum.multiply_acc(d_sig_in[pos],d_local_codes_in[pos+d_shifts_samples[vec]]); |  | ||||||
|  |  | ||||||
|             	//custom code for multitap correlator |             //////////////////////////////////////////////////////////////////////// | ||||||
|             	// 1.resample local code for the current shift |             // Each accumulator cycles through vectors with | ||||||
|  |             // stride equal to number of total number of accumulators ACCUM_N | ||||||
|  |             // At this stage ACCUM_N is only preferred be a multiple of warp size | ||||||
|  |             // to meet memory coalescing alignment constraints. | ||||||
|  |             //////////////////////////////////////////////////////////////////////// | ||||||
|  |             for (int iAccum = threadIdx.x; iAccum < ACCUM_N; iAccum += blockDim.x) | ||||||
|  |                 { | ||||||
|  |                     GPU_Complex sum = GPU_Complex(0, 0); | ||||||
|  |                     float local_code_chip_index = 0.0; | ||||||
|  |                     ; | ||||||
|  |                     //float code_phase; | ||||||
|  |                     for (int pos = iAccum; pos < elementN; pos += ACCUM_N) | ||||||
|  |                         { | ||||||
|  |                             //original sample code | ||||||
|  |                             //sum = sum + d_sig_in[pos-vectorBase] * d_nco_in[pos-vectorBase] * d_local_codes_in[pos]; | ||||||
|  |                             //sum = sum + d_sig_in[pos-vectorBase] * d_local_codes_in[pos]; | ||||||
|  |                             //sum.multiply_acc(d_sig_in[pos],d_local_codes_in[pos+d_shifts_samples[vec]]); | ||||||
|  |  | ||||||
|             	local_code_chip_index= fmodf(code_phase_step_chips*__int2float_rd(pos)+ d_shifts_chips[vec] - rem_code_phase_chips, code_length_chips); |                             //custom code for multitap correlator | ||||||
|  |                             // 1.resample local code for the current shift | ||||||
|  |  | ||||||
|             	//Take into account that in multitap correlators, the shifts can be negative! |                             local_code_chip_index = fmodf(code_phase_step_chips * __int2float_rd(pos) + d_shifts_chips[vec] - rem_code_phase_chips, code_length_chips); | ||||||
|             	if (local_code_chip_index<0.0) local_code_chip_index+=(code_length_chips-1); |  | ||||||
|             	//printf("vec= %i, pos %i, chip_idx=%i chip_shift=%f \r\n",vec, pos,__float2int_rd(local_code_chip_index),local_code_chip_index); |  | ||||||
|             	// 2.correlate |  | ||||||
|             	sum.multiply_acc(d_sig_wiped[pos],d_local_code_in[__float2int_rd(local_code_chip_index)]); |  | ||||||
|  |  | ||||||
|             } |                             //Take into account that in multitap correlators, the shifts can be negative! | ||||||
|             accumResult[iAccum] = sum; |                             if (local_code_chip_index < 0.0) local_code_chip_index += (code_length_chips - 1); | ||||||
|  |                             //printf("vec= %i, pos %i, chip_idx=%i chip_shift=%f \r\n",vec, pos,__float2int_rd(local_code_chip_index),local_code_chip_index); | ||||||
|  |                             // 2.correlate | ||||||
|  |                             sum.multiply_acc(d_sig_wiped[pos], d_local_code_in[__float2int_rd(local_code_chip_index)]); | ||||||
|  |                         } | ||||||
|  |                     accumResult[iAccum] = sum; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |             //////////////////////////////////////////////////////////////////////// | ||||||
|  |             // Perform tree-like reduction of accumulators' results. | ||||||
|  |             // ACCUM_N has to be power of two at this stage | ||||||
|  |             //////////////////////////////////////////////////////////////////////// | ||||||
|  |             for (int stride = ACCUM_N / 2; stride > 0; stride >>= 1) | ||||||
|  |                 { | ||||||
|  |                     __syncthreads(); | ||||||
|  |  | ||||||
|  |                     for (int iAccum = threadIdx.x; iAccum < stride; iAccum += blockDim.x) | ||||||
|  |                         { | ||||||
|  |                             accumResult[iAccum] += accumResult[stride + iAccum]; | ||||||
|  |                         } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |             if (threadIdx.x == 0) | ||||||
|  |                 { | ||||||
|  |                     d_corr_out[vec] = accumResult[0]; | ||||||
|  |                 } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         //////////////////////////////////////////////////////////////////////// |  | ||||||
|         // Perform tree-like reduction of accumulators' results. |  | ||||||
|         // ACCUM_N has to be power of two at this stage |  | ||||||
|         //////////////////////////////////////////////////////////////////////// |  | ||||||
|         for (int stride = ACCUM_N / 2; stride > 0; stride >>= 1) |  | ||||||
|         { |  | ||||||
|             __syncthreads(); |  | ||||||
|  |  | ||||||
|             for (int iAccum = threadIdx.x; iAccum < stride; iAccum += blockDim.x) |  | ||||||
|             { |  | ||||||
|                 accumResult[iAccum] += accumResult[stride + iAccum]; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (threadIdx.x == 0) |  | ||||||
|         	{ |  | ||||||
|         		d_corr_out[vec] = accumResult[0]; |  | ||||||
|         	} |  | ||||||
|     } |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| bool cuda_multicorrelator::init_cuda_integrated_resampler( | bool cuda_multicorrelator::init_cuda_integrated_resampler( | ||||||
| 		int signal_length_samples, |     int signal_length_samples, | ||||||
| 		int code_length_chips, |     int code_length_chips, | ||||||
| 		int n_correlators |     int n_correlators) | ||||||
| 		) |  | ||||||
| { | { | ||||||
| 	// use command-line specified CUDA device, otherwise use device with highest Gflops/s |     // use command-line specified CUDA device, otherwise use device with highest Gflops/s | ||||||
| //	findCudaDevice(argc, (const char **)argv); |     //	findCudaDevice(argc, (const char **)argv); | ||||||
|       cudaDeviceProp  prop; |     cudaDeviceProp prop; | ||||||
|     int num_devices, device; |     int num_devices, device; | ||||||
|     cudaGetDeviceCount(&num_devices); |     cudaGetDeviceCount(&num_devices); | ||||||
|     if (num_devices > 1) { |     if (num_devices > 1) | ||||||
|           int max_multiprocessors = 0, max_device = 0; |         { | ||||||
|           for (device = 0; device < num_devices; device++) { |             int max_multiprocessors = 0, max_device = 0; | ||||||
|                   cudaDeviceProp properties; |             for (device = 0; device < num_devices; device++) | ||||||
|                   cudaGetDeviceProperties(&properties, device); |                 { | ||||||
|                   if (max_multiprocessors < properties.multiProcessorCount) { |                     cudaDeviceProp properties; | ||||||
|                           max_multiprocessors = properties.multiProcessorCount; |                     cudaGetDeviceProperties(&properties, device); | ||||||
|                           max_device = device; |                     if (max_multiprocessors < properties.multiProcessorCount) | ||||||
|                   } |                         { | ||||||
|                   printf("Found GPU device # %i\n",device); |                             max_multiprocessors = properties.multiProcessorCount; | ||||||
|           } |                             max_device = device; | ||||||
|           //cudaSetDevice(max_device); |                         } | ||||||
|  |                     printf("Found GPU device # %i\n", device); | ||||||
|  |                 } | ||||||
|  |             //cudaSetDevice(max_device); | ||||||
|  |  | ||||||
|           //set random device! |             //set random device! | ||||||
| 	  selected_gps_device=rand() % num_devices;//generates a random number between 0 and num_devices to split the threads between GPUs |             selected_gps_device = rand() % num_devices;  //generates a random number between 0 and num_devices to split the threads between GPUs | ||||||
|           cudaSetDevice(selected_gps_device);  |             cudaSetDevice(selected_gps_device); | ||||||
|  |  | ||||||
|           cudaGetDeviceProperties( &prop, max_device ); |             cudaGetDeviceProperties(&prop, max_device); | ||||||
|           //debug code |             //debug code | ||||||
|           if (prop.canMapHostMemory != 1) { |             if (prop.canMapHostMemory != 1) | ||||||
|               printf( "Device can not map memory.\n" ); |                 { | ||||||
|           } |                     printf("Device can not map memory.\n"); | ||||||
|           printf("L2 Cache size= %u \n",prop.l2CacheSize); |                 } | ||||||
|           printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock); |             printf("L2 Cache size= %u \n", prop.l2CacheSize); | ||||||
|           printf("maxGridSize= %i \n",prop.maxGridSize[0]); |             printf("maxThreadsPerBlock= %u \n", prop.maxThreadsPerBlock); | ||||||
|           printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock); |             printf("maxGridSize= %i \n", prop.maxGridSize[0]); | ||||||
|           printf("deviceOverlap= %i \n",prop.deviceOverlap); |             printf("sharedMemPerBlock= %lu \n", prop.sharedMemPerBlock); | ||||||
|   	    printf("multiProcessorCount= %i \n",prop.multiProcessorCount); |             printf("deviceOverlap= %i \n", prop.deviceOverlap); | ||||||
|     }else{ |             printf("multiProcessorCount= %i \n", prop.multiProcessorCount); | ||||||
|     	    cudaGetDevice( &selected_gps_device); |         } | ||||||
|     	    cudaGetDeviceProperties( &prop, selected_gps_device ); |     else | ||||||
|     	    //debug code |         { | ||||||
|     	    if (prop.canMapHostMemory != 1) { |             cudaGetDevice(&selected_gps_device); | ||||||
|     	        printf( "Device can not map memory.\n" ); |             cudaGetDeviceProperties(&prop, selected_gps_device); | ||||||
|     	    } |             //debug code | ||||||
|  |             if (prop.canMapHostMemory != 1) | ||||||
|  |                 { | ||||||
|  |                     printf("Device can not map memory.\n"); | ||||||
|  |                 } | ||||||
|  |  | ||||||
|     	    printf("L2 Cache size= %u \n",prop.l2CacheSize); |             printf("L2 Cache size= %u \n", prop.l2CacheSize); | ||||||
|     	    printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock); |             printf("maxThreadsPerBlock= %u \n", prop.maxThreadsPerBlock); | ||||||
|     	    printf("maxGridSize= %i \n",prop.maxGridSize[0]); |             printf("maxGridSize= %i \n", prop.maxGridSize[0]); | ||||||
|     	    printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock); |             printf("sharedMemPerBlock= %lu \n", prop.sharedMemPerBlock); | ||||||
|     	    printf("deviceOverlap= %i \n",prop.deviceOverlap); |             printf("deviceOverlap= %i \n", prop.deviceOverlap); | ||||||
|     	    printf("multiProcessorCount= %i \n",prop.multiProcessorCount); |             printf("multiProcessorCount= %i \n", prop.multiProcessorCount); | ||||||
|     } |         } | ||||||
|  |  | ||||||
| 	// (cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared)); |     // (cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared)); | ||||||
|  |  | ||||||
|     // ALLOCATE GPU MEMORY FOR INPUT/OUTPUT and INTERNAL vectors |     // ALLOCATE GPU MEMORY FOR INPUT/OUTPUT and INTERNAL vectors | ||||||
|     size_t size = signal_length_samples * sizeof(GPU_Complex); |     size_t size = signal_length_samples * sizeof(GPU_Complex); | ||||||
|  |  | ||||||
| 	//********* ZERO COPY VERSION ************ |     //********* ZERO COPY VERSION ************ | ||||||
| 	// Set flag to enable zero copy access |     // Set flag to enable zero copy access | ||||||
|     // Optimal in shared memory devices (like Jetson K1) |     // Optimal in shared memory devices (like Jetson K1) | ||||||
| 	//cudaSetDeviceFlags(cudaDeviceMapHost); |     //cudaSetDeviceFlags(cudaDeviceMapHost); | ||||||
|  |  | ||||||
| 	//******** CudaMalloc version *********** |     //******** CudaMalloc version *********** | ||||||
|  |  | ||||||
| 	// input signal GPU memory (can be mapped to CPU memory in shared memory devices!) |     // input signal GPU memory (can be mapped to CPU memory in shared memory devices!) | ||||||
| 	//	cudaMalloc((void **)&d_sig_in, size); |     //	cudaMalloc((void **)&d_sig_in, size); | ||||||
| 	//	cudaMemset(d_sig_in,0,size); |     //	cudaMemset(d_sig_in,0,size); | ||||||
|  |  | ||||||
| 	// Doppler-free signal (internal GPU memory) |     // Doppler-free signal (internal GPU memory) | ||||||
| 	cudaMalloc((void **)&d_sig_doppler_wiped, size); |     cudaMalloc((void **)&d_sig_doppler_wiped, size); | ||||||
| 	cudaMemset(d_sig_doppler_wiped,0,size); |     cudaMemset(d_sig_doppler_wiped, 0, size); | ||||||
|  |  | ||||||
| 	// Local code GPU memory (can be mapped to CPU memory in shared memory devices!) |     // Local code GPU memory (can be mapped to CPU memory in shared memory devices!) | ||||||
| 	cudaMalloc((void **)&d_local_codes_in, sizeof(std::complex<float>)*code_length_chips); |     cudaMalloc((void **)&d_local_codes_in, sizeof(std::complex<float>) * code_length_chips); | ||||||
| 	cudaMemset(d_local_codes_in,0,sizeof(std::complex<float>)*code_length_chips); |     cudaMemset(d_local_codes_in, 0, sizeof(std::complex<float>) * code_length_chips); | ||||||
|  |  | ||||||
|     d_code_length_chips=code_length_chips; |     d_code_length_chips = code_length_chips; | ||||||
|  |  | ||||||
| 	// Vector with the chip shifts for each correlator tap |     // Vector with the chip shifts for each correlator tap | ||||||
|     //GPU memory (can be mapped to CPU memory in shared memory devices!) |     //GPU memory (can be mapped to CPU memory in shared memory devices!) | ||||||
| 	cudaMalloc((void **)&d_shifts_chips, sizeof(float)*n_correlators); |     cudaMalloc((void **)&d_shifts_chips, sizeof(float) * n_correlators); | ||||||
| 	cudaMemset(d_shifts_chips,0,sizeof(float)*n_correlators); |     cudaMemset(d_shifts_chips, 0, sizeof(float) * n_correlators); | ||||||
|  |  | ||||||
| 	//scalars |     //scalars | ||||||
| 	//cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators); |     //cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators); | ||||||
| 	//cudaMemset(d_corr_out,0,sizeof(std::complex<float>)*n_correlators); |     //cudaMemset(d_corr_out,0,sizeof(std::complex<float>)*n_correlators); | ||||||
|  |  | ||||||
|     // Launch the Vector Add CUDA Kernel |     // Launch the Vector Add CUDA Kernel | ||||||
|     // TODO: write a smart load balance using device info! |     // TODO: write a smart load balance using device info! | ||||||
| 	threadsPerBlock = 64; |     threadsPerBlock = 64; | ||||||
|     blocksPerGrid = 128;//(int)(signal_length_samples+threadsPerBlock-1)/threadsPerBlock; |     blocksPerGrid = 128;  //(int)(signal_length_samples+threadsPerBlock-1)/threadsPerBlock; | ||||||
|  |  | ||||||
| 	cudaStreamCreate (&stream1) ; |     cudaStreamCreate(&stream1); | ||||||
| 	//cudaStreamCreate (&stream2) ; |     //cudaStreamCreate (&stream2) ; | ||||||
| 	return true; |     return true; | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| bool cuda_multicorrelator::set_local_code_and_taps( | bool cuda_multicorrelator::set_local_code_and_taps( | ||||||
| 		int code_length_chips, |     int code_length_chips, | ||||||
| 		const std::complex<float>* local_codes_in, |     const std::complex<float> *local_codes_in, | ||||||
| 		float *shifts_chips, |     float *shifts_chips, | ||||||
| 		int n_correlators |     int n_correlators) | ||||||
| 		) |  | ||||||
| { | { | ||||||
|  |     cudaSetDevice(selected_gps_device); | ||||||
|  |     //********* ZERO COPY VERSION ************ | ||||||
|  |     //	// Get device pointer from host memory. No allocation or memcpy | ||||||
|  |     //	cudaError_t code; | ||||||
|  |     //	// local code CPU -> GPU copy memory | ||||||
|  |     //	code=cudaHostGetDevicePointer((void **)&d_local_codes_in,  (void *) local_codes_in, 0); | ||||||
|  |     //	if (code!=cudaSuccess) | ||||||
|  |     //	{ | ||||||
|  |     //		printf("cuda cudaHostGetDevicePointer error in set_local_code_and_taps \r\n"); | ||||||
|  |     //	} | ||||||
|  |     //	// Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!) | ||||||
|  |     //	code=cudaHostGetDevicePointer((void **)&d_shifts_chips,  (void *) shifts_chips, 0); | ||||||
|  |     //	if (code!=cudaSuccess) | ||||||
|  |     //	{ | ||||||
|  |     //		printf("cuda cudaHostGetDevicePointer error in set_local_code_and_taps \r\n"); | ||||||
|  |     //	} | ||||||
|  |  | ||||||
|           cudaSetDevice(selected_gps_device); |     //******** CudaMalloc version *********** | ||||||
| 	//********* ZERO COPY VERSION ************ |  | ||||||
| //	// Get device pointer from host memory. No allocation or memcpy |  | ||||||
| //	cudaError_t code; |  | ||||||
| //	// local code CPU -> GPU copy memory |  | ||||||
| //	code=cudaHostGetDevicePointer((void **)&d_local_codes_in,  (void *) local_codes_in, 0); |  | ||||||
| //	if (code!=cudaSuccess) |  | ||||||
| //	{ |  | ||||||
| //		printf("cuda cudaHostGetDevicePointer error in set_local_code_and_taps \r\n"); |  | ||||||
| //	} |  | ||||||
| //	// Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!) |  | ||||||
| //	code=cudaHostGetDevicePointer((void **)&d_shifts_chips,  (void *) shifts_chips, 0); |  | ||||||
| //	if (code!=cudaSuccess) |  | ||||||
| //	{ |  | ||||||
| //		printf("cuda cudaHostGetDevicePointer error in set_local_code_and_taps \r\n"); |  | ||||||
| //	} |  | ||||||
|  |  | ||||||
| 	//******** CudaMalloc version *********** |  | ||||||
|     //local code CPU -> GPU copy memory |     //local code CPU -> GPU copy memory | ||||||
|     cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex)*code_length_chips, cudaMemcpyHostToDevice,stream1); |     cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex) * code_length_chips, cudaMemcpyHostToDevice, stream1); | ||||||
|     d_code_length_chips=code_length_chips; |     d_code_length_chips = code_length_chips; | ||||||
|  |  | ||||||
|     //Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!) |     //Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!) | ||||||
|     cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float)*n_correlators, |     cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float) * n_correlators, | ||||||
|                                     cudaMemcpyHostToDevice,stream1); |         cudaMemcpyHostToDevice, stream1); | ||||||
|  |  | ||||||
| 	return true; |     return true; | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| bool cuda_multicorrelator::set_input_output_vectors( | bool cuda_multicorrelator::set_input_output_vectors( | ||||||
| 		std::complex<float>* corr_out, |     std::complex<float> *corr_out, | ||||||
| 		std::complex<float>* sig_in |     std::complex<float> *sig_in) | ||||||
| 		) |  | ||||||
| { | { | ||||||
|  |     cudaSetDevice(selected_gps_device); | ||||||
|  |     // Save CPU pointers | ||||||
|  |     d_sig_in_cpu = sig_in; | ||||||
|  |     d_corr_out_cpu = corr_out; | ||||||
|  |  | ||||||
|          cudaSetDevice(selected_gps_device); |     // Zero Copy version | ||||||
| 	// Save CPU pointers |     // Get device pointer from host memory. No allocation or memcpy | ||||||
| 	d_sig_in_cpu =sig_in; |     cudaError_t code; | ||||||
| 	d_corr_out_cpu = corr_out; |     code = cudaHostGetDevicePointer((void **)&d_sig_in, (void *)sig_in, 0); | ||||||
|  |     code = cudaHostGetDevicePointer((void **)&d_corr_out, (void *)corr_out, 0); | ||||||
| 	// Zero Copy version |     if (code != cudaSuccess) | ||||||
| 	// Get device pointer from host memory. No allocation or memcpy |         { | ||||||
| 	cudaError_t code; |             printf("cuda cudaHostGetDevicePointer error \r\n"); | ||||||
| 	code=cudaHostGetDevicePointer((void **)&d_sig_in,  (void *) sig_in, 0); |         } | ||||||
| 	code=cudaHostGetDevicePointer((void **)&d_corr_out,  (void *) corr_out, 0); |     return true; | ||||||
| 	if (code!=cudaSuccess) |  | ||||||
| 	{ |  | ||||||
| 		printf("cuda cudaHostGetDevicePointer error \r\n"); |  | ||||||
| 	} |  | ||||||
| 	return true; |  | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
| #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } | #define gpuErrchk(ans)                        \ | ||||||
| inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) |     {                                         \ | ||||||
|  |         gpuAssert((ans), __FILE__, __LINE__); \ | ||||||
|  |     } | ||||||
|  | inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true) | ||||||
| { | { | ||||||
|    if (code != cudaSuccess) |     if (code != cudaSuccess) | ||||||
|    { |         { | ||||||
|       fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); |             fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); | ||||||
|       if (abort) exit(code); |             if (abort) exit(code); | ||||||
|    } |         } | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda( | bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda( | ||||||
| 		float rem_carrier_phase_in_rad, |     float rem_carrier_phase_in_rad, | ||||||
| 		float phase_step_rad, |     float phase_step_rad, | ||||||
|         float code_phase_step_chips, |     float code_phase_step_chips, | ||||||
|         float rem_code_phase_chips, |     float rem_code_phase_chips, | ||||||
| 		int signal_length_samples, |     int signal_length_samples, | ||||||
| 		int n_correlators) |     int n_correlators) | ||||||
| 	{ | { | ||||||
|  |  | ||||||
|     cudaSetDevice(selected_gps_device); |     cudaSetDevice(selected_gps_device); | ||||||
| 	// cudaMemCpy version |     // cudaMemCpy version | ||||||
| 	//size_t memSize = signal_length_samples * sizeof(std::complex<float>); |     //size_t memSize = signal_length_samples * sizeof(std::complex<float>); | ||||||
| 	// input signal CPU -> GPU copy memory |     // input signal CPU -> GPU copy memory | ||||||
|     //cudaMemcpyAsync(d_sig_in, d_sig_in_cpu, memSize, |     //cudaMemcpyAsync(d_sig_in, d_sig_in_cpu, memSize, | ||||||
|     //                               cudaMemcpyHostToDevice, stream2); |     //                               cudaMemcpyHostToDevice, stream2); | ||||||
|     //***** NOTICE: NCO is computed on-the-fly, not need to copy NCO into GPU! **** |     //***** NOTICE: NCO is computed on-the-fly, not need to copy NCO into GPU! **** | ||||||
|  |  | ||||||
|     //launch the multitap correlator with integrated local code resampler! |     //launch the multitap correlator with integrated local code resampler! | ||||||
|  |  | ||||||
|     Doppler_wippe_scalarProdGPUCPXxN_shifts_chips<<<blocksPerGrid, threadsPerBlock,0 ,stream1>>>( |     Doppler_wippe_scalarProdGPUCPXxN_shifts_chips<<<blocksPerGrid, threadsPerBlock, 0, stream1>>>( | ||||||
| 			d_corr_out, |         d_corr_out, | ||||||
| 			d_sig_in, |         d_sig_in, | ||||||
| 			d_sig_doppler_wiped, |         d_sig_doppler_wiped, | ||||||
| 			d_local_codes_in, |         d_local_codes_in, | ||||||
| 			d_shifts_chips, |         d_shifts_chips, | ||||||
| 			d_code_length_chips, |         d_code_length_chips, | ||||||
| 	        code_phase_step_chips, |         code_phase_step_chips, | ||||||
| 	        rem_code_phase_chips, |         rem_code_phase_chips, | ||||||
| 			n_correlators, |         n_correlators, | ||||||
| 			signal_length_samples, |         signal_length_samples, | ||||||
| 			rem_carrier_phase_in_rad, |         rem_carrier_phase_in_rad, | ||||||
| 			phase_step_rad |         phase_step_rad); | ||||||
| 			); |  | ||||||
|  |  | ||||||
|     gpuErrchk( cudaPeekAtLastError() ); |     gpuErrchk(cudaPeekAtLastError()); | ||||||
|     gpuErrchk( cudaStreamSynchronize(stream1)); |     gpuErrchk(cudaStreamSynchronize(stream1)); | ||||||
|  |  | ||||||
| 	// cudaMemCpy version |     // cudaMemCpy version | ||||||
|     // Copy the device result vector in device memory to the host result vector |     // Copy the device result vector in device memory to the host result vector | ||||||
|     // in host memory. |     // in host memory. | ||||||
|     //scalar products (correlators outputs) |     //scalar products (correlators outputs) | ||||||
| @@ -352,37 +356,38 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda( | |||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| cuda_multicorrelator::cuda_multicorrelator() | cuda_multicorrelator::cuda_multicorrelator() | ||||||
| { | { | ||||||
| 	d_sig_in=NULL; |     d_sig_in = NULL; | ||||||
| 	d_nco_in=NULL; |     d_nco_in = NULL; | ||||||
| 	d_sig_doppler_wiped=NULL; |     d_sig_doppler_wiped = NULL; | ||||||
| 	d_local_codes_in=NULL; |     d_local_codes_in = NULL; | ||||||
| 	d_shifts_samples=NULL; |     d_shifts_samples = NULL; | ||||||
| 	d_shifts_chips=NULL; |     d_shifts_chips = NULL; | ||||||
| 	d_corr_out=NULL; |     d_corr_out = NULL; | ||||||
| 	threadsPerBlock=0; |     threadsPerBlock = 0; | ||||||
| 	blocksPerGrid=0; |     blocksPerGrid = 0; | ||||||
| 	d_code_length_chips=0; |     d_code_length_chips = 0; | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| bool cuda_multicorrelator::free_cuda() | bool cuda_multicorrelator::free_cuda() | ||||||
| { | { | ||||||
| 	// Free device global memory |     // Free device global memory | ||||||
| 	if (d_sig_in!=NULL) cudaFree(d_sig_in); |     if (d_sig_in != NULL) cudaFree(d_sig_in); | ||||||
| 	if (d_nco_in!=NULL) cudaFree(d_nco_in); |     if (d_nco_in != NULL) cudaFree(d_nco_in); | ||||||
| 	if (d_sig_doppler_wiped!=NULL) cudaFree(d_sig_doppler_wiped); |     if (d_sig_doppler_wiped != NULL) cudaFree(d_sig_doppler_wiped); | ||||||
| 	if (d_local_codes_in!=NULL) cudaFree(d_local_codes_in); |     if (d_local_codes_in != NULL) cudaFree(d_local_codes_in); | ||||||
| 	if (d_corr_out!=NULL) cudaFree(d_corr_out); |     if (d_corr_out != NULL) cudaFree(d_corr_out); | ||||||
| 	if (d_shifts_samples!=NULL) cudaFree(d_shifts_samples); |     if (d_shifts_samples != NULL) cudaFree(d_shifts_samples); | ||||||
| 	if (d_shifts_chips!=NULL) cudaFree(d_shifts_chips); |     if (d_shifts_chips != NULL) cudaFree(d_shifts_chips); | ||||||
|     // Reset the device and exit |     // Reset the device and exit | ||||||
|     // cudaDeviceReset causes the driver to clean up all state. While |     // cudaDeviceReset causes the driver to clean up all state. While | ||||||
|     // not mandatory in normal operation, it is good practice.  It is also |     // not mandatory in normal operation, it is good practice.  It is also | ||||||
|     // needed to ensure correct operation when the application is being |     // needed to ensure correct operation when the application is being | ||||||
|     // profiled. Calling cudaDeviceReset causes all profile data to be |     // profiled. Calling cudaDeviceReset causes all profile data to be | ||||||
|     // flushed before the application exits |     // flushed before the application exits | ||||||
| 	cudaDeviceReset(); |     cudaDeviceReset(); | ||||||
| 	return true; |     return true; | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,11 +1,11 @@ | |||||||
| /*! | /*! | ||||||
|  * \file cuda_multicorrelator.h |  * \file cuda_multicorrelator.h | ||||||
|  * \brief High optimized CUDA GPU vector multiTAP correlator class |  * \brief Highly optimized CUDA GPU vector multiTAP correlator class | ||||||
|  * \authors <ul> |  * \authors <ul> | ||||||
|  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es |  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es | ||||||
|  *          </ul> |  *          </ul> | ||||||
|  * |  * | ||||||
|  * Class that implements a high optimized vector multiTAP correlator class for NVIDIA CUDA GPUs |  * Class that implements a highly optimized vector multiTAP correlator class for NVIDIA CUDA GPUs | ||||||
|  * |  * | ||||||
|  * ------------------------------------------------------------------------- |  * ------------------------------------------------------------------------- | ||||||
|  * |  * | ||||||
| @@ -92,6 +92,7 @@ struct GPU_Complex | |||||||
|     } |     } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  |  | ||||||
| struct GPU_Complex_Short | struct GPU_Complex_Short | ||||||
| { | { | ||||||
|     float r; |     float r; | ||||||
| @@ -149,7 +150,6 @@ private: | |||||||
|     GPU_Complex* d_local_codes_in; |     GPU_Complex* d_local_codes_in; | ||||||
|     GPU_Complex* d_corr_out; |     GPU_Complex* d_corr_out; | ||||||
|  |  | ||||||
|     // |  | ||||||
|     std::complex<float>* d_sig_in_cpu; |     std::complex<float>* d_sig_in_cpu; | ||||||
|     std::complex<float>* d_corr_out_cpu; |     std::complex<float>* d_corr_out_cpu; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -32,7 +32,26 @@ | |||||||
|  */ |  */ | ||||||
|  |  | ||||||
| #include "tracking_FLL_PLL_filter.h" | #include "tracking_FLL_PLL_filter.h" | ||||||
| #include <iostream> |  | ||||||
|  |  | ||||||
|  | Tracking_FLL_PLL_filter::Tracking_FLL_PLL_filter() | ||||||
|  | { | ||||||
|  |     d_order = 0; | ||||||
|  |     d_pll_w = 0.0; | ||||||
|  |     d_pll_w0p3 = 0.0; | ||||||
|  |     d_pll_w0f2 = 0.0; | ||||||
|  |     d_pll_x = 0.0; | ||||||
|  |     d_pll_a2 = 0.0; | ||||||
|  |     d_pll_w0f = 0.0; | ||||||
|  |     d_pll_a3 = 0.0; | ||||||
|  |     d_pll_w0p2 = 0.0; | ||||||
|  |     d_pll_b3 = 0.0; | ||||||
|  |     d_pll_w0p = 0.0; | ||||||
|  | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | Tracking_FLL_PLL_filter::~Tracking_FLL_PLL_filter() = default; | ||||||
|  |  | ||||||
|  |  | ||||||
| void Tracking_FLL_PLL_filter::set_params(float fll_bw_hz, float pll_bw_hz, int order) | void Tracking_FLL_PLL_filter::set_params(float fll_bw_hz, float pll_bw_hz, int order) | ||||||
| { | { | ||||||
| @@ -89,7 +108,7 @@ float Tracking_FLL_PLL_filter::get_carrier_error(float FLL_discriminator, float | |||||||
|     if (d_order == 3) |     if (d_order == 3) | ||||||
|         { |         { | ||||||
|             /* |             /* | ||||||
|              *  3rd order PLL with 2nd order FLL assist |              * 3rd order PLL with 2nd order FLL assist | ||||||
|              */ |              */ | ||||||
|             d_pll_w = d_pll_w + correlation_time_s * (d_pll_w0p3 * PLL_discriminator + d_pll_w0f2 * FLL_discriminator); |             d_pll_w = d_pll_w + correlation_time_s * (d_pll_w0p3 * PLL_discriminator + d_pll_w0f2 * FLL_discriminator); | ||||||
|             d_pll_x = d_pll_x + correlation_time_s * (0.5 * d_pll_w + d_pll_a2 * d_pll_w0f * FLL_discriminator + d_pll_a3 * d_pll_w0p2 * PLL_discriminator); |             d_pll_x = d_pll_x + correlation_time_s * (0.5 * d_pll_w + d_pll_a2 * d_pll_w0f * FLL_discriminator + d_pll_a3 * d_pll_w0p2 * PLL_discriminator); | ||||||
| @@ -104,31 +123,11 @@ float Tracking_FLL_PLL_filter::get_carrier_error(float FLL_discriminator, float | |||||||
|             pll_w_new = d_pll_w + PLL_discriminator * d_pll_w0p2 * correlation_time_s + FLL_discriminator * d_pll_w0f * correlation_time_s; |             pll_w_new = d_pll_w + PLL_discriminator * d_pll_w0p2 * correlation_time_s + FLL_discriminator * d_pll_w0f * correlation_time_s; | ||||||
|             carrier_error_hz = 0.5 * (pll_w_new + d_pll_w) + d_pll_a2 * d_pll_w0p * PLL_discriminator; |             carrier_error_hz = 0.5 * (pll_w_new + d_pll_w) + d_pll_a2 * d_pll_w0p * PLL_discriminator; | ||||||
|             d_pll_w = pll_w_new; |             d_pll_w = pll_w_new; | ||||||
|             /*std::cout<<" d_pll_w = "<<carrier_error_hz<< |             /* std::cout << " d_pll_w = " << carrier_error_hz << ", pll_w_new = " << pll_w_new | ||||||
|                ", pll_w_new = "<<pll_w_new |                       << ", PLL_discriminator=" << PLL_discriminator | ||||||
|                <<", PLL_discriminator=" <<PLL_discriminator |                       << " FLL_discriminator =" << FLL_discriminator | ||||||
|                <<" FLL_discriminator ="<<FLL_discriminator |                       << " correlation_time_s = " << correlation_time_s << "\r\n"; */ | ||||||
|                <<" correlation_time_s = "<<correlation_time_s<<"\r\n";*/ |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|     return carrier_error_hz; |     return carrier_error_hz; | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| Tracking_FLL_PLL_filter::Tracking_FLL_PLL_filter() |  | ||||||
| { |  | ||||||
|     d_order = 0; |  | ||||||
|     d_pll_w = 0; |  | ||||||
|     d_pll_w0p3 = 0; |  | ||||||
|     d_pll_w0f2 = 0; |  | ||||||
|     d_pll_x = 0; |  | ||||||
|     d_pll_a2 = 0; |  | ||||||
|     d_pll_w0f = 0; |  | ||||||
|     d_pll_a3 = 0; |  | ||||||
|     d_pll_w0p2 = 0; |  | ||||||
|     d_pll_b3 = 0; |  | ||||||
|     d_pll_w0p = 0; |  | ||||||
| } |  | ||||||
|  |  | ||||||
|  |  | ||||||
| Tracking_FLL_PLL_filter::~Tracking_FLL_PLL_filter() = default; |  | ||||||
|   | |||||||
| @@ -36,6 +36,7 @@ | |||||||
| #include <cmath> | #include <cmath> | ||||||
|  |  | ||||||
| //  All the outputs are in RADIANS | //  All the outputs are in RADIANS | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * FLL four quadrant arctan discriminator: |  * FLL four quadrant arctan discriminator: | ||||||
|  * \f{equation} |  * \f{equation} | ||||||
| @@ -45,7 +46,6 @@ | |||||||
|  * \f$I_{PS1},Q_{PS1}\f$ are the inphase and quadrature prompt correlator outputs respectively at sample time \f$t_1\f$, and |  * \f$I_{PS1},Q_{PS1}\f$ are the inphase and quadrature prompt correlator outputs respectively at sample time \f$t_1\f$, and | ||||||
|  * \f$I_{PS2},Q_{PS2}\f$ are the inphase and quadrature prompt correlator outputs respectively at sample time \f$t_2\f$. The output is in [radians/second]. |  * \f$I_{PS2},Q_{PS2}\f$ are the inphase and quadrature prompt correlator outputs respectively at sample time \f$t_2\f$. The output is in [radians/second]. | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
| double fll_four_quadrant_atan(gr_complex prompt_s1, gr_complex prompt_s2, double t1, double t2) | double fll_four_quadrant_atan(gr_complex prompt_s1, gr_complex prompt_s2, double t1, double t2) | ||||||
| { | { | ||||||
|     double cross, dot; |     double cross, dot; | ||||||
| @@ -105,6 +105,7 @@ double dll_nc_e_minus_l_normalized(gr_complex early_s1, gr_complex late_s1) | |||||||
|     return 0.5 * (P_early - P_late) / (P_early + P_late); |     return 0.5 * (P_early - P_late) / (P_early + P_late); | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * DLL Noncoherent Very Early Minus Late Power (VEMLP) normalized discriminator, using the outputs |  * DLL Noncoherent Very Early Minus Late Power (VEMLP) normalized discriminator, using the outputs | ||||||
|  * of four correlators, Very Early (VE), Early (E), Late (L) and Very Late (VL): |  * of four correlators, Very Early (VE), Early (E), Late (L) and Very Late (VL): | ||||||
|   | |||||||
| @@ -4,7 +4,7 @@ | |||||||
|  * \author Cillian O'Driscoll, 2015. cillian.odriscoll(at)gmail.com |  * \author Cillian O'Driscoll, 2015. cillian.odriscoll(at)gmail.com | ||||||
|  * |  * | ||||||
|  * Class implementing a generic 1st, 2nd or 3rd order loop filter. Based |  * Class implementing a generic 1st, 2nd or 3rd order loop filter. Based | ||||||
|  * on the bilinear transform of the standard Weiner filter. |  * on the bilinear transform of the standard Wiener filter. | ||||||
|  * |  * | ||||||
|  * ------------------------------------------------------------------------- |  * ------------------------------------------------------------------------- | ||||||
|  * |  * | ||||||
| @@ -36,6 +36,8 @@ | |||||||
| #include <glog/logging.h> | #include <glog/logging.h> | ||||||
| #include <cmath> | #include <cmath> | ||||||
|  |  | ||||||
|  | const int MAX_LOOP_ORDER = 3; | ||||||
|  | const int MAX_LOOP_HISTORY_LENGTH = 4; | ||||||
|  |  | ||||||
| Tracking_loop_filter::Tracking_loop_filter(float update_interval, | Tracking_loop_filter::Tracking_loop_filter(float update_interval, | ||||||
|     float noise_bandwidth, |     float noise_bandwidth, | ||||||
| @@ -74,7 +76,7 @@ float Tracking_loop_filter::apply(float current_input) | |||||||
|     // Now apply the filter coefficients: |     // Now apply the filter coefficients: | ||||||
|     float result = 0.0; |     float result = 0.0; | ||||||
|  |  | ||||||
|     // Hanlde the old outputs first: |     // Handle the old outputs first: | ||||||
|     for (unsigned int ii = 0; ii < d_output_coefficients.size(); ++ii) |     for (unsigned int ii = 0; ii < d_output_coefficients.size(); ++ii) | ||||||
|         { |         { | ||||||
|             result += d_output_coefficients[ii] * d_outputs[(d_current_index + ii) % MAX_LOOP_HISTORY_LENGTH]; |             result += d_output_coefficients[ii] * d_outputs[(d_current_index + ii) % MAX_LOOP_HISTORY_LENGTH]; | ||||||
| @@ -95,16 +97,13 @@ float Tracking_loop_filter::apply(float current_input) | |||||||
|  |  | ||||||
|     d_inputs[d_current_index] = current_input; |     d_inputs[d_current_index] = current_input; | ||||||
|  |  | ||||||
|  |  | ||||||
|     for (unsigned int ii = 0; ii < d_input_coefficients.size(); ++ii) |     for (unsigned int ii = 0; ii < d_input_coefficients.size(); ++ii) | ||||||
|         { |         { | ||||||
|             result += d_input_coefficients[ii] * d_inputs[(d_current_index + ii) % MAX_LOOP_HISTORY_LENGTH]; |             result += d_input_coefficients[ii] * d_inputs[(d_current_index + ii) % MAX_LOOP_HISTORY_LENGTH]; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |  | ||||||
|     d_outputs[d_current_index] = result; |     d_outputs[d_current_index] = result; | ||||||
|  |  | ||||||
|  |  | ||||||
|     return result; |     return result; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -179,7 +178,6 @@ void Tracking_loop_filter::update_coefficients(void) | |||||||
|                     d_output_coefficients[0] = 1.0; |                     d_output_coefficients[0] = 1.0; | ||||||
|                 } |                 } | ||||||
|             break; |             break; | ||||||
|  |  | ||||||
|         case 3: |         case 3: | ||||||
|             wn = d_noise_bandwidth / 0.7845;  // From Kaplan |             wn = d_noise_bandwidth / 0.7845;  // From Kaplan | ||||||
|             float a3 = 1.1; |             float a3 = 1.1; | ||||||
| @@ -208,7 +206,6 @@ void Tracking_loop_filter::update_coefficients(void) | |||||||
|                     d_input_coefficients[1] = g1 * T * T / 2.0 - 2.0 * g3; |                     d_input_coefficients[1] = g1 * T * T / 2.0 - 2.0 * g3; | ||||||
|                     d_input_coefficients[2] = g3 + T / 2.0 * (-g2 + T / 2.0 * g1); |                     d_input_coefficients[2] = g3 + T / 2.0 * (-g2 + T / 2.0 * g1); | ||||||
|  |  | ||||||
|  |  | ||||||
|                     d_output_coefficients.resize(2); |                     d_output_coefficients.resize(2); | ||||||
|                     d_output_coefficients[0] = 2.0; |                     d_output_coefficients[0] = 2.0; | ||||||
|                     d_output_coefficients[1] = -1.0; |                     d_output_coefficients[1] = -1.0; | ||||||
| @@ -260,10 +257,9 @@ void Tracking_loop_filter::set_order(int loop_order) | |||||||
| { | { | ||||||
|     if (loop_order < 1 or loop_order > MAX_LOOP_ORDER) |     if (loop_order < 1 or loop_order > MAX_LOOP_ORDER) | ||||||
|         { |         { | ||||||
|             LOG(ERROR) << "Ignoring attempt to set loop order to " << loop_order |             LOG(WARNING) << "Ignoring attempt to set loop order to " << loop_order | ||||||
|                        << ". Maximum allowed order is: " << MAX_LOOP_ORDER |                          << ". Maximum allowed order is: " << MAX_LOOP_ORDER | ||||||
|                        << ". Not changing current value of " << d_loop_order; |                          << ". Not changing current value of " << d_loop_order; | ||||||
|  |  | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -4,7 +4,7 @@ | |||||||
|  * \author Cillian O'Driscoll, 2015. cillian.odriscoll(at)gmail.com |  * \author Cillian O'Driscoll, 2015. cillian.odriscoll(at)gmail.com | ||||||
|  * |  * | ||||||
|  * Class implementing a generic 1st, 2nd or 3rd order loop filter. Based |  * Class implementing a generic 1st, 2nd or 3rd order loop filter. Based | ||||||
|  * on the bilinear transform of the standard Weiner filter. |  * on the bilinear transform of the standard Wiener filter. | ||||||
|  * |  * | ||||||
|  * ------------------------------------------------------------------------- |  * ------------------------------------------------------------------------- | ||||||
|  * |  * | ||||||
| @@ -33,8 +33,6 @@ | |||||||
|  |  | ||||||
| #ifndef GNSS_SDR_TRACKING_LOOP_FILTER_H_ | #ifndef GNSS_SDR_TRACKING_LOOP_FILTER_H_ | ||||||
| #define GNSS_SDR_TRACKING_LOOP_FILTER_H_ | #define GNSS_SDR_TRACKING_LOOP_FILTER_H_ | ||||||
| #define MAX_LOOP_ORDER 3 |  | ||||||
| #define MAX_LOOP_HISTORY_LENGTH 4 |  | ||||||
|  |  | ||||||
| #include <vector> | #include <vector> | ||||||
|  |  | ||||||
| @@ -74,7 +72,6 @@ private: | |||||||
|     // Compute the filter coefficients: |     // Compute the filter coefficients: | ||||||
|     void update_coefficients(void); |     void update_coefficients(void); | ||||||
|  |  | ||||||
|  |  | ||||||
| public: | public: | ||||||
|     float get_noise_bandwidth(void) const; |     float get_noise_bandwidth(void) const; | ||||||
|     float get_update_interval(void) const; |     float get_update_interval(void) const; | ||||||
|   | |||||||
| @@ -830,7 +830,7 @@ int32_t Beidou_Dnav_Navigation_Message::d2_subframe_decoder(std::string const& s | |||||||
|                     d_eccentricity_msb = static_cast<double>(read_navigation_unsigned(subframe_bits, D2_E_MSB)); |                     d_eccentricity_msb = static_cast<double>(read_navigation_unsigned(subframe_bits, D2_E_MSB)); | ||||||
|                     d_eccentricity_msb_bits = (read_navigation_unsigned(subframe_bits, D2_E_MSB)); |                     d_eccentricity_msb_bits = (read_navigation_unsigned(subframe_bits, D2_E_MSB)); | ||||||
|                     // Adjust for lsb in next page (shift number of lsb to the left) |                     // Adjust for lsb in next page (shift number of lsb to the left) | ||||||
|                     d_eccentricity_msb = static_cast<uint64_t>((static_cast<int>(d_eccentricity_msb) << 22)); |                     d_eccentricity_msb = static_cast<uint64_t>((static_cast<uint64_t>(d_eccentricity_msb) << 22)); | ||||||
|                     d_eccentricity_msb_bits = d_eccentricity_msb_bits << 22; |                     d_eccentricity_msb_bits = d_eccentricity_msb_bits << 22; | ||||||
|  |  | ||||||
|                     // Set system flags for message reception |                     // Set system flags for message reception | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Carles Fernandez
					Carles Fernandez