mirror of
				https://github.com/gnss-sdr/gnss-sdr
				synced 2025-10-31 07:13:03 +00:00 
			
		
		
		
	Updated GPU tracking implementation. Bug fixed in cuda correlator and
performance improvements
This commit is contained in:
		| @@ -1,13 +1,8 @@ | ||||
| /*! | ||||
|  * \file gps_l1_ca_dll_pll_tracking_gpu_cc.cc | ||||
|  * \brief Implementation of a code DLL + carrier PLL tracking block, GPU ACCELERATED | ||||
|  * \brief Implementation of a code DLL + carrier PLL tracking block GPU ACCELERATED | ||||
|  * \author Javier Arribas, 2015. jarribas(at)cttc.es | ||||
|  * | ||||
|  * Code DLL + carrier PLL according to the algorithms described in: | ||||
|  * [1] K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen, | ||||
|  * A Software-Defined GPS and Galileo Receiver. A Single-Frequency | ||||
|  * Approach, Birkhauser, 2007 | ||||
|  * | ||||
|  * ------------------------------------------------------------------------- | ||||
|  * | ||||
|  * Copyright (C) 2010-2015  (see AUTHORS file for a list of contributors) | ||||
| @@ -40,6 +35,7 @@ | ||||
| #include <sstream> | ||||
| #include <boost/lexical_cast.hpp> | ||||
| #include <gnuradio/io_signature.h> | ||||
| #include <volk/volk.h> | ||||
| #include <glog/logging.h> | ||||
| #include "gnss_synchro.h" | ||||
| #include "gps_sdr_signal_processing.h" | ||||
| @@ -47,7 +43,6 @@ | ||||
| #include "lock_detectors.h" | ||||
| #include "GPS_L1_CA.h" | ||||
| #include "control_message_factory.h" | ||||
| #include <volk/volk.h> //volk_alignement | ||||
| // includes | ||||
| #include <cuda_profiler_api.h> | ||||
|  | ||||
| @@ -80,10 +75,14 @@ gps_l1_ca_dll_pll_make_tracking_gpu_cc( | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::forecast (int noutput_items, | ||||
|         gr_vector_int &ninput_items_required) | ||||
| { | ||||
|     ninput_items_required[0] = static_cast<int>(d_vector_length) * 2; //set the required available samples in each call | ||||
|     if (noutput_items != 0) | ||||
|         { | ||||
|             ninput_items_required[0] = static_cast<int>(d_vector_length) * 2; //set the required available samples in each call | ||||
|         } | ||||
| } | ||||
|  | ||||
|  | ||||
| @@ -108,10 +107,11 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc( | ||||
|     d_fs_in = fs_in; | ||||
|     d_vector_length = vector_length; | ||||
|     d_dump_filename = dump_filename; | ||||
|     d_correlation_length_samples = static_cast<int>(d_vector_length); | ||||
|  | ||||
|     // Initialize tracking  ========================================== | ||||
|     d_code_loop_filter.set_DLL_BW(dll_bw_hz); | ||||
|     d_carrier_loop_filter.set_PLL_BW(pll_bw_hz); | ||||
|     d_carrier_loop_filter.set_params(10.0, pll_bw_hz,2); | ||||
|  | ||||
|     //--- DLL variables -------------------------------------------------------- | ||||
|     d_early_late_spc_chips = early_late_space_chips; // Define early-late offset (in chips) | ||||
| @@ -120,32 +120,33 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc( | ||||
|     cudaSetDeviceFlags(cudaDeviceMapHost); | ||||
|     //allocate host memory | ||||
|     //pinned memory mode - use special function to get OS-pinned memory | ||||
|     int N_CORRELATORS = 3; | ||||
|     d_n_correlator_taps = 3; // Early, Prompt, and Late | ||||
|     // Get space for a vector with the C/A code replica sampled 1x/chip | ||||
|     cudaHostAlloc((void**)&d_ca_code, (GPS_L1_CA_CODE_LENGTH_CHIPS* sizeof(gr_complex)), cudaHostAllocMapped || cudaHostAllocWriteCombined); | ||||
|     cudaHostAlloc((void**)&d_ca_code, (static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS)* sizeof(gr_complex)), cudaHostAllocMapped || cudaHostAllocWriteCombined); | ||||
|     // Get space for the resampled early / prompt / late local replicas | ||||
|     cudaHostAlloc((void**)&d_local_code_shift_chips, N_CORRELATORS * sizeof(float),  cudaHostAllocMapped || cudaHostAllocWriteCombined); | ||||
|     cudaHostAlloc((void**)&d_local_code_shift_chips, d_n_correlator_taps * sizeof(float),  cudaHostAllocMapped || cudaHostAllocWriteCombined); | ||||
|     cudaHostAlloc((void**)&in_gpu, 2 * d_vector_length * sizeof(gr_complex), cudaHostAllocMapped || cudaHostAllocWriteCombined); | ||||
|     // correlator outputs (scalar) | ||||
|     cudaHostAlloc((void**)&d_corr_outs_gpu ,sizeof(gr_complex)*N_CORRELATORS, cudaHostAllocMapped ||  cudaHostAllocWriteCombined ); | ||||
|     cudaHostAlloc((void**)&d_correlator_outs ,sizeof(gr_complex)*d_n_correlator_taps, cudaHostAllocMapped ||  cudaHostAllocWriteCombined ); | ||||
|  | ||||
|     // Set TAPs delay values [chips] | ||||
|     d_local_code_shift_chips[0] = - d_early_late_spc_chips; | ||||
|     d_local_code_shift_chips[1] = 0.0; | ||||
|     d_local_code_shift_chips[2] = d_early_late_spc_chips; | ||||
|  | ||||
|     //map to EPL pointers | ||||
|     d_Early = &d_corr_outs_gpu[0]; | ||||
|     d_Prompt =  &d_corr_outs_gpu[1]; | ||||
|     d_Late = &d_corr_outs_gpu[2]; | ||||
|  | ||||
|     //--- Perform initializations ------------------------------ | ||||
|     multicorrelator_gpu = new cuda_multicorrelator(); | ||||
|     //local code resampler on GPU | ||||
|     multicorrelator_gpu->init_cuda_integrated_resampler(2 * d_vector_length, GPS_L1_CA_CODE_LENGTH_CHIPS, 3); | ||||
|     multicorrelator_gpu->set_input_output_vectors(d_corr_outs_gpu, in_gpu); | ||||
|     multicorrelator_gpu->init_cuda_integrated_resampler(2 * d_vector_length, GPS_L1_CA_CODE_LENGTH_CHIPS, d_n_correlator_taps); | ||||
|     multicorrelator_gpu->set_input_output_vectors(d_correlator_outs, in_gpu); | ||||
|  | ||||
|     // define initial code frequency basis of NCO | ||||
|     d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ; | ||||
|     // define residual code phase (in chips) | ||||
|     d_rem_code_phase_samples = 0.0; | ||||
|     // define residual carrier phase | ||||
|     d_rem_carr_phase_rad = 0.0; | ||||
|     d_rem_carrier_phase_rad = 0.0; | ||||
|  | ||||
|     // sample synchronization | ||||
|     d_sample_counter = 0; | ||||
| @@ -156,8 +157,6 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc( | ||||
|     d_pull_in = false; | ||||
|     d_last_seg = 0; | ||||
|  | ||||
|     d_current_prn_length_samples = static_cast<int>(d_vector_length); | ||||
|  | ||||
|     // CN0 estimation and lock detector buffers | ||||
|     d_cn0_estimation_counter = 0; | ||||
|     d_Prompt_buffer = new gr_complex[CN0_ESTIMATION_SAMPLES]; | ||||
| @@ -169,8 +168,7 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc( | ||||
|     systemName["G"] = std::string("GPS"); | ||||
|     systemName["S"] = std::string("SBAS"); | ||||
|  | ||||
|  | ||||
|     set_relative_rate(1.0/((double)d_vector_length*2)); | ||||
|     set_relative_rate(1.0 / (static_cast<double>(d_vector_length) * 2.0)); | ||||
|  | ||||
|     d_channel_internal_queue = 0; | ||||
|     d_acquisition_gnss_synchro = 0; | ||||
| @@ -178,9 +176,13 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc( | ||||
|     d_acq_code_phase_samples = 0.0; | ||||
|     d_acq_carrier_doppler_hz = 0.0; | ||||
|     d_carrier_doppler_hz = 0.0; | ||||
|     d_acc_carrier_phase_rad = 0.0; | ||||
|     d_acc_carrier_phase_cycles = 0.0; | ||||
|     d_code_phase_samples = 0.0; | ||||
|     d_acc_code_phase_secs = 0.0; | ||||
|  | ||||
|     d_pll_to_dll_assist_secs_Ti = 0.0; | ||||
|     d_rem_code_phase_chips = 0.0; | ||||
|     d_code_phase_step_chips = 0.0; | ||||
|     d_carrier_phase_step_rad = 0.0; | ||||
|     //set_min_output_buffer((long int)300); | ||||
| } | ||||
|  | ||||
| @@ -192,7 +194,7 @@ void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::start_tracking() | ||||
|      */ | ||||
|     d_acq_code_phase_samples = d_acquisition_gnss_synchro->Acq_delay_samples; | ||||
|     d_acq_carrier_doppler_hz = d_acquisition_gnss_synchro->Acq_doppler_hz; | ||||
|     d_acq_sample_stamp =  d_acquisition_gnss_synchro->Acq_samplestamp_samples; | ||||
|     d_acq_sample_stamp = d_acquisition_gnss_synchro->Acq_samplestamp_samples; | ||||
|  | ||||
|     long int acq_trk_diff_samples; | ||||
|     double acq_trk_diff_seconds; | ||||
| @@ -207,15 +209,16 @@ void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::start_tracking() | ||||
|     double T_prn_mod_seconds; | ||||
|     double T_prn_mod_samples; | ||||
|     d_code_freq_chips = radial_velocity * GPS_L1_CA_CODE_RATE_HZ; | ||||
|     T_chip_mod_seconds = 1.0/d_code_freq_chips; | ||||
|     d_code_phase_step_chips = static_cast<double>(d_code_freq_chips) / static_cast<double>(d_fs_in); | ||||
|     T_chip_mod_seconds = 1/d_code_freq_chips; | ||||
|     T_prn_mod_seconds = T_chip_mod_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS; | ||||
|     T_prn_mod_samples = T_prn_mod_seconds * static_cast<double>(d_fs_in); | ||||
|  | ||||
|     d_current_prn_length_samples = round(T_prn_mod_samples); | ||||
|     d_correlation_length_samples = round(T_prn_mod_samples); | ||||
|  | ||||
|     double T_prn_true_seconds = GPS_L1_CA_CODE_LENGTH_CHIPS / GPS_L1_CA_CODE_RATE_HZ; | ||||
|     double T_prn_true_samples = T_prn_true_seconds * static_cast<double>(d_fs_in); | ||||
|     double T_prn_diff_seconds=  T_prn_true_seconds - T_prn_mod_seconds; | ||||
|     double T_prn_diff_seconds = T_prn_true_seconds - T_prn_mod_seconds; | ||||
|     double N_prn_diff = acq_trk_diff_seconds / T_prn_true_seconds; | ||||
|     double corrected_acq_phase_samples, delay_correction_samples; | ||||
|     corrected_acq_phase_samples = fmod((d_acq_code_phase_samples + T_prn_diff_seconds * N_prn_diff * static_cast<double>(d_fs_in)), T_prn_true_samples); | ||||
| @@ -229,25 +232,28 @@ void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::start_tracking() | ||||
|  | ||||
|     d_carrier_doppler_hz = d_acq_carrier_doppler_hz; | ||||
|  | ||||
|     d_carrier_phase_step_rad = GPS_TWO_PI * d_carrier_doppler_hz / static_cast<double>(d_fs_in); | ||||
|  | ||||
|     // DLL/PLL filter initialization | ||||
|     d_carrier_loop_filter.initialize(); // initialize the carrier filter | ||||
|     d_carrier_loop_filter.initialize(d_acq_carrier_doppler_hz); //The carrier loop filter implements the Doppler accumulator | ||||
|     d_code_loop_filter.initialize();    // initialize the code filter | ||||
|  | ||||
|     // generate local reference ALWAYS starting at chip 1 (1 sample per chip) | ||||
|     gps_l1_ca_code_gen_complex(d_ca_code, d_acquisition_gnss_synchro->PRN, 0); | ||||
|  | ||||
|     d_local_code_shift_chips[0] = - d_early_late_spc_chips; | ||||
|     d_local_code_shift_chips[1] = 0.0; | ||||
|     d_local_code_shift_chips[2] = d_early_late_spc_chips; | ||||
|     multicorrelator_gpu->set_local_code_and_taps(static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS), d_ca_code, d_local_code_shift_chips, d_n_correlator_taps); | ||||
|  | ||||
|     multicorrelator_gpu->set_local_code_and_taps(GPS_L1_CA_CODE_LENGTH_CHIPS, d_ca_code, d_local_code_shift_chips, 3); | ||||
|     for (int n = 0; n < d_n_correlator_taps; n++) | ||||
|         { | ||||
|             d_correlator_outs[n] = gr_complex(0,0); | ||||
|         } | ||||
|  | ||||
|     d_carrier_lock_fail_counter = 0; | ||||
|     d_rem_code_phase_samples = 0; | ||||
|     d_rem_carr_phase_rad = 0; | ||||
|     d_acc_carrier_phase_rad = 0; | ||||
|     d_acc_code_phase_secs = 0; | ||||
|  | ||||
|     d_rem_code_phase_samples = 0.0; | ||||
|     d_rem_carrier_phase_rad = 0.0; | ||||
|     d_rem_code_phase_chips = 0.0; | ||||
|     d_acc_carrier_phase_cycles = 0.0; | ||||
|     d_pll_to_dll_assist_secs_Ti = 0.0; | ||||
|     d_code_phase_samples = d_acq_code_phase_samples; | ||||
|  | ||||
|     std::string sys_ = &d_acquisition_gnss_synchro->System; | ||||
| @@ -270,14 +276,15 @@ void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::start_tracking() | ||||
|  | ||||
| Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::~Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc() | ||||
| { | ||||
|  | ||||
|     d_dump_file.close(); | ||||
|     cudaFreeHost(in_gpu); | ||||
|     cudaFreeHost(d_corr_outs_gpu); | ||||
|     cudaFreeHost(d_correlator_outs); | ||||
|     cudaFreeHost(d_local_code_shift_chips); | ||||
|     cudaFreeHost(d_ca_code); | ||||
|     multicorrelator_gpu->free_cuda(); | ||||
|     delete(multicorrelator_gpu); | ||||
|     delete[] d_Prompt_buffer; | ||||
|     delete(multicorrelator_gpu); | ||||
| } | ||||
|  | ||||
|  | ||||
| @@ -285,29 +292,34 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::~Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc() | ||||
| int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vector_int &ninput_items, | ||||
|         gr_vector_const_void_star &input_items, gr_vector_void_star &output_items) | ||||
| { | ||||
|     // process vars | ||||
| 	double carr_error_hz=0.0; | ||||
| 	double carr_error_filt_hz=0.0; | ||||
| 	double code_error_chips=0.0; | ||||
| 	double code_error_filt_chips=0.0; | ||||
|  | ||||
|     // Block input data and block output stream pointers | ||||
|     const gr_complex* in = (gr_complex*) input_items[0]; | ||||
|     const gr_complex* in = (gr_complex*) input_items[0]; //PRN start block alignment | ||||
|     Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0]; | ||||
|  | ||||
|     // GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder | ||||
|     Gnss_Synchro current_synchro_data = Gnss_Synchro(); | ||||
|  | ||||
|     // process vars | ||||
|     double code_error_chips_Ti = 0.0; | ||||
|     double code_error_filt_chips = 0.0; | ||||
|     double code_error_filt_secs_Ti = 0.0; | ||||
|     double CURRENT_INTEGRATION_TIME_S; | ||||
|     double CORRECTED_INTEGRATION_TIME_S; | ||||
|     double dll_code_error_secs_Ti = 0.0; | ||||
|     double carr_phase_error_secs_Ti = 0.0; | ||||
|     double old_d_rem_code_phase_samples; | ||||
|     if (d_enable_tracking == true) | ||||
|         { | ||||
|             // Receiver signal alignment | ||||
|             if (d_pull_in == true) | ||||
|                 { | ||||
|                     int samples_offset; | ||||
|                     double acq_trk_shif_correction_samples; | ||||
|                     int acq_to_trk_delay_samples; | ||||
|                     acq_to_trk_delay_samples = d_sample_counter - d_acq_sample_stamp; | ||||
|                     samples_offset = round(d_acq_code_phase_samples)+d_current_prn_length_samples - acq_to_trk_delay_samples%d_current_prn_length_samples; | ||||
|                     d_sample_counter = d_sample_counter + samples_offset; //count for the processed samples | ||||
|                     acq_trk_shif_correction_samples = d_correlation_length_samples - fmod(static_cast<double>(acq_to_trk_delay_samples), static_cast<double>(d_correlation_length_samples)); | ||||
|                     samples_offset = round(d_acq_code_phase_samples + acq_trk_shif_correction_samples); | ||||
|                     d_sample_counter += samples_offset; //count for the processed samples | ||||
|                     d_pull_in = false; | ||||
|                     // Fill the acquisition data | ||||
|                     current_synchro_data = *d_acquisition_gnss_synchro; | ||||
| @@ -319,46 +331,44 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto | ||||
|             // Fill the acquisition data | ||||
|             current_synchro_data = *d_acquisition_gnss_synchro; | ||||
|  | ||||
|             // UPDATE NCO COMMAND | ||||
|             double phase_step_rad = GPS_TWO_PI * d_carrier_doppler_hz / static_cast<double>(d_fs_in); | ||||
|             // ################# CARRIER WIPEOFF AND CORRELATORS ############################## | ||||
|             // perform carrier wipe-off and compute Early, Prompt and Late correlation | ||||
|  | ||||
|         	//code resampler on GPU (new) | ||||
|             double code_phase_step_chips = d_code_freq_chips / static_cast<double>(d_fs_in); | ||||
|             double rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in); | ||||
|  | ||||
|             memcpy(in_gpu, in, sizeof(gr_complex) * d_current_prn_length_samples); | ||||
|             memcpy(in_gpu, in, sizeof(gr_complex) * d_correlation_length_samples); | ||||
|             cudaProfilerStart(); | ||||
|             multicorrelator_gpu->Carrier_wipeoff_multicorrelator_resampler_cuda( static_cast<float>(d_rem_carr_phase_rad), | ||||
|             		static_cast<float>(phase_step_rad), | ||||
|             		static_cast<float>(code_phase_step_chips), | ||||
|             		static_cast<float>(rem_code_phase_chips), | ||||
|             		d_current_prn_length_samples, 3); | ||||
|             multicorrelator_gpu->Carrier_wipeoff_multicorrelator_resampler_cuda( static_cast<float>(d_rem_carrier_phase_rad), | ||||
|             		static_cast<float>(d_carrier_phase_step_rad), | ||||
|             		static_cast<float>(d_code_phase_step_chips), | ||||
|             		static_cast<float>(d_rem_code_phase_chips), | ||||
|             		d_correlation_length_samples, d_n_correlator_taps); | ||||
|             cudaProfilerStop(); | ||||
|             //std::cout<<"c_out[0]="<<d_correlator_outs[0]<<"c_out[1]="<<d_correlator_outs[1]<<"c_out[2]="<<d_correlator_outs[2]<<std::endl; | ||||
|  | ||||
|             // UPDATE INTEGRATION TIME | ||||
|             CURRENT_INTEGRATION_TIME_S = static_cast<double>(d_correlation_length_samples) / static_cast<double>(d_fs_in); | ||||
|  | ||||
|             // ################## PLL ########################################################## | ||||
|             // PLL discriminator | ||||
|             carr_error_hz = pll_cloop_two_quadrant_atan(*d_Prompt) / GPS_TWO_PI; | ||||
|             // Update PLL discriminator [rads/Ti -> Secs/Ti] | ||||
|             carr_phase_error_secs_Ti = pll_cloop_two_quadrant_atan(d_correlator_outs[1]) / GPS_TWO_PI; //prompt output | ||||
|             // Carrier discriminator filter | ||||
|             carr_error_filt_hz = d_carrier_loop_filter.get_carrier_nco(carr_error_hz); | ||||
|             // New carrier Doppler frequency estimation | ||||
|             d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_error_filt_hz; | ||||
|             // New code Doppler frequency estimation | ||||
|             // NOTICE: The carrier loop filter includes the Carrier Doppler accumulator, as described in Kaplan | ||||
|             //d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_phase_error_filt_secs_ti/INTEGRATION_TIME; | ||||
|             // Input [s/Ti] -> output [Hz] | ||||
|             d_carrier_doppler_hz = d_carrier_loop_filter.get_carrier_error(0.0, carr_phase_error_secs_Ti, CURRENT_INTEGRATION_TIME_S); | ||||
|             // PLL to DLL assistance [Secs/Ti] | ||||
|             d_pll_to_dll_assist_secs_Ti = (d_carrier_doppler_hz * CURRENT_INTEGRATION_TIME_S) / GPS_L1_FREQ_HZ; | ||||
|             // code Doppler frequency update | ||||
|             d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ + ((d_carrier_doppler_hz * GPS_L1_CA_CODE_RATE_HZ) / GPS_L1_FREQ_HZ); | ||||
|             //carrier phase accumulator for (K) doppler estimation | ||||
|             d_acc_carrier_phase_rad -= GPS_TWO_PI * d_carrier_doppler_hz * GPS_L1_CA_CODE_PERIOD; | ||||
|             //remanent carrier phase to prevent overflow in the code NCO | ||||
|             d_rem_carr_phase_rad = d_rem_carr_phase_rad + GPS_TWO_PI * d_carrier_doppler_hz * GPS_L1_CA_CODE_PERIOD; | ||||
|             d_rem_carr_phase_rad = fmod(d_rem_carr_phase_rad, GPS_TWO_PI); | ||||
|  | ||||
|             // ################## DLL ########################################################## | ||||
|             // DLL discriminator | ||||
|             code_error_chips = dll_nc_e_minus_l_normalized(*d_Early, *d_Late); //[chips/Ti] | ||||
|             code_error_chips_Ti = dll_nc_e_minus_l_normalized(d_correlator_outs[0], d_correlator_outs[2]); //[chips/Ti] //early and late | ||||
|             // Code discriminator filter | ||||
|             code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips); //[chips/second] | ||||
|             //Code phase accumulator | ||||
|             double code_error_filt_secs; | ||||
|             code_error_filt_secs = (GPS_L1_CA_CODE_PERIOD * code_error_filt_chips) / GPS_L1_CA_CODE_RATE_HZ; //[seconds] | ||||
|             d_acc_code_phase_secs = d_acc_code_phase_secs + code_error_filt_secs; | ||||
|             code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips_Ti); //input [chips/Ti] -> output [chips/second] | ||||
|             code_error_filt_secs_Ti = code_error_filt_chips*CURRENT_INTEGRATION_TIME_S/d_code_freq_chips; // [s/Ti] | ||||
|             // DLL code error estimation [s/Ti] | ||||
|             // TODO: PLL carrier aid to DLL is disabled. Re-enable it and measure performance | ||||
|             dll_code_error_secs_Ti = - code_error_filt_secs_Ti + d_pll_to_dll_assist_secs_Ti; | ||||
|  | ||||
|             // ################## CARRIER AND CODE NCO BUFFER ALIGNEMENT ####################### | ||||
|             // keep alignment parameters for the next input buffer | ||||
| @@ -367,17 +377,38 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto | ||||
|             double T_prn_samples; | ||||
|             double K_blk_samples; | ||||
|             // Compute the next buffer length based in the new period of the PRN sequence and the code phase error estimation | ||||
|             T_chip_seconds = 1.0 / d_code_freq_chips; | ||||
|             T_chip_seconds = 1 / d_code_freq_chips; | ||||
|             T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS; | ||||
|             T_prn_samples = T_prn_seconds * static_cast<double>(d_fs_in); | ||||
|             K_blk_samples = T_prn_samples + d_rem_code_phase_samples + code_error_filt_secs * static_cast<double>(d_fs_in); | ||||
|             //d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample | ||||
|             K_blk_samples = T_prn_samples + d_rem_code_phase_samples - dll_code_error_secs_Ti * static_cast<double>(d_fs_in); | ||||
|  | ||||
|             // ####### CN0 ESTIMATION AND LOCK DETECTORS ###### | ||||
|             d_correlation_length_samples = round(K_blk_samples); //round to a discrete samples | ||||
|             old_d_rem_code_phase_samples=d_rem_code_phase_samples; | ||||
|             d_rem_code_phase_samples = K_blk_samples - static_cast<double>(d_correlation_length_samples); //rounding error < 1 sample | ||||
|  | ||||
|             // UPDATE REMNANT CARRIER PHASE | ||||
|             CORRECTED_INTEGRATION_TIME_S=(static_cast<double>(d_correlation_length_samples)/static_cast<double>(d_fs_in)); | ||||
|             //remnant carrier phase [rad] | ||||
|             d_rem_carrier_phase_rad = fmod(d_rem_carrier_phase_rad + GPS_TWO_PI * d_carrier_doppler_hz * CORRECTED_INTEGRATION_TIME_S, GPS_TWO_PI); | ||||
|             // UPDATE CARRIER PHASE ACCUULATOR | ||||
|             //carrier phase accumulator prior to update the PLL estimators (accumulated carrier in this loop depends on the old estimations!) | ||||
|             d_acc_carrier_phase_cycles -= d_carrier_doppler_hz * CORRECTED_INTEGRATION_TIME_S; | ||||
|  | ||||
|             //################### PLL COMMANDS ################################################# | ||||
|             //carrier phase step (NCO phase increment per sample) [rads/sample] | ||||
|             d_carrier_phase_step_rad = GPS_TWO_PI * d_carrier_doppler_hz / static_cast<double>(d_fs_in); | ||||
|  | ||||
|             //################### DLL COMMANDS ################################################# | ||||
|             //code phase step (Code resampler phase increment per sample) [chips/sample] | ||||
|             d_code_phase_step_chips = d_code_freq_chips / static_cast<double>(d_fs_in); | ||||
|             //remnant code phase [chips] | ||||
|             d_rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / static_cast<double>(d_fs_in)); | ||||
|  | ||||
|             // ####### CN0 ESTIMATION AND LOCK DETECTORS ####################################### | ||||
|             if (d_cn0_estimation_counter < CN0_ESTIMATION_SAMPLES) | ||||
|                 { | ||||
|                     // fill buffer with prompt correlator output values | ||||
|                     d_Prompt_buffer[d_cn0_estimation_counter] = *d_Prompt; | ||||
|                     d_Prompt_buffer[d_cn0_estimation_counter] = d_correlator_outs[1]; //prompt | ||||
|                     d_cn0_estimation_counter++; | ||||
|                 } | ||||
|             else | ||||
| @@ -409,24 +440,15 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto | ||||
|                             d_enable_tracking = false; // TODO: check if disabling tracking is consistent with the channel state machine | ||||
|                         } | ||||
|                 } | ||||
|  | ||||
|             // ########### Output the tracking data to navigation and PVT ########## | ||||
|             current_synchro_data.Prompt_I = static_cast<double>((*d_Prompt).real()); | ||||
|             current_synchro_data.Prompt_Q = static_cast<double>((*d_Prompt).imag()); | ||||
|  | ||||
|             // Tracking_timestamp_secs is aligned with the NEXT PRN start sample (Hybridization problem!) | ||||
|             //compute remnant code phase samples BEFORE the Tracking timestamp | ||||
|             //d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample | ||||
|             //current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter + (double)d_current_prn_length_samples + (double)d_rem_code_phase_samples)/static_cast<double>(d_fs_in); | ||||
|  | ||||
|             // Tracking_timestamp_secs is aligned with the CURRENT PRN start sample (Hybridization OK!, but some glitches??) | ||||
|             current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + d_rem_code_phase_samples) / static_cast<double>(d_fs_in); | ||||
|             //compute remnant code phase samples AFTER the Tracking timestamp | ||||
|             d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample | ||||
|  | ||||
|             //current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter)/static_cast<double>(d_fs_in); | ||||
|             current_synchro_data.Prompt_I = static_cast<double>((d_correlator_outs[1]).real()); | ||||
|             current_synchro_data.Prompt_Q = static_cast<double>((d_correlator_outs[1]).imag()); | ||||
|             // Tracking_timestamp_secs is aligned with the CURRENT PRN start sample (Hybridization OK!) | ||||
|             current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + old_d_rem_code_phase_samples) / static_cast<double>(d_fs_in); | ||||
|             // This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0 | ||||
|             current_synchro_data.Code_phase_secs = 0; | ||||
|             current_synchro_data.Carrier_phase_rads = d_acc_carrier_phase_rad; | ||||
|             current_synchro_data.Carrier_phase_rads = GPS_TWO_PI * d_acc_carrier_phase_cycles; | ||||
|             current_synchro_data.Carrier_Doppler_hz = d_carrier_doppler_hz; | ||||
|             current_synchro_data.CN0_dB_hz = d_CN0_SNV_dB_Hz; | ||||
|             current_synchro_data.Flag_valid_pseudorange = false; | ||||
| @@ -444,7 +466,7 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto | ||||
|                             d_last_seg = floor(d_sample_counter / d_fs_in); | ||||
|                             std::cout << "Current input signal time = " << d_last_seg << " [s]" << std::endl; | ||||
|                             DLOG(INFO) << "GPS L1 C/A Tracking CH " << d_channel <<  ": Satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN) | ||||
|                                       << ", CN0 = " << d_CN0_SNV_dB_Hz << " [dB-Hz]" << std::endl; | ||||
|                                               << ", CN0 = " << d_CN0_SNV_dB_Hz << " [dB-Hz]" << std::endl; | ||||
|                             //if (d_last_seg==5) d_carrier_lock_fail_counter=500; //DEBUG: force unlock! | ||||
|                         } | ||||
|                 } | ||||
| @@ -454,7 +476,7 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto | ||||
|                         { | ||||
|                             d_last_seg = floor(d_sample_counter / d_fs_in); | ||||
|                             DLOG(INFO) << "Tracking CH " << d_channel <<  ": Satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN) | ||||
|                                        << ", CN0 = " << d_CN0_SNV_dB_Hz << " [dB-Hz]"; | ||||
|                                                << ", CN0 = " << d_CN0_SNV_dB_Hz << " [dB-Hz]"; | ||||
|                         } | ||||
|                 } | ||||
|         } | ||||
| @@ -477,9 +499,10 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto | ||||
|                             std::cout << tmp_str_stream.rdbuf() << std::flush; | ||||
|                         } | ||||
|                 } | ||||
|             *d_Early = gr_complex(0,0); | ||||
|             *d_Prompt = gr_complex(0,0); | ||||
|             *d_Late = gr_complex(0,0); | ||||
|             for (int n = 0; n < d_n_correlator_taps; n++) | ||||
|                 { | ||||
|                     d_correlator_outs[n] = gr_complex(0,0); | ||||
|                 } | ||||
|  | ||||
|             current_synchro_data.System = {'G'}; | ||||
|             current_synchro_data.Flag_valid_pseudorange = false; | ||||
| @@ -492,74 +515,65 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto | ||||
|             float prompt_I; | ||||
|             float prompt_Q; | ||||
|             float tmp_E, tmp_P, tmp_L; | ||||
|             float tmp_float; | ||||
|             double tmp_double; | ||||
|             prompt_I = (*d_Prompt).real(); | ||||
|             prompt_Q = (*d_Prompt).imag(); | ||||
|             tmp_E = std::abs<float>(*d_Early); | ||||
|             tmp_P = std::abs<float>(*d_Prompt); | ||||
|             tmp_L = std::abs<float>(*d_Late); | ||||
|             prompt_I = d_correlator_outs[1].real(); | ||||
|             prompt_Q = d_correlator_outs[1].imag(); | ||||
|             tmp_E = std::abs<float>(d_correlator_outs[0]); | ||||
|             tmp_P = std::abs<float>(d_correlator_outs[1]); | ||||
|             tmp_L = std::abs<float>(d_correlator_outs[2]); | ||||
|             try | ||||
|             { | ||||
|                     // EPR | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&tmp_E), sizeof(float)); | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&tmp_P), sizeof(float)); | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&tmp_L), sizeof(float)); | ||||
|                     // PROMPT I and Q (to analyze navigation symbols) | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&prompt_I), sizeof(float)); | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&prompt_Q), sizeof(float)); | ||||
|                     // PRN start sample stamp | ||||
|                     //tmp_float=(float)d_sample_counter; | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&d_sample_counter), sizeof(unsigned long int)); | ||||
|                     // accumulated carrier phase | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&d_acc_carrier_phase_cycles), sizeof(double)); | ||||
|  | ||||
|                 // EPR | ||||
|                 d_dump_file.write((char*)&tmp_E, sizeof(float)); | ||||
|                 d_dump_file.write((char*)&tmp_P, sizeof(float)); | ||||
|                 d_dump_file.write((char*)&tmp_L, sizeof(float)); | ||||
|                 // PROMPT I and Q (to analyze navigation symbols) | ||||
|                 d_dump_file.write((char*)&prompt_I, sizeof(float)); | ||||
|                 d_dump_file.write((char*)&prompt_Q, sizeof(float)); | ||||
|                 // PRN start sample stamp | ||||
|                 //tmp_float=(float)d_sample_counter; | ||||
|                 d_dump_file.write((char*)&d_sample_counter, sizeof(unsigned long int)); | ||||
|                 // accumulated carrier phase | ||||
|                 tmp_float = d_acc_carrier_phase_rad; | ||||
|                 d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|                     // carrier and code frequency | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&d_carrier_doppler_hz), sizeof(double)); | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&d_code_freq_chips), sizeof(double)); | ||||
|  | ||||
|                 // carrier and code frequency | ||||
|                 tmp_float = d_carrier_doppler_hz; | ||||
|                 d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|                 tmp_float = d_code_freq_chips; | ||||
|                 d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|                     //PLL commands | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&carr_phase_error_secs_Ti), sizeof(double)); | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&d_carrier_doppler_hz), sizeof(double)); | ||||
|  | ||||
|                 //PLL commands | ||||
|                 tmp_float = carr_error_hz; | ||||
|                 d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|                 tmp_float = carr_error_filt_hz; | ||||
|                 d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|                     //DLL commands | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&code_error_chips_Ti), sizeof(double)); | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&code_error_filt_chips), sizeof(double)); | ||||
|  | ||||
|                 //DLL commands | ||||
|                 tmp_float = code_error_chips; | ||||
|                 d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|                 tmp_float = code_error_filt_chips; | ||||
|                 d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|                     // CN0 and carrier lock test | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&d_CN0_SNV_dB_Hz), sizeof(double)); | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&d_carrier_lock_test), sizeof(double)); | ||||
|  | ||||
|                 // CN0 and carrier lock test | ||||
|                 tmp_float = d_CN0_SNV_dB_Hz; | ||||
|                 d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|                 tmp_float = d_carrier_lock_test; | ||||
|                 d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|  | ||||
|                 // AUX vars (for debug purposes) | ||||
|                 tmp_float = d_rem_code_phase_samples; | ||||
|                 d_dump_file.write((char*)&tmp_float, sizeof(float)); | ||||
|                 tmp_double = (double)(d_sample_counter + d_current_prn_length_samples); | ||||
|                 d_dump_file.write((char*)&tmp_double, sizeof(double)); | ||||
|                     // AUX vars (for debug purposes) | ||||
|                     tmp_double = d_rem_code_phase_samples; | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&tmp_double), sizeof(double)); | ||||
|                     tmp_double = static_cast<double>(d_sample_counter + d_correlation_length_samples); | ||||
|                     d_dump_file.write(reinterpret_cast<char*>(&tmp_double), sizeof(double)); | ||||
|             } | ||||
|             catch (std::ifstream::failure e) | ||||
|             catch (const std::ifstream::failure* e) | ||||
|             { | ||||
|                     LOG(WARNING) << "Exception writing trk dump file " << e.what(); | ||||
|                     LOG(WARNING) << "Exception writing trk dump file " << e->what(); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|     consume_each(d_current_prn_length_samples); // this is necessary in gr::block derivates | ||||
|     d_sample_counter += d_current_prn_length_samples; //count for the processed samples | ||||
|     //LOG(INFO)<<"GPS tracking output end on CH="<<this->d_channel << " SAMPLE STAMP="<<d_sample_counter<<std::endl; | ||||
|     consume_each(d_correlation_length_samples); // this is necessary in gr::block derivates | ||||
|     d_sample_counter += d_correlation_length_samples; //count for the processed samples | ||||
|  | ||||
|     if((noutput_items == 0) || (ninput_items[0] == 0)) | ||||
|         { | ||||
|             LOG(WARNING) << "noutput_items = 0"; | ||||
|         } | ||||
|     return 1; //output tracking result ALWAYS even in the case of d_enable_tracking==false | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::set_channel(unsigned int channel) | ||||
| { | ||||
|     d_channel = channel; | ||||
| @@ -577,22 +591,19 @@ void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::set_channel(unsigned int channel) | ||||
|                             d_dump_file.open(d_dump_filename.c_str(), std::ios::out | std::ios::binary); | ||||
|                             LOG(INFO) << "Tracking dump enabled on channel " << d_channel << " Log file: " << d_dump_filename.c_str() << std::endl; | ||||
|                     } | ||||
|                     catch (std::ifstream::failure e) | ||||
|                     catch (const std::ifstream::failure* e) | ||||
|                     { | ||||
|                             LOG(WARNING) << "channel " << d_channel << " Exception opening trk dump file " << e.what() << std::endl; | ||||
|                             LOG(WARNING) << "channel " << d_channel << " Exception opening trk dump file " << e->what() << std::endl; | ||||
|                     } | ||||
|                 } | ||||
|         } | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
| void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::set_channel_queue(concurrent_queue<int> *channel_internal_queue) | ||||
| { | ||||
|     d_channel_internal_queue = channel_internal_queue; | ||||
| } | ||||
|  | ||||
|  | ||||
| void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::set_gnss_synchro(Gnss_Synchro* p_gnss_synchro) | ||||
| { | ||||
|     d_acquisition_gnss_synchro = p_gnss_synchro; | ||||
|   | ||||
| @@ -48,7 +48,7 @@ | ||||
| #include "gps_sdr_signal_processing.h" | ||||
| #include "gnss_synchro.h" | ||||
| #include "tracking_2nd_DLL_filter.h" | ||||
| #include "tracking_2nd_PLL_filter.h" | ||||
| #include "tracking_FLL_PLL_filter.h" | ||||
| #include "cuda_multicorrelator.h" | ||||
|  | ||||
| class Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc; | ||||
| @@ -124,12 +124,13 @@ private: | ||||
|     long d_fs_in; | ||||
|  | ||||
|     double d_early_late_spc_chips; | ||||
|     int d_n_correlator_taps; | ||||
|  | ||||
|  | ||||
|     //GPU HOST PINNED MEMORY IN/OUT VECTORS | ||||
|     gr_complex* in_gpu; | ||||
|     float* d_local_code_shift_chips; | ||||
|     gr_complex* d_corr_outs_gpu; | ||||
|     gr_complex* d_correlator_outs; | ||||
|     cuda_multicorrelator *multicorrelator_gpu; | ||||
|     gr_complex* d_ca_code; | ||||
|  | ||||
| @@ -140,11 +141,12 @@ private: | ||||
|  | ||||
|     // remaining code phase and carrier phase between tracking loops | ||||
|     double d_rem_code_phase_samples; | ||||
|     double d_rem_carr_phase_rad; | ||||
|     double d_rem_code_phase_chips; | ||||
|     double d_rem_carrier_phase_rad; | ||||
|  | ||||
|     // PLL and DLL filter library | ||||
|     Tracking_2nd_DLL_filter d_code_loop_filter; | ||||
|     Tracking_2nd_PLL_filter d_carrier_loop_filter; | ||||
|     Tracking_FLL_PLL_filter d_carrier_loop_filter; | ||||
|  | ||||
|     // acquisition | ||||
|     double d_acq_code_phase_samples; | ||||
| @@ -152,13 +154,15 @@ private: | ||||
|  | ||||
|     // tracking vars | ||||
|     double d_code_freq_chips; | ||||
|     double d_code_phase_step_chips; | ||||
|     double d_carrier_doppler_hz; | ||||
|     double d_acc_carrier_phase_rad; | ||||
|     double d_carrier_phase_step_rad; | ||||
|     double d_acc_carrier_phase_cycles; | ||||
|     double d_code_phase_samples; | ||||
|     double d_acc_code_phase_secs; | ||||
|     double d_pll_to_dll_assist_secs_Ti; | ||||
|  | ||||
|     //PRN period in samples | ||||
|     int d_current_prn_length_samples; | ||||
|     //Integration period in samples | ||||
|     int d_correlation_length_samples; | ||||
|  | ||||
|     //processing samples counters | ||||
|     unsigned long int d_sample_counter; | ||||
|   | ||||
| @@ -47,7 +47,7 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips( | ||||
|     GPU_Complex *d_sig_wiped, | ||||
|     GPU_Complex *d_local_code_in, | ||||
|     float *d_shifts_chips, | ||||
|     float code_length_chips, | ||||
|     int code_length_chips, | ||||
|     float code_phase_step_chips, | ||||
|     float rem_code_phase_chips, | ||||
|     int vectorN, | ||||
| @@ -90,7 +90,7 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips( | ||||
|         for (int iAccum = threadIdx.x; iAccum < ACCUM_N; iAccum += blockDim.x) | ||||
|         { | ||||
|         	GPU_Complex sum = GPU_Complex(0,0); | ||||
|             float local_code_chip_index; | ||||
|             float local_code_chip_index=0.0;; | ||||
|             //float code_phase; | ||||
|             for (int pos = iAccum; pos < elementN; pos += ACCUM_N) | ||||
|             { | ||||
| @@ -105,7 +105,7 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips( | ||||
|             	local_code_chip_index= fmodf(code_phase_step_chips*__int2float_rd(pos)+ d_shifts_chips[vec] - rem_code_phase_chips, code_length_chips); | ||||
|  | ||||
|             	//Take into account that in multitap correlators, the shifts can be negative! | ||||
|             	if (local_code_chip_index<0.0) local_code_chip_index+=code_length_chips; | ||||
|             	if (local_code_chip_index<0.0) local_code_chip_index+=(code_length_chips-1); | ||||
|             	//printf("vec= %i, pos %i, chip_idx=%i chip_shift=%f \r\n",vec, pos,__float2int_rd(local_code_chip_index),local_code_chip_index); | ||||
|             	// 2.correlate | ||||
|             	sum.multiply_acc(d_sig_wiped[pos],d_local_code_in[__float2int_rd(local_code_chip_index)]); | ||||
| @@ -143,52 +143,52 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler( | ||||
| { | ||||
| 	// use command-line specified CUDA device, otherwise use device with highest Gflops/s | ||||
| //	findCudaDevice(argc, (const char **)argv); | ||||
| //      cudaDeviceProp  prop; | ||||
| //    int num_devices, device; | ||||
| //    cudaGetDeviceCount(&num_devices); | ||||
| //    if (num_devices > 1) { | ||||
| //          int max_multiprocessors = 0, max_device = 0; | ||||
| //          for (device = 0; device < num_devices; device++) { | ||||
| //                  cudaDeviceProp properties; | ||||
| //                  cudaGetDeviceProperties(&properties, device); | ||||
| //                  if (max_multiprocessors < properties.multiProcessorCount) { | ||||
| //                          max_multiprocessors = properties.multiProcessorCount; | ||||
| //                          max_device = device; | ||||
| //                  } | ||||
| //                  printf("Found GPU device # %i\n",device); | ||||
| //          } | ||||
| //          //cudaSetDevice(max_device); | ||||
| // | ||||
| //          //set random device! | ||||
| //          cudaSetDevice(rand() % num_devices); //generates a random number between 0 and num_devices to split the threads between GPUs | ||||
| // | ||||
| //          cudaGetDeviceProperties( &prop, max_device ); | ||||
| //          //debug code | ||||
| //          if (prop.canMapHostMemory != 1) { | ||||
| //              printf( "Device can not map memory.\n" ); | ||||
| //          } | ||||
| //          printf("L2 Cache size= %u \n",prop.l2CacheSize); | ||||
| //          printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock); | ||||
| //          printf("maxGridSize= %i \n",prop.maxGridSize[0]); | ||||
| //          printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock); | ||||
| //          printf("deviceOverlap= %i \n",prop.deviceOverlap); | ||||
| //  	    printf("multiProcessorCount= %i \n",prop.multiProcessorCount); | ||||
| //    }else{ | ||||
| //    	    int whichDevice; | ||||
| //    	    cudaGetDevice( &whichDevice ); | ||||
| //    	    cudaGetDeviceProperties( &prop, whichDevice ); | ||||
| //    	    //debug code | ||||
| //    	    if (prop.canMapHostMemory != 1) { | ||||
| //    	        printf( "Device can not map memory.\n" ); | ||||
| //    	    } | ||||
| // | ||||
| //    	    printf("L2 Cache size= %u \n",prop.l2CacheSize); | ||||
| //    	    printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock); | ||||
| //    	    printf("maxGridSize= %i \n",prop.maxGridSize[0]); | ||||
| //    	    printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock); | ||||
| //    	    printf("deviceOverlap= %i \n",prop.deviceOverlap); | ||||
| //    	    printf("multiProcessorCount= %i \n",prop.multiProcessorCount); | ||||
| //    } | ||||
|       cudaDeviceProp  prop; | ||||
|     int num_devices, device; | ||||
|     cudaGetDeviceCount(&num_devices); | ||||
|     if (num_devices > 1) { | ||||
|           int max_multiprocessors = 0, max_device = 0; | ||||
|           for (device = 0; device < num_devices; device++) { | ||||
|                   cudaDeviceProp properties; | ||||
|                   cudaGetDeviceProperties(&properties, device); | ||||
|                   if (max_multiprocessors < properties.multiProcessorCount) { | ||||
|                           max_multiprocessors = properties.multiProcessorCount; | ||||
|                           max_device = device; | ||||
|                   } | ||||
|                   printf("Found GPU device # %i\n",device); | ||||
|           } | ||||
|           //cudaSetDevice(max_device); | ||||
|  | ||||
|           //set random device! | ||||
|           cudaSetDevice(rand() % num_devices); //generates a random number between 0 and num_devices to split the threads between GPUs | ||||
|  | ||||
|           cudaGetDeviceProperties( &prop, max_device ); | ||||
|           //debug code | ||||
|           if (prop.canMapHostMemory != 1) { | ||||
|               printf( "Device can not map memory.\n" ); | ||||
|           } | ||||
|           printf("L2 Cache size= %u \n",prop.l2CacheSize); | ||||
|           printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock); | ||||
|           printf("maxGridSize= %i \n",prop.maxGridSize[0]); | ||||
|           printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock); | ||||
|           printf("deviceOverlap= %i \n",prop.deviceOverlap); | ||||
|   	    printf("multiProcessorCount= %i \n",prop.multiProcessorCount); | ||||
|     }else{ | ||||
|     	    int whichDevice; | ||||
|     	    cudaGetDevice( &whichDevice ); | ||||
|     	    cudaGetDeviceProperties( &prop, whichDevice ); | ||||
|     	    //debug code | ||||
|     	    if (prop.canMapHostMemory != 1) { | ||||
|     	        printf( "Device can not map memory.\n" ); | ||||
|     	    } | ||||
|  | ||||
|     	    printf("L2 Cache size= %u \n",prop.l2CacheSize); | ||||
|     	    printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock); | ||||
|     	    printf("maxGridSize= %i \n",prop.maxGridSize[0]); | ||||
|     	    printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock); | ||||
|     	    printf("deviceOverlap= %i \n",prop.deviceOverlap); | ||||
|     	    printf("multiProcessorCount= %i \n",prop.multiProcessorCount); | ||||
|     } | ||||
|  | ||||
| 	// (cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared)); | ||||
|  | ||||
| @@ -228,7 +228,7 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler( | ||||
|     // Launch the Vector Add CUDA Kernel | ||||
|     // TODO: write a smart load balance using device info! | ||||
| 	threadsPerBlock = 64; | ||||
|     blocksPerGrid =(int)(signal_length_samples+threadsPerBlock-1)/threadsPerBlock; | ||||
|     blocksPerGrid = 128;//(int)(signal_length_samples+threadsPerBlock-1)/threadsPerBlock; | ||||
|  | ||||
| 	cudaStreamCreate (&stream1) ; | ||||
| 	//cudaStreamCreate (&stream2) ; | ||||
| @@ -261,7 +261,7 @@ bool cuda_multicorrelator::set_local_code_and_taps( | ||||
| 	//******** CudaMalloc version *********** | ||||
|     //local code CPU -> GPU copy memory | ||||
|     cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex)*code_length_chips, cudaMemcpyHostToDevice,stream1); | ||||
|     d_code_length_chips=(float)code_length_chips; | ||||
|     d_code_length_chips=code_length_chips; | ||||
|  | ||||
|     //Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!) | ||||
|     cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float)*n_correlators, | ||||
| @@ -292,6 +292,17 @@ bool cuda_multicorrelator::set_input_output_vectors( | ||||
| 	return true; | ||||
|  | ||||
| } | ||||
|  | ||||
| #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } | ||||
| inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) | ||||
| { | ||||
|    if (code != cudaSuccess) | ||||
|    { | ||||
|       fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); | ||||
|       if (abort) exit(code); | ||||
|    } | ||||
| } | ||||
|  | ||||
| bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda( | ||||
| 		float rem_carrier_phase_in_rad, | ||||
| 		float phase_step_rad, | ||||
| @@ -325,14 +336,14 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda( | ||||
| 			phase_step_rad | ||||
| 			); | ||||
|  | ||||
|     //cudaGetLastError(); | ||||
|     //wait for correlators end... | ||||
|     cudaStreamSynchronize(stream1); | ||||
|     gpuErrchk( cudaPeekAtLastError() ); | ||||
|     gpuErrchk( cudaStreamSynchronize(stream1)); | ||||
|  | ||||
| 	// cudaMemCpy version | ||||
|     // Copy the device result vector in device memory to the host result vector | ||||
|     // in host memory. | ||||
|     //scalar products (correlators outputs) | ||||
|     //cudaMemcpyAsync(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators, | ||||
|     //cudaMemcpyAsync(d_corr_out_cpu, d_corr_out, sizeof(std::complex<float>)*n_correlators, | ||||
|     //        cudaMemcpyDeviceToHost,stream1); | ||||
|     return true; | ||||
| } | ||||
|   | ||||
| @@ -155,7 +155,7 @@ private: | ||||
|  | ||||
|     int *d_shifts_samples; | ||||
|     float *d_shifts_chips; | ||||
|     float d_code_length_chips; | ||||
|     int d_code_length_chips; | ||||
|  | ||||
|     int threadsPerBlock; | ||||
|     int blocksPerGrid; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Javier Arribas
					Javier Arribas