diff --git a/conf/gnss-sdr_GPS_L1_gr_complex_gpu.conf b/conf/gnss-sdr_GPS_L1_gr_complex_gpu.conf index 8787ef5aa..52b178176 100644 --- a/conf/gnss-sdr_GPS_L1_gr_complex_gpu.conf +++ b/conf/gnss-sdr_GPS_L1_gr_complex_gpu.conf @@ -17,10 +17,10 @@ ControlThread.wait_for_flowgraph=false SignalSource.implementation=File_Signal_Source ;#filename: path to file with the captured GNSS signal samples to be processed -SignalSource.filename=/media/javier/SISTEMA/signals/New York/4msps.dat +SignalSource.filename=/home/javier/ClionProjects/gnss-sim/build/signal_out.bin ;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version. -SignalSource.item_type=gr_complex +SignalSource.item_type=byte ;#sampling_frequency: Original Signal sampling frequency in [Hz] SignalSource.sampling_frequency=4000000 @@ -28,12 +28,6 @@ SignalSource.sampling_frequency=4000000 ;#freq: RF front-end center frequency in [Hz] SignalSource.freq=1575420000 -;#gain: Front-end Gain in [dB] -SignalSource.gain=60 - -;#subdevice: UHD subdevice specification (for USRP1 use A:0 or B:0) -SignalSource.subdevice=B:0 - ;#samples: Number of samples to be processed. Notice that 0 indicates the entire file. SignalSource.samples=0 @@ -58,12 +52,12 @@ SignalSource.enable_throttle_control=false ;#[Pass_Through] disables this block and the [DataTypeAdapter], [InputFilter] and [Resampler] blocks ;#[Signal_Conditioner] enables this block. Then you have to configure [DataTypeAdapter], [InputFilter] and [Resampler] blocks ;SignalConditioner.implementation=Signal_Conditioner -SignalConditioner.implementation=Pass_Through +SignalConditioner.implementation=Signal_Conditioner ;######### DATA_TYPE_ADAPTER CONFIG ############ ;## Changes the type of input data. Please disable it in this version. ;#implementation: [Pass_Through] disables this block -DataTypeAdapter.implementation=Pass_Through +DataTypeAdapter.implementation=Ibyte_To_Complex ;######### INPUT_FILTER CONFIG ############ ;## Filter the input data. Can be combined with frequency translation for IF signals @@ -210,13 +204,13 @@ Acquisition_GPS.sampled_ms=1 ;#implementation: Acquisition algorithm selection for this channel: [GPS_L1_CA_PCPS_Acquisition] or [Galileo_E1_PCPS_Ambiguous_Acquisition] Acquisition_GPS.implementation=GPS_L1_CA_PCPS_Acquisition ;#threshold: Acquisition threshold -Acquisition_GPS.threshold=0.005 +Acquisition_GPS.threshold=0.06 ;#pfa: Acquisition false alarm probability. This option overrides the threshold option. Only use with implementations: [GPS_L1_CA_PCPS_Acquisition] or [Galileo_E1_PCPS_Ambiguous_Acquisition] ;Acquisition_GPS.pfa=0.01 ;#doppler_max: Maximum expected Doppler shift [Hz] -Acquisition_GPS.doppler_max=10000 +Acquisition_GPS.doppler_max=6000 ;#doppler_max: Doppler step in the grid search [Hz] -Acquisition_GPS.doppler_step=500 +Acquisition_GPS.doppler_step=100 ;######### TRACKING GLOBAL CONFIG ############ @@ -235,7 +229,7 @@ Tracking_GPS.dump=true Tracking_GPS.dump_filename=../data/epl_tracking_ch_ ;#pll_bw_hz: PLL loop filter bandwidth [Hz] -Tracking_GPS.pll_bw_hz=55.0; +Tracking_GPS.pll_bw_hz=15.0; ;#dll_bw_hz: DLL loop filter bandwidth [Hz] Tracking_GPS.dll_bw_hz=1.5 diff --git a/conf/gnss-sdr_Hybrid_byte_sim.conf b/conf/gnss-sdr_Hybrid_byte_sim.conf index 2f8cb0654..5102e3517 100644 --- a/conf/gnss-sdr_Hybrid_byte_sim.conf +++ b/conf/gnss-sdr_Hybrid_byte_sim.conf @@ -233,7 +233,7 @@ Acquisition_1B.doppler_step=125 ;######### TRACKING GPS CONFIG ############ ;#implementation: Selected tracking algorithm: [GPS_L1_CA_DLL_PLL_Tracking] or [GPS_L1_CA_DLL_FLL_PLL_Tracking] or [GPS_L1_CA_TCP_CONNECTOR_Tracking] or [Galileo_E1_DLL_PLL_VEML_Tracking] -Tracking_1C.implementation=GPS_L1_CA_DLL_PLL_Artemisa_Tracking +Tracking_1C.implementation=GPS_L1_CA_DLL_PLL_Tracking ;#item_type: Type and resolution for each of the signal samples. Use only [gr_complex] in this version. Tracking_1C.item_type=gr_complex diff --git a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc index fb3875936..d4fffeb55 100644 --- a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc +++ b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc @@ -195,30 +195,30 @@ void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::start_tracking() d_acq_sample_stamp = d_acquisition_gnss_synchro->Acq_samplestamp_samples; long int acq_trk_diff_samples; - float acq_trk_diff_seconds; + double acq_trk_diff_seconds; acq_trk_diff_samples = static_cast(d_sample_counter) - static_cast(d_acq_sample_stamp);//-d_vector_length; DLOG(INFO) << "Number of samples between Acquisition and Tracking =" << acq_trk_diff_samples; - acq_trk_diff_seconds = static_cast(acq_trk_diff_samples) / static_cast(d_fs_in); + acq_trk_diff_seconds = static_cast(acq_trk_diff_samples) / static_cast(d_fs_in); //doppler effect // Fd=(C/(C+Vr))*F - float radial_velocity = (GPS_L1_FREQ_HZ + d_acq_carrier_doppler_hz) / GPS_L1_FREQ_HZ; + double radial_velocity = (GPS_L1_FREQ_HZ + d_acq_carrier_doppler_hz) / GPS_L1_FREQ_HZ; // new chip and prn sequence periods based on acq Doppler - float T_chip_mod_seconds; - float T_prn_mod_seconds; - float T_prn_mod_samples; + double T_chip_mod_seconds; + double T_prn_mod_seconds; + double T_prn_mod_samples; d_code_freq_chips = radial_velocity * GPS_L1_CA_CODE_RATE_HZ; - T_chip_mod_seconds = 1/d_code_freq_chips; + T_chip_mod_seconds = 1.0/d_code_freq_chips; T_prn_mod_seconds = T_chip_mod_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS; - T_prn_mod_samples = T_prn_mod_seconds * static_cast(d_fs_in); + T_prn_mod_samples = T_prn_mod_seconds * static_cast(d_fs_in); d_current_prn_length_samples = round(T_prn_mod_samples); - float T_prn_true_seconds = GPS_L1_CA_CODE_LENGTH_CHIPS / GPS_L1_CA_CODE_RATE_HZ; - float T_prn_true_samples = T_prn_true_seconds * static_cast(d_fs_in); - float T_prn_diff_seconds= T_prn_true_seconds - T_prn_mod_seconds; - float N_prn_diff = acq_trk_diff_seconds / T_prn_true_seconds; - float corrected_acq_phase_samples, delay_correction_samples; - corrected_acq_phase_samples = fmod((d_acq_code_phase_samples + T_prn_diff_seconds * N_prn_diff * static_cast(d_fs_in)), T_prn_true_samples); + double T_prn_true_seconds = GPS_L1_CA_CODE_LENGTH_CHIPS / GPS_L1_CA_CODE_RATE_HZ; + double T_prn_true_samples = T_prn_true_seconds * static_cast(d_fs_in); + double T_prn_diff_seconds= T_prn_true_seconds - T_prn_mod_seconds; + double N_prn_diff = acq_trk_diff_seconds / T_prn_true_seconds; + double corrected_acq_phase_samples, delay_correction_samples; + corrected_acq_phase_samples = fmod((d_acq_code_phase_samples + T_prn_diff_seconds * N_prn_diff * static_cast(d_fs_in)), T_prn_true_samples); if (corrected_acq_phase_samples < 0) { corrected_acq_phase_samples = T_prn_mod_samples + corrected_acq_phase_samples; @@ -286,10 +286,10 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto gr_vector_const_void_star &input_items, gr_vector_void_star &output_items) { // process vars - float carr_error_hz=0.0; - float carr_error_filt_hz=0.0; - float code_error_chips=0.0; - float code_error_filt_chips=0.0; + double carr_error_hz=0.0; + double carr_error_filt_hz=0.0; + double code_error_chips=0.0; + double code_error_filt_chips=0.0; // Block input data and block output stream pointers const gr_complex* in = (gr_complex*) input_items[0]; @@ -320,20 +320,24 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto current_synchro_data = *d_acquisition_gnss_synchro; // UPDATE NCO COMMAND - float phase_step_rad = static_cast(GPS_TWO_PI) * d_carrier_doppler_hz / static_cast(d_fs_in); + double phase_step_rad = GPS_TWO_PI * d_carrier_doppler_hz / static_cast(d_fs_in); //code resampler on GPU (new) - float code_phase_step_chips = static_cast(d_code_freq_chips) / static_cast(d_fs_in); - float rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in); + double code_phase_step_chips = d_code_freq_chips / static_cast(d_fs_in); + double rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in); memcpy(in_gpu, in, sizeof(gr_complex) * d_current_prn_length_samples); cudaProfilerStart(); - multicorrelator_gpu->Carrier_wipeoff_multicorrelator_resampler_cuda(d_rem_carr_phase_rad, phase_step_rad, code_phase_step_chips, rem_code_phase_chips, d_current_prn_length_samples, 3); + multicorrelator_gpu->Carrier_wipeoff_multicorrelator_resampler_cuda( static_cast(d_rem_carr_phase_rad), + static_cast(phase_step_rad), + static_cast(code_phase_step_chips), + static_cast(rem_code_phase_chips), + d_current_prn_length_samples, 3); cudaProfilerStop(); // ################## PLL ########################################################## // PLL discriminator - carr_error_hz = pll_cloop_two_quadrant_atan(*d_Prompt) / static_cast(GPS_TWO_PI); + carr_error_hz = pll_cloop_two_quadrant_atan(*d_Prompt) / GPS_TWO_PI; // Carrier discriminator filter carr_error_filt_hz = d_carrier_loop_filter.get_carrier_nco(carr_error_hz); // New carrier Doppler frequency estimation @@ -352,7 +356,7 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto // Code discriminator filter code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips); //[chips/second] //Code phase accumulator - float code_error_filt_secs; + double code_error_filt_secs; code_error_filt_secs = (GPS_L1_CA_CODE_PERIOD * code_error_filt_chips) / GPS_L1_CA_CODE_RATE_HZ; //[seconds] d_acc_code_phase_secs = d_acc_code_phase_secs + code_error_filt_secs; @@ -363,10 +367,10 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto double T_prn_samples; double K_blk_samples; // Compute the next buffer length based in the new period of the PRN sequence and the code phase error estimation - T_chip_seconds = 1 / static_cast(d_code_freq_chips); + T_chip_seconds = 1.0 / d_code_freq_chips; T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS; T_prn_samples = T_prn_seconds * static_cast(d_fs_in); - K_blk_samples = T_prn_samples + d_rem_code_phase_samples + static_cast(code_error_filt_secs) * static_cast(d_fs_in); + K_blk_samples = T_prn_samples + d_rem_code_phase_samples + code_error_filt_secs * static_cast(d_fs_in); //d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample // ####### CN0 ESTIMATION AND LOCK DETECTORS ###### @@ -415,16 +419,16 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto //current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter + (double)d_current_prn_length_samples + (double)d_rem_code_phase_samples)/static_cast(d_fs_in); // Tracking_timestamp_secs is aligned with the CURRENT PRN start sample (Hybridization OK!, but some glitches??) - current_synchro_data.Tracking_timestamp_secs = (static_cast(d_sample_counter) + static_cast(d_rem_code_phase_samples)) / static_cast(d_fs_in); + current_synchro_data.Tracking_timestamp_secs = (static_cast(d_sample_counter) + d_rem_code_phase_samples) / static_cast(d_fs_in); //compute remnant code phase samples AFTER the Tracking timestamp d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample //current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter)/static_cast(d_fs_in); // This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0 current_synchro_data.Code_phase_secs = 0; - current_synchro_data.Carrier_phase_rads = static_cast(d_acc_carrier_phase_rad); - current_synchro_data.Carrier_Doppler_hz = static_cast(d_carrier_doppler_hz); - current_synchro_data.CN0_dB_hz = static_cast(d_CN0_SNV_dB_Hz); + current_synchro_data.Carrier_phase_rads = d_acc_carrier_phase_rad; + current_synchro_data.Carrier_Doppler_hz = d_carrier_doppler_hz; + current_synchro_data.CN0_dB_hz = d_CN0_SNV_dB_Hz; current_synchro_data.Flag_valid_pseudorange = false; *out[0] = current_synchro_data; @@ -497,41 +501,50 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto tmp_L = std::abs(*d_Late); try { - // EPR - d_dump_file.write(reinterpret_cast(&tmp_E), sizeof(float)); - d_dump_file.write(reinterpret_cast(&tmp_P), sizeof(float)); - d_dump_file.write(reinterpret_cast(&tmp_L), sizeof(float)); - // PROMPT I and Q (to analyze navigation symbols) - d_dump_file.write(reinterpret_cast(&prompt_I), sizeof(float)); - d_dump_file.write(reinterpret_cast(&prompt_Q), sizeof(float)); - // PRN start sample stamp - //tmp_float=(float)d_sample_counter; - d_dump_file.write(reinterpret_cast(&d_sample_counter), sizeof(unsigned long int)); - // accumulated carrier phase - d_dump_file.write(reinterpret_cast(&d_acc_carrier_phase_rad), sizeof(float)); - // carrier and code frequency - d_dump_file.write(reinterpret_cast(&d_carrier_doppler_hz), sizeof(float)); - tmp_float=d_code_freq_chips; - d_dump_file.write(reinterpret_cast(&tmp_float), sizeof(float)); + // EPR + d_dump_file.write((char*)&tmp_E, sizeof(float)); + d_dump_file.write((char*)&tmp_P, sizeof(float)); + d_dump_file.write((char*)&tmp_L, sizeof(float)); + // PROMPT I and Q (to analyze navigation symbols) + d_dump_file.write((char*)&prompt_I, sizeof(float)); + d_dump_file.write((char*)&prompt_Q, sizeof(float)); + // PRN start sample stamp + //tmp_float=(float)d_sample_counter; + d_dump_file.write((char*)&d_sample_counter, sizeof(unsigned long int)); + // accumulated carrier phase + tmp_float = d_acc_carrier_phase_rad; + d_dump_file.write((char*)&tmp_float, sizeof(float)); - //PLL commands - d_dump_file.write(reinterpret_cast(&carr_error_hz), sizeof(float)); - d_dump_file.write(reinterpret_cast(&carr_error_filt_hz), sizeof(float)); + // carrier and code frequency + tmp_float = d_carrier_doppler_hz; + d_dump_file.write((char*)&tmp_float, sizeof(float)); + tmp_float = d_code_freq_chips; + d_dump_file.write((char*)&tmp_float, sizeof(float)); - //DLL commands - d_dump_file.write(reinterpret_cast(&code_error_chips), sizeof(float)); - d_dump_file.write(reinterpret_cast(&code_error_filt_chips), sizeof(float)); + //PLL commands + tmp_float = carr_error_hz; + d_dump_file.write((char*)&tmp_float, sizeof(float)); + tmp_float = carr_error_filt_hz; + d_dump_file.write((char*)&tmp_float, sizeof(float)); - // CN0 and carrier lock test - d_dump_file.write(reinterpret_cast(&d_CN0_SNV_dB_Hz), sizeof(float)); - d_dump_file.write(reinterpret_cast(&d_carrier_lock_test), sizeof(float)); + //DLL commands + tmp_float = code_error_chips; + d_dump_file.write((char*)&tmp_float, sizeof(float)); + tmp_float = code_error_filt_chips; + d_dump_file.write((char*)&tmp_float, sizeof(float)); - // AUX vars (for debug purposes) - tmp_float = d_rem_code_phase_samples; - d_dump_file.write(reinterpret_cast(&tmp_float), sizeof(float)); - tmp_double = static_cast(d_sample_counter + d_current_prn_length_samples); - d_dump_file.write(reinterpret_cast(&tmp_double), sizeof(double)); + // CN0 and carrier lock test + tmp_float = d_CN0_SNV_dB_Hz; + d_dump_file.write((char*)&tmp_float, sizeof(float)); + tmp_float = d_carrier_lock_test; + d_dump_file.write((char*)&tmp_float, sizeof(float)); + + // AUX vars (for debug purposes) + tmp_float = d_rem_code_phase_samples; + d_dump_file.write((char*)&tmp_float, sizeof(float)); + tmp_double = (double)(d_sample_counter + d_current_prn_length_samples); + d_dump_file.write((char*)&tmp_double, sizeof(double)); } catch (std::ifstream::failure e) { diff --git a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.h b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.h index e632c48a4..b6842f466 100644 --- a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.h +++ b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.h @@ -140,22 +140,22 @@ private: // remaining code phase and carrier phase between tracking loops double d_rem_code_phase_samples; - float d_rem_carr_phase_rad; + double d_rem_carr_phase_rad; // PLL and DLL filter library Tracking_2nd_DLL_filter d_code_loop_filter; Tracking_2nd_PLL_filter d_carrier_loop_filter; // acquisition - float d_acq_code_phase_samples; - float d_acq_carrier_doppler_hz; + double d_acq_code_phase_samples; + double d_acq_carrier_doppler_hz; // tracking vars double d_code_freq_chips; - float d_carrier_doppler_hz; - float d_acc_carrier_phase_rad; - float d_code_phase_samples; - float d_acc_code_phase_secs; + double d_carrier_doppler_hz; + double d_acc_carrier_phase_rad; + double d_code_phase_samples; + double d_acc_code_phase_secs; //PRN period in samples int d_current_prn_length_samples; @@ -167,9 +167,9 @@ private: // CN0 estimation and lock detector int d_cn0_estimation_counter; gr_complex* d_Prompt_buffer; - float d_carrier_lock_test; - float d_CN0_SNV_dB_Hz; - float d_carrier_lock_threshold; + double d_carrier_lock_test; + double d_CN0_SNV_dB_Hz; + double d_carrier_lock_threshold; int d_carrier_lock_fail_counter; // control vars diff --git a/src/algorithms/tracking/libs/cuda_multicorrelator.cu b/src/algorithms/tracking/libs/cuda_multicorrelator.cu index 5017d1493..6ebec80f4 100644 --- a/src/algorithms/tracking/libs/cuda_multicorrelator.cu +++ b/src/algorithms/tracking/libs/cuda_multicorrelator.cu @@ -41,103 +41,6 @@ #define ACCUM_N 128 -__global__ void scalarProdGPUCPXxN_shifts_chips( - GPU_Complex *d_corr_out, - GPU_Complex *d_sig_in, - GPU_Complex *d_local_code_in, - float *d_shifts_chips, - float code_length_chips, - float code_phase_step_chips, - float rem_code_phase_chips, - int vectorN, - int elementN -) -{ - //Accumulators cache - __shared__ GPU_Complex accumResult[ACCUM_N]; - - //////////////////////////////////////////////////////////////////////////// - // Cycle through every pair of vectors, - // taking into account that vector counts can be different - // from total number of thread blocks - //////////////////////////////////////////////////////////////////////////// - for (int vec = blockIdx.x; vec < vectorN; vec += gridDim.x) - { - //int vectorBase = IMUL(elementN, vec); - //int vectorEnd = elementN; - - //////////////////////////////////////////////////////////////////////// - // Each accumulator cycles through vectors with - // stride equal to number of total number of accumulators ACCUM_N - // At this stage ACCUM_N is only preferred be a multiple of warp size - // to meet memory coalescing alignment constraints. - //////////////////////////////////////////////////////////////////////// - for (int iAccum = threadIdx.x; iAccum < ACCUM_N; iAccum += blockDim.x) - { - GPU_Complex sum = GPU_Complex(0,0); - - for (int pos = iAccum; pos < elementN; pos += ACCUM_N) - { - //original sample code - //sum = sum + d_sig_in[pos-vectorBase] * d_nco_in[pos-vectorBase] * d_local_codes_in[pos]; - //sum = sum + d_sig_in[pos-vectorBase] * d_local_codes_in[pos]; - //sum.multiply_acc(d_sig_in[pos],d_local_codes_in[pos+d_shifts_samples[vec]]); - - //custom code for multitap correlator - // 1.resample local code for the current shift - float local_code_chip_index= fmod(code_phase_step_chips*(float)pos + d_shifts_chips[vec] - rem_code_phase_chips, code_length_chips); - //Take into account that in multitap correlators, the shifts can be negative! - if (local_code_chip_index<0.0) local_code_chip_index+=code_length_chips; - //printf("vec= %i, pos %i, chip_idx=%i chip_shift=%f \r\n",vec, pos,__float2int_rd(local_code_chip_index),local_code_chip_index); - // 2.correlate - sum.multiply_acc(d_sig_in[pos],d_local_code_in[__float2int_rd(local_code_chip_index)]); - - } - accumResult[iAccum] = sum; - } - - //////////////////////////////////////////////////////////////////////// - // Perform tree-like reduction of accumulators' results. - // ACCUM_N has to be power of two at this stage - //////////////////////////////////////////////////////////////////////// - for (int stride = ACCUM_N / 2; stride > 0; stride >>= 1) - { - __syncthreads(); - - for (int iAccum = threadIdx.x; iAccum < stride; iAccum += blockDim.x) - { - accumResult[iAccum] += accumResult[stride + iAccum]; - } - } - - if (threadIdx.x == 0) - { - d_corr_out[vec] = accumResult[0]; - } - } -} - -/** - * CUDA Kernel Device code - * - * Computes the carrier Doppler wipe-off by integrating the NCO in the CUDA kernel - */ -__global__ void -CUDA_32fc_Doppler_wipeoff( GPU_Complex *sig_out, GPU_Complex *sig_in, float rem_carrier_phase_in_rad, float phase_step_rad, int numElements) -{ - // CUDA version of floating point NCO and vector dot product integrated - float sin; - float cos; - for (int i = blockIdx.x * blockDim.x + threadIdx.x; - i < numElements; - i += blockDim.x * gridDim.x) - { - __sincosf(rem_carrier_phase_in_rad + i*phase_step_rad, &sin, &cos); - sig_out[i] = sig_in[i] * GPU_Complex(cos,-sin); - } -} - - __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips( GPU_Complex *d_corr_out, GPU_Complex *d_sig_in, @@ -398,37 +301,15 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda( int n_correlators) { - // cudaMemCpy version //size_t memSize = signal_length_samples * sizeof(std::complex); // input signal CPU -> GPU copy memory //cudaMemcpyAsync(d_sig_in, d_sig_in_cpu, memSize, // cudaMemcpyHostToDevice, stream2); - //***** NOTICE: NCO is computed on-the-fly, not need to copy NCO into GPU! **** - //Launch carrier wipe-off kernel here, while local codes are being copied to GPU! - //cudaStreamSynchronize(stream2); - - //CUDA_32fc_Doppler_wipeoff<<>>(d_sig_doppler_wiped, d_sig_in,rem_carrier_phase_in_rad,phase_step_rad, signal_length_samples); - - //wait for Doppler wipeoff end... - //cudaStreamSynchronize(stream1); - //cudaStreamSynchronize(stream2); //launch the multitap correlator with integrated local code resampler! -// scalarProdGPUCPXxN_shifts_chips<<>>( -// d_corr_out, -// d_sig_doppler_wiped, -// d_local_codes_in, -// d_shifts_chips, -// d_code_length_chips, -// code_phase_step_chips, -// rem_code_phase_chips, -// n_correlators, -// signal_length_samples -// ); - Doppler_wippe_scalarProdGPUCPXxN_shifts_chips<<>>( d_corr_out, d_sig_in, @@ -444,25 +325,15 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda( phase_step_rad ); - //debug -// std::complex* debug_signal; -// debug_signal=static_cast*>(malloc(memSize)); -// cudaMemcpyAsync(debug_signal, d_sig_doppler_wiped, memSize, -// cudaMemcpyDeviceToHost,stream1); -// cudaStreamSynchronize(stream1); -// std::cout<<"d_sig_doppler_wiped GPU="<)*n_correlators, // cudaMemcpyDeviceToHost,stream1); - - cudaStreamSynchronize(stream1); return true; } @@ -490,7 +361,6 @@ bool cuda_multicorrelator::free_cuda() if (d_corr_out!=NULL) cudaFree(d_corr_out); if (d_shifts_samples!=NULL) cudaFree(d_shifts_samples); if (d_shifts_chips!=NULL) cudaFree(d_shifts_chips); - // Reset the device and exit // cudaDeviceReset causes the driver to clean up all state. While // not mandatory in normal operation, it is good practice. It is also