mirror of
				https://github.com/gnss-sdr/gnss-sdr
				synced 2025-11-04 09:13:05 +00:00 
			
		
		
		
	Updated CUDA kernels and several GPU tracking optimizations.
Bug fix in GPS_L1_CA_DLL_PLL binary dump
This commit is contained in:
		@@ -165,7 +165,7 @@ Resampler.sample_freq_out=4000000
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
;######### CHANNELS GLOBAL CONFIG ############
 | 
					;######### CHANNELS GLOBAL CONFIG ############
 | 
				
			||||||
;#count: Number of available GPS satellite channels.
 | 
					;#count: Number of available GPS satellite channels.
 | 
				
			||||||
Channels_GPS.count=8
 | 
					Channels_GPS.count=1
 | 
				
			||||||
;#count: Number of available Galileo satellite channels.
 | 
					;#count: Number of available Galileo satellite channels.
 | 
				
			||||||
Channels_Galileo.count=0
 | 
					Channels_Galileo.count=0
 | 
				
			||||||
;#in_acquisition: Number of channels simultaneously acquiring for the whole receiver
 | 
					;#in_acquisition: Number of channels simultaneously acquiring for the whole receiver
 | 
				
			||||||
@@ -229,16 +229,16 @@ Tracking_GPS.item_type=gr_complex
 | 
				
			|||||||
Tracking_GPS.if=0
 | 
					Tracking_GPS.if=0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;#dump: Enable or disable the Tracking internal binary data file logging [true] or [false] 
 | 
					;#dump: Enable or disable the Tracking internal binary data file logging [true] or [false] 
 | 
				
			||||||
Tracking_GPS.dump=false
 | 
					Tracking_GPS.dump=true
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;#dump_filename: Log path and filename. Notice that the tracking channel will add "x.dat" where x is the channel number.
 | 
					;#dump_filename: Log path and filename. Notice that the tracking channel will add "x.dat" where x is the channel number.
 | 
				
			||||||
Tracking_GPS.dump_filename=../data/epl_tracking_ch_
 | 
					Tracking_GPS.dump_filename=../data/epl_tracking_ch_
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;#pll_bw_hz: PLL loop filter bandwidth [Hz]
 | 
					;#pll_bw_hz: PLL loop filter bandwidth [Hz]
 | 
				
			||||||
Tracking_GPS.pll_bw_hz=45.0;
 | 
					Tracking_GPS.pll_bw_hz=55.0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;#dll_bw_hz: DLL loop filter bandwidth [Hz]
 | 
					;#dll_bw_hz: DLL loop filter bandwidth [Hz]
 | 
				
			||||||
Tracking_GPS.dll_bw_hz=2.0;
 | 
					Tracking_GPS.dll_bw_hz=1.5
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;#fll_bw_hz: FLL loop filter bandwidth [Hz]
 | 
					;#fll_bw_hz: FLL loop filter bandwidth [Hz]
 | 
				
			||||||
Tracking_GPS.fll_bw_hz=10.0;
 | 
					Tracking_GPS.fll_bw_hz=10.0;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -29,13 +29,13 @@ GNSS-SDR.SUPL_CI=0x31b0
 | 
				
			|||||||
SignalSource.implementation=Flexiband_Signal_Source
 | 
					SignalSource.implementation=Flexiband_Signal_Source
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SignalSource.flag_read_file=true
 | 
					SignalSource.flag_read_file=true
 | 
				
			||||||
SignalSource.signal_file=/datalogger/captures/eclipse/eclipse_IIIa_2.bin
 | 
					SignalSource.signal_file=/datalogger/L125_III1b_210s.usb
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
 | 
					;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
 | 
				
			||||||
SignalSource.item_type=gr_complex
 | 
					SignalSource.item_type=gr_complex
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;# FPGA firmware file
 | 
					;# FPGA firmware file
 | 
				
			||||||
SignalSource.firmware_file=flexiband_III-1a.bit
 | 
					SignalSource.firmware_file=flexiband_III-1b.bit
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;#RF_channels: Number of RF channels present in the frontend device, must agree the FPGA firmware file
 | 
					;#RF_channels: Number of RF channels present in the frontend device, must agree the FPGA firmware file
 | 
				
			||||||
SignalSource.RF_channels=1
 | 
					SignalSource.RF_channels=1
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -28,9 +28,9 @@ GNSS-SDR.SUPL_CI=0x31b0
 | 
				
			|||||||
;#implementation: Use [File_Signal_Source] or [UHD_Signal_Source] or [GN3S_Signal_Source] (experimental)
 | 
					;#implementation: Use [File_Signal_Source] or [UHD_Signal_Source] or [GN3S_Signal_Source] (experimental)
 | 
				
			||||||
SignalSource.implementation=Flexiband_Signal_Source
 | 
					SignalSource.implementation=Flexiband_Signal_Source
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SignalSource.flag_read_file=false
 | 
					SignalSource.flag_read_file=true
 | 
				
			||||||
#SignalSource.signal_file=/datalogger/signals/Fraunhofer/L125_III1b_210s.usb
 | 
					SignalSource.signal_file=/datalogger/L125_III1b_210s.usb
 | 
				
			||||||
SignalSource.signal_file=/datalogger/captures/flexiband_III_1b_cap1.usb
 | 
					#SignalSource.signal_file=/datalogger/captures/flexiband_III_1b_cap1.usb
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
 | 
					;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
 | 
				
			||||||
SignalSource.item_type=gr_complex
 | 
					SignalSource.item_type=gr_complex
 | 
				
			||||||
@@ -136,8 +136,8 @@ InputFilter0.grid_density=16
 | 
				
			|||||||
InputFilter0.sampling_frequency=20000000
 | 
					InputFilter0.sampling_frequency=20000000
 | 
				
			||||||
;# IF deviation due to front-end LO inaccuracies [HZ]
 | 
					;# IF deviation due to front-end LO inaccuracies [HZ]
 | 
				
			||||||
;# WARNING: Fraunhofer front-end hardwareconfigurations can difer. Signals available on http://www.iis.fraunhofer.de/de/ff/lok/leist/test/flexiband.html are centered on 0 Hz, ALL BANDS.
 | 
					;# WARNING: Fraunhofer front-end hardwareconfigurations can difer. Signals available on http://www.iis.fraunhofer.de/de/ff/lok/leist/test/flexiband.html are centered on 0 Hz, ALL BANDS.
 | 
				
			||||||
InputFilter0.IF=-205000
 | 
					;#InputFilter0.IF=-205000
 | 
				
			||||||
;#InputFilter0.IF=0
 | 
					InputFilter0.IF=0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;# Decimation factor after the frequency tranaslating block
 | 
					;# Decimation factor after the frequency tranaslating block
 | 
				
			||||||
InputFilter0.decimation_factor=8
 | 
					InputFilter0.decimation_factor=8
 | 
				
			||||||
@@ -230,8 +230,8 @@ InputFilter1.grid_density=16
 | 
				
			|||||||
InputFilter1.sampling_frequency=20000000
 | 
					InputFilter1.sampling_frequency=20000000
 | 
				
			||||||
;# IF deviation due to front-end LO inaccuracies [HZ]
 | 
					;# IF deviation due to front-end LO inaccuracies [HZ]
 | 
				
			||||||
;# WARNING: Fraunhofer front-end hardwareconfigurations can difer. Signals available on http://www.iis.fraunhofer.de/de/ff/lok/leist/test/flexiband.html are centered on 0 Hz, ALL BANDS.
 | 
					;# WARNING: Fraunhofer front-end hardwareconfigurations can difer. Signals available on http://www.iis.fraunhofer.de/de/ff/lok/leist/test/flexiband.html are centered on 0 Hz, ALL BANDS.
 | 
				
			||||||
InputFilter1.IF=100000
 | 
					;#InputFilter1.IF=100000
 | 
				
			||||||
;#InputFilter1.IF=0
 | 
					InputFilter1.IF=0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;# Decimation factor after the frequency tranaslating block
 | 
					;# Decimation factor after the frequency tranaslating block
 | 
				
			||||||
InputFilter1.decimation_factor=8
 | 
					InputFilter1.decimation_factor=8
 | 
				
			||||||
@@ -272,7 +272,7 @@ Resampler2.implementation=Pass_Through
 | 
				
			|||||||
;#count: Number of available GPS satellite channels.
 | 
					;#count: Number of available GPS satellite channels.
 | 
				
			||||||
Channels_1C.count=8
 | 
					Channels_1C.count=8
 | 
				
			||||||
Channels_1B.count=1
 | 
					Channels_1B.count=1
 | 
				
			||||||
Channels_2S.count=8
 | 
					Channels_2S.count=1
 | 
				
			||||||
;#count: Number of available Galileo satellite channels.
 | 
					;#count: Number of available Galileo satellite channels.
 | 
				
			||||||
;Channels_Galileo.count=0
 | 
					;Channels_Galileo.count=0
 | 
				
			||||||
;#in_acquisition: Number of channels simultaneously acquiring for the whole receiver
 | 
					;#in_acquisition: Number of channels simultaneously acquiring for the whole receiver
 | 
				
			||||||
@@ -378,13 +378,13 @@ Acquisition_1C.max_dwells=1
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
;#implementation: Selected tracking algorithm: [GPS_L1_CA_DLL_PLL_Tracking] or [GPS_L1_CA_DLL_FLL_PLL_Tracking]
 | 
					;#implementation: Selected tracking algorithm: [GPS_L1_CA_DLL_PLL_Tracking] or [GPS_L1_CA_DLL_FLL_PLL_Tracking]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Tracking_1C.implementation=GPS_L1_CA_DLL_PLL_Tracking
 | 
					Tracking_1C.implementation=GPS_L1_CA_DLL_PLL_Tracking_GPU
 | 
				
			||||||
Tracking_1C.item_type=gr_complex
 | 
					Tracking_1C.item_type=gr_complex
 | 
				
			||||||
Tracking_1C.if=0
 | 
					Tracking_1C.if=0
 | 
				
			||||||
Tracking_1C.dump=true
 | 
					Tracking_1C.dump=false
 | 
				
			||||||
Tracking_1C.dump_filename=./tracking_ch_
 | 
					Tracking_1C.dump_filename=../data/epl_tracking_ch_
 | 
				
			||||||
Tracking_1C.pll_bw_hz=40.0;
 | 
					Tracking_1C.pll_bw_hz=40.0;
 | 
				
			||||||
Tracking_1C.dll_bw_hz=3.0;
 | 
					Tracking_1C.dll_bw_hz=1.5;
 | 
				
			||||||
Tracking_1C.fll_bw_hz=10.0;
 | 
					Tracking_1C.fll_bw_hz=10.0;
 | 
				
			||||||
Tracking_1C.order=3;
 | 
					Tracking_1C.order=3;
 | 
				
			||||||
Tracking_1C.early_late_space_chips=0.5;
 | 
					Tracking_1C.early_late_space_chips=0.5;
 | 
				
			||||||
@@ -405,7 +405,7 @@ Acquisition_2S.max_dwells=1
 | 
				
			|||||||
Tracking_2S.implementation=GPS_L2_M_DLL_PLL_Tracking
 | 
					Tracking_2S.implementation=GPS_L2_M_DLL_PLL_Tracking
 | 
				
			||||||
Tracking_2S.item_type=gr_complex
 | 
					Tracking_2S.item_type=gr_complex
 | 
				
			||||||
Tracking_2S.if=0
 | 
					Tracking_2S.if=0
 | 
				
			||||||
Tracking_2S.dump=true
 | 
					Tracking_2S.dump=false
 | 
				
			||||||
Tracking_2S.dump_filename=./tracking_ch_
 | 
					Tracking_2S.dump_filename=./tracking_ch_
 | 
				
			||||||
Tracking_2S.pll_bw_hz=1.5;
 | 
					Tracking_2S.pll_bw_hz=1.5;
 | 
				
			||||||
Tracking_2S.dll_bw_hz=0.3;
 | 
					Tracking_2S.dll_bw_hz=0.3;
 | 
				
			||||||
@@ -447,7 +447,7 @@ Tracking_1B.item_type=gr_complex
 | 
				
			|||||||
Tracking_1B.if=0
 | 
					Tracking_1B.if=0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;#dump: Enable or disable the Tracking internal binary data file logging [true] or [false] 
 | 
					;#dump: Enable or disable the Tracking internal binary data file logging [true] or [false] 
 | 
				
			||||||
Tracking_1B.dump=true
 | 
					Tracking_1B.dump=false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;#dump_filename: Log path and filename. Notice that the tracking channel will add "x.dat" where x is the channel number.
 | 
					;#dump_filename: Log path and filename. Notice that the tracking channel will add "x.dat" where x is the channel number.
 | 
				
			||||||
Tracking_1B.dump_filename=./veml_tracking_ch_
 | 
					Tracking_1B.dump_filename=./veml_tracking_ch_
 | 
				
			||||||
@@ -497,7 +497,7 @@ TelemetryDecoder_1B.decimation_factor=5;
 | 
				
			|||||||
Observables.implementation=Mixed_Observables
 | 
					Observables.implementation=Mixed_Observables
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;#dump: Enable or disable the Observables internal binary data file logging [true] or [false] 
 | 
					;#dump: Enable or disable the Observables internal binary data file logging [true] or [false] 
 | 
				
			||||||
Observables.dump=true
 | 
					Observables.dump=false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;#dump_filename: Log path and filename.
 | 
					;#dump_filename: Log path and filename.
 | 
				
			||||||
Observables.dump_filename=./observables.dat
 | 
					Observables.dump_filename=./observables.dat
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -135,7 +135,8 @@ InputFilter0.grid_density=16
 | 
				
			|||||||
; i.e. using front-end-cal as reported here:http://www.cttc.es/publication/turning-a-television-into-a-gnss-receiver/
 | 
					; i.e. using front-end-cal as reported here:http://www.cttc.es/publication/turning-a-television-into-a-gnss-receiver/
 | 
				
			||||||
InputFilter0.sampling_frequency=20000000
 | 
					InputFilter0.sampling_frequency=20000000
 | 
				
			||||||
;# IF deviation due to front-end LO inaccuracies [HZ]
 | 
					;# IF deviation due to front-end LO inaccuracies [HZ]
 | 
				
			||||||
InputFilter0.IF=-205000
 | 
					;#InputFilter0.IF=-205000
 | 
				
			||||||
 | 
					InputFilter0.IF=0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;# Decimation factor after the frequency tranaslating block
 | 
					;# Decimation factor after the frequency tranaslating block
 | 
				
			||||||
InputFilter0.decimation_factor=4
 | 
					InputFilter0.decimation_factor=4
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -58,7 +58,7 @@ if(ENABLE_FLEXIBAND)
 | 
				
			|||||||
     if(OS_IS_MACOSX)
 | 
					     if(OS_IS_MACOSX)
 | 
				
			||||||
          set(MACOSX_ARGS "-DCMAKE_CXX_COMPILER=/usr/bin/clang++")
 | 
					          set(MACOSX_ARGS "-DCMAKE_CXX_COMPILER=/usr/bin/clang++")
 | 
				
			||||||
     endif(OS_IS_MACOSX)
 | 
					     endif(OS_IS_MACOSX)
 | 
				
			||||||
    find_package(teleorbit REQUIRED)
 | 
					    find_package(Teleorbit REQUIRED)
 | 
				
			||||||
    if(NOT TELEORBIT_FOUND)
 | 
					    if(NOT TELEORBIT_FOUND)
 | 
				
			||||||
        message(FATAL_ERROR "Teleorbit Flexiband GNURadio driver required to build gnss-sdr with the optional FLEXIBAND adapter")
 | 
					        message(FATAL_ERROR "Teleorbit Flexiband GNURadio driver required to build gnss-sdr with the optional FLEXIBAND adapter")
 | 
				
			||||||
    endif(NOT TELEORBIT_FOUND)
 | 
					    endif(NOT TELEORBIT_FOUND)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -18,6 +18,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
if(ENABLE_CUDA)
 | 
					if(ENABLE_CUDA)
 | 
				
			||||||
	FIND_PACKAGE(CUDA REQUIRED) 
 | 
						FIND_PACKAGE(CUDA REQUIRED) 
 | 
				
			||||||
 | 
						set(OPT_TRACKING_ADAPTERS ${OPT_TRACKING_ADAPTERS} gps_l1_ca_dll_pll_tracking_gpu.cc)
 | 
				
			||||||
endif(ENABLE_CUDA)
 | 
					endif(ENABLE_CUDA)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
set(TRACKING_ADAPTER_SOURCES 
 | 
					set(TRACKING_ADAPTER_SOURCES 
 | 
				
			||||||
@@ -30,7 +31,7 @@ set(TRACKING_ADAPTER_SOURCES
 | 
				
			|||||||
     gps_l1_ca_tcp_connector_tracking.cc
 | 
					     gps_l1_ca_tcp_connector_tracking.cc
 | 
				
			||||||
     galileo_e5a_dll_pll_tracking.cc
 | 
					     galileo_e5a_dll_pll_tracking.cc
 | 
				
			||||||
     gps_l2_m_dll_pll_tracking.cc
 | 
					     gps_l2_m_dll_pll_tracking.cc
 | 
				
			||||||
     gps_l1_ca_dll_pll_tracking_gpu.cc
 | 
					     ${OPT_TRACKING_ADAPTERS}
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
include_directories(
 | 
					include_directories(
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -19,6 +19,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
if(ENABLE_CUDA)
 | 
					if(ENABLE_CUDA)
 | 
				
			||||||
	FIND_PACKAGE(CUDA REQUIRED)
 | 
						FIND_PACKAGE(CUDA REQUIRED)
 | 
				
			||||||
 | 
						set(OPT_TRACKING_BLOCKS ${OPT_TRACKING_BLOCKS} gps_l1_ca_dll_pll_tracking_gpu_cc.cc)
 | 
				
			||||||
endif(ENABLE_CUDA)
 | 
					endif(ENABLE_CUDA)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
set(TRACKING_GR_BLOCKS_SOURCES
 | 
					set(TRACKING_GR_BLOCKS_SOURCES
 | 
				
			||||||
@@ -31,7 +32,7 @@ set(TRACKING_GR_BLOCKS_SOURCES
 | 
				
			|||||||
     gps_l1_ca_tcp_connector_tracking_cc.cc
 | 
					     gps_l1_ca_tcp_connector_tracking_cc.cc
 | 
				
			||||||
     galileo_e5a_dll_pll_tracking_cc.cc
 | 
					     galileo_e5a_dll_pll_tracking_cc.cc
 | 
				
			||||||
     gps_l2_m_dll_pll_tracking_cc.cc
 | 
					     gps_l2_m_dll_pll_tracking_cc.cc
 | 
				
			||||||
     gps_l1_ca_dll_pll_tracking_gpu_cc.cc
 | 
						 ${OPT_TRACKING_BLOCKS}   
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
include_directories(
 | 
					include_directories(
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -594,7 +594,8 @@ int Gps_L1_Ca_Dll_Pll_Tracking_cc::general_work (int noutput_items, gr_vector_in
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
                    // carrier and code frequency
 | 
					                    // carrier and code frequency
 | 
				
			||||||
                    d_dump_file.write(reinterpret_cast<char*>(&d_carrier_doppler_hz), sizeof(float));
 | 
					                    d_dump_file.write(reinterpret_cast<char*>(&d_carrier_doppler_hz), sizeof(float));
 | 
				
			||||||
                    d_dump_file.write(reinterpret_cast<char*>(&d_code_freq_chips), sizeof(float));
 | 
					                    tmp_float=d_code_freq_chips;
 | 
				
			||||||
 | 
					                    d_dump_file.write(reinterpret_cast<char*>(&tmp_float), sizeof(float));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    //PLL commands
 | 
					                    //PLL commands
 | 
				
			||||||
                    d_dump_file.write(reinterpret_cast<char*>(&carr_error_hz), sizeof(float));
 | 
					                    d_dump_file.write(reinterpret_cast<char*>(&carr_error_hz), sizeof(float));
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -81,7 +81,6 @@ gps_l1_ca_dll_pll_make_tracking_gpu_cc(
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::forecast (int noutput_items,
 | 
					void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::forecast (int noutput_items,
 | 
				
			||||||
        gr_vector_int &ninput_items_required)
 | 
					        gr_vector_int &ninput_items_required)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -120,14 +119,19 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    // Initialization of local code replica
 | 
					    // Initialization of local code replica
 | 
				
			||||||
    // Get space for a vector with the C/A code replica sampled 1x/chip
 | 
					    // Get space for a vector with the C/A code replica sampled 1x/chip
 | 
				
			||||||
    d_ca_code = static_cast<gr_complex*>(volk_malloc((GPS_L1_CA_CODE_LENGTH_CHIPS + 2) * sizeof(gr_complex), volk_get_alignment()));
 | 
					    //d_ca_code = static_cast<gr_complex*>(volk_malloc((GPS_L1_CA_CODE_LENGTH_CHIPS + 2) * sizeof(gr_complex), volk_get_alignment()));
 | 
				
			||||||
 | 
					    d_ca_code = static_cast<gr_complex*>(volk_malloc((GPS_L1_CA_CODE_LENGTH_CHIPS) * sizeof(gr_complex), volk_get_alignment()));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    multicorrelator_gpu = new cuda_multicorrelator();
 | 
					    multicorrelator_gpu = new cuda_multicorrelator();
 | 
				
			||||||
    int N_CORRELATORS=3;
 | 
					    int N_CORRELATORS=3;
 | 
				
			||||||
    multicorrelator_gpu->init_cuda(0, NULL, 2 * d_vector_length , 2 * d_vector_length , N_CORRELATORS);
 | 
					    //local code resampler on CPU (old)
 | 
				
			||||||
 | 
					    //multicorrelator_gpu->init_cuda(0, NULL, 2 * d_vector_length , 2 * d_vector_length , N_CORRELATORS);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //local code resampler on GPU (new)
 | 
				
			||||||
 | 
					    multicorrelator_gpu->init_cuda_integrated_resampler(0, NULL, 2 * d_vector_length , GPS_L1_CA_CODE_LENGTH_CHIPS , N_CORRELATORS);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Get space for the resampled early / prompt / late local replicas
 | 
					    // Get space for the resampled early / prompt / late local replicas
 | 
				
			||||||
	checkCudaErrors(cudaHostAlloc((void**)&d_local_code_shift_samples, N_CORRELATORS * sizeof(int),  cudaHostAllocMapped ));
 | 
						checkCudaErrors(cudaHostAlloc((void**)&d_local_code_shift_chips, N_CORRELATORS * sizeof(float),  cudaHostAllocMapped ));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    //allocate host memory
 | 
					    //allocate host memory
 | 
				
			||||||
@@ -138,7 +142,7 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
 | 
				
			|||||||
	//checkCudaErrors(cudaHostAlloc((void**)&d_local_codes_gpu, (V_LEN * sizeof(gr_complex))*N_CORRELATORS, cudaHostAllocWriteCombined ));
 | 
						//checkCudaErrors(cudaHostAlloc((void**)&d_local_codes_gpu, (V_LEN * sizeof(gr_complex))*N_CORRELATORS, cudaHostAllocWriteCombined ));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	//new integrated shifts
 | 
						//new integrated shifts
 | 
				
			||||||
	checkCudaErrors(cudaHostAlloc((void**)&d_local_codes_gpu, (2 * d_vector_length * sizeof(gr_complex)), cudaHostAllocWriteCombined ));
 | 
						//checkCudaErrors(cudaHostAlloc((void**)&d_local_codes_gpu, (2 * d_vector_length * sizeof(gr_complex)), cudaHostAllocWriteCombined ));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// correlator outputs (scalar)
 | 
						// correlator outputs (scalar)
 | 
				
			||||||
	checkCudaErrors(cudaHostAlloc((void**)&d_corr_outs_gpu ,sizeof(gr_complex)*N_CORRELATORS,  cudaHostAllocWriteCombined ));
 | 
						checkCudaErrors(cudaHostAlloc((void**)&d_corr_outs_gpu ,sizeof(gr_complex)*N_CORRELATORS,  cudaHostAllocWriteCombined ));
 | 
				
			||||||
@@ -242,9 +246,13 @@ void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::start_tracking()
 | 
				
			|||||||
    d_code_loop_filter.initialize();    // initialize the code filter
 | 
					    d_code_loop_filter.initialize();    // initialize the code filter
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // generate local reference ALWAYS starting at chip 1 (1 sample per chip)
 | 
					    // generate local reference ALWAYS starting at chip 1 (1 sample per chip)
 | 
				
			||||||
    gps_l1_ca_code_gen_complex(&d_ca_code[1], d_acquisition_gnss_synchro->PRN, 0);
 | 
					    gps_l1_ca_code_gen_complex(d_ca_code, d_acquisition_gnss_synchro->PRN, 0);
 | 
				
			||||||
    d_ca_code[0] = d_ca_code[static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS)];
 | 
					
 | 
				
			||||||
    d_ca_code[static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS) + 1] = d_ca_code[1];
 | 
					    d_local_code_shift_chips[0]=-d_early_late_spc_chips;
 | 
				
			||||||
 | 
					    d_local_code_shift_chips[1]=0.0;
 | 
				
			||||||
 | 
					    d_local_code_shift_chips[2]=d_early_late_spc_chips;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    multicorrelator_gpu->set_local_code_and_taps(GPS_L1_CA_CODE_LENGTH_CHIPS,d_ca_code, d_local_code_shift_chips,3);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    d_carrier_lock_fail_counter = 0;
 | 
					    d_carrier_lock_fail_counter = 0;
 | 
				
			||||||
    d_rem_code_phase_samples = 0;
 | 
					    d_rem_code_phase_samples = 0;
 | 
				
			||||||
@@ -272,40 +280,6 @@ void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::start_tracking()
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::update_local_code()
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    double tcode_chips;
 | 
					 | 
				
			||||||
    double rem_code_phase_chips;
 | 
					 | 
				
			||||||
    int associated_chip_index;
 | 
					 | 
				
			||||||
    int code_length_chips = static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS);
 | 
					 | 
				
			||||||
    double code_phase_step_chips;
 | 
					 | 
				
			||||||
    int epl_loop_length_samples;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // unified loop for E, P, L code vectors
 | 
					 | 
				
			||||||
    code_phase_step_chips = static_cast<double>(d_code_freq_chips) / static_cast<double>(d_fs_in);
 | 
					 | 
				
			||||||
    rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in);
 | 
					 | 
				
			||||||
    tcode_chips = -rem_code_phase_chips;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Alternative EPL code generation (40% of speed improvement!)
 | 
					 | 
				
			||||||
    d_local_code_shift_samples[0]=0;
 | 
					 | 
				
			||||||
    d_local_code_shift_samples[1]=round(d_early_late_spc_chips / code_phase_step_chips);
 | 
					 | 
				
			||||||
    d_local_code_shift_samples[2]=round((2*d_early_late_spc_chips) / code_phase_step_chips);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    epl_loop_length_samples = d_current_prn_length_samples + d_local_code_shift_samples[2]; //maximum length
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for (int i = 0; i < epl_loop_length_samples; i++)
 | 
					 | 
				
			||||||
        {
 | 
					 | 
				
			||||||
            associated_chip_index = 1 + round(fmod(tcode_chips - d_early_late_spc_chips, code_length_chips));
 | 
					 | 
				
			||||||
            d_local_codes_gpu[i] = d_ca_code[associated_chip_index];
 | 
					 | 
				
			||||||
            tcode_chips = tcode_chips + code_phase_step_chips;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::~Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc()
 | 
					Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::~Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc()
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    d_dump_file.close();
 | 
					    d_dump_file.close();
 | 
				
			||||||
@@ -313,7 +287,7 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::~Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc()
 | 
				
			|||||||
	cudaFreeHost(in_gpu);
 | 
						cudaFreeHost(in_gpu);
 | 
				
			||||||
	cudaFreeHost(d_carr_sign_gpu);
 | 
						cudaFreeHost(d_carr_sign_gpu);
 | 
				
			||||||
	cudaFreeHost(d_corr_outs_gpu);
 | 
						cudaFreeHost(d_corr_outs_gpu);
 | 
				
			||||||
	cudaFreeHost(d_local_codes_gpu);
 | 
						cudaFreeHost(d_local_code_shift_chips);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	multicorrelator_gpu->free_cuda();
 | 
						multicorrelator_gpu->free_cuda();
 | 
				
			||||||
	delete(multicorrelator_gpu);
 | 
						delete(multicorrelator_gpu);
 | 
				
			||||||
@@ -329,10 +303,10 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
 | 
				
			|||||||
        gr_vector_const_void_star &input_items, gr_vector_void_star &output_items)
 | 
					        gr_vector_const_void_star &input_items, gr_vector_void_star &output_items)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    // process vars
 | 
					    // process vars
 | 
				
			||||||
    float carr_error_hz;
 | 
					    float carr_error_hz=0.0;
 | 
				
			||||||
    float carr_error_filt_hz;
 | 
					    float carr_error_filt_hz=0.0;
 | 
				
			||||||
    float code_error_chips;
 | 
					    float code_error_chips=0.0;
 | 
				
			||||||
    float code_error_filt_chips;
 | 
					    float code_error_filt_chips=0.0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Block input data and block output stream pointers
 | 
					    // Block input data and block output stream pointers
 | 
				
			||||||
    const gr_complex* in = (gr_complex*) input_items[0];
 | 
					    const gr_complex* in = (gr_complex*) input_items[0];
 | 
				
			||||||
@@ -341,23 +315,17 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
 | 
				
			|||||||
    // GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder
 | 
					    // GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder
 | 
				
			||||||
    Gnss_Synchro current_synchro_data = Gnss_Synchro();
 | 
					    Gnss_Synchro current_synchro_data = Gnss_Synchro();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (d_enable_tracking == true)
 | 
					    if (d_enable_tracking == true)
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            // Receiver signal alignment
 | 
					            // Receiver signal alignment
 | 
				
			||||||
            if (d_pull_in == true)
 | 
					            if (d_pull_in == true)
 | 
				
			||||||
                {
 | 
					                {
 | 
				
			||||||
                    int samples_offset;
 | 
					                    int samples_offset;
 | 
				
			||||||
                    float acq_trk_shif_correction_samples;
 | 
					 | 
				
			||||||
                    int acq_to_trk_delay_samples;
 | 
					                    int acq_to_trk_delay_samples;
 | 
				
			||||||
                    acq_to_trk_delay_samples = d_sample_counter - d_acq_sample_stamp;
 | 
					                    acq_to_trk_delay_samples = d_sample_counter - d_acq_sample_stamp;
 | 
				
			||||||
                    acq_trk_shif_correction_samples = d_current_prn_length_samples - fmod(static_cast<float>(acq_to_trk_delay_samples), static_cast<float>(d_current_prn_length_samples));
 | 
					                    samples_offset = round(d_acq_code_phase_samples)+d_current_prn_length_samples - acq_to_trk_delay_samples%d_current_prn_length_samples;
 | 
				
			||||||
                    samples_offset = round(d_acq_code_phase_samples + acq_trk_shif_correction_samples);
 | 
					 | 
				
			||||||
                    // /todo: Check if the sample counter sent to the next block as a time reference should be incremented AFTER sended or BEFORE
 | 
					 | 
				
			||||||
                    //d_sample_counter_seconds = d_sample_counter_seconds + (((double)samples_offset) / static_cast<double>(d_fs_in));
 | 
					 | 
				
			||||||
                    d_sample_counter = d_sample_counter + samples_offset; //count for the processed samples
 | 
					                    d_sample_counter = d_sample_counter + samples_offset; //count for the processed samples
 | 
				
			||||||
                    d_pull_in = false;
 | 
					                    d_pull_in = false;
 | 
				
			||||||
                    //std::cout<<" samples_offset="<<samples_offset<<"\r\n";
 | 
					 | 
				
			||||||
                    // Fill the acquisition data
 | 
					                    // Fill the acquisition data
 | 
				
			||||||
                    current_synchro_data = *d_acquisition_gnss_synchro;
 | 
					                    current_synchro_data = *d_acquisition_gnss_synchro;
 | 
				
			||||||
                    *out[0] = current_synchro_data;
 | 
					                    *out[0] = current_synchro_data;
 | 
				
			||||||
@@ -368,46 +336,24 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
 | 
				
			|||||||
            // Fill the acquisition data
 | 
					            // Fill the acquisition data
 | 
				
			||||||
            current_synchro_data = *d_acquisition_gnss_synchro;
 | 
					            current_synchro_data = *d_acquisition_gnss_synchro;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // Generate local code and carrier replicas (using \hat{f}_d(k-1))
 | 
					 | 
				
			||||||
            update_local_code();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            // UPDATE NCO COMMAND
 | 
					            // UPDATE NCO COMMAND
 | 
				
			||||||
            float phase_step_rad = static_cast<float>(GPS_TWO_PI) * d_carrier_doppler_hz / static_cast<float>(d_fs_in);
 | 
					            float phase_step_rad = static_cast<float>(GPS_TWO_PI) * d_carrier_doppler_hz / static_cast<float>(d_fs_in);
 | 
				
			||||||
            //std::cout<<"d_current_prn_length_samples="<<d_current_prn_length_samples<<std::endl;
 | 
					
 | 
				
			||||||
            // perform carrier wipe-off and compute Early, Prompt and Late correlation
 | 
					        	//code resampler on GPU (new)
 | 
				
			||||||
 | 
					            float code_phase_step_chips = static_cast<float>(d_code_freq_chips) / static_cast<float>(d_fs_in);
 | 
				
			||||||
 | 
					            float rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            cudaProfilerStart();
 | 
					            cudaProfilerStart();
 | 
				
			||||||
            multicorrelator_gpu->Carrier_wipeoff_multicorrelator_cuda(
 | 
					            multicorrelator_gpu->Carrier_wipeoff_multicorrelator_resampler_cuda(
 | 
				
			||||||
    				d_corr_outs_gpu,
 | 
					    				d_corr_outs_gpu,
 | 
				
			||||||
    				in,
 | 
					    				in,
 | 
				
			||||||
    				d_local_codes_gpu,
 | 
					 | 
				
			||||||
    				d_rem_carr_phase_rad,
 | 
					    				d_rem_carr_phase_rad,
 | 
				
			||||||
    				phase_step_rad,
 | 
					    				phase_step_rad,
 | 
				
			||||||
    				d_local_code_shift_samples,
 | 
					    				code_phase_step_chips,
 | 
				
			||||||
 | 
					    				rem_code_phase_chips,
 | 
				
			||||||
    				d_current_prn_length_samples,
 | 
					    				d_current_prn_length_samples,
 | 
				
			||||||
    				3);
 | 
					    				3);
 | 
				
			||||||
        	cudaProfilerStop();
 | 
					        	cudaProfilerStop();
 | 
				
			||||||
            //std::cout<<"d_Prompt="<<*d_Prompt<<"d_Early="<<*d_Early<<"d_Late="<<*d_Late<<std::endl;
 | 
					 | 
				
			||||||
            // check for samples consistency (this should be done before in the receiver / here only if the source is a file)
 | 
					 | 
				
			||||||
            if (std::isnan((*d_Prompt).real()) == true or std::isnan((*d_Prompt).imag()) == true ) // or std::isinf(in[i].real())==true or std::isinf(in[i].imag())==true)
 | 
					 | 
				
			||||||
                {
 | 
					 | 
				
			||||||
                    const int samples_available = ninput_items[0];
 | 
					 | 
				
			||||||
                    d_sample_counter = d_sample_counter + samples_available;
 | 
					 | 
				
			||||||
                    LOG(WARNING) << "Detected NaN samples at sample number " << d_sample_counter;
 | 
					 | 
				
			||||||
                    consume_each(samples_available);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    // make an output to not stop the rest of the processing blocks
 | 
					 | 
				
			||||||
                    current_synchro_data.Prompt_I = 0.0;
 | 
					 | 
				
			||||||
                    current_synchro_data.Prompt_Q = 0.0;
 | 
					 | 
				
			||||||
                    current_synchro_data.Tracking_timestamp_secs = static_cast<double>(d_sample_counter) / static_cast<double>(d_fs_in);
 | 
					 | 
				
			||||||
                    current_synchro_data.Carrier_phase_rads = 0.0;
 | 
					 | 
				
			||||||
                    current_synchro_data.Code_phase_secs = 0.0;
 | 
					 | 
				
			||||||
                    current_synchro_data.CN0_dB_hz = 0.0;
 | 
					 | 
				
			||||||
                    current_synchro_data.Flag_valid_tracking = false;
 | 
					 | 
				
			||||||
                    current_synchro_data.Flag_valid_pseudorange = false;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    *out[0] = current_synchro_data;
 | 
					 | 
				
			||||||
                    return 1;
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // ################## PLL ##########################################################
 | 
					            // ################## PLL ##########################################################
 | 
				
			||||||
            // PLL discriminator
 | 
					            // PLL discriminator
 | 
				
			||||||
@@ -444,8 +390,7 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
 | 
				
			|||||||
            T_chip_seconds = 1 / static_cast<double>(d_code_freq_chips);
 | 
					            T_chip_seconds = 1 / static_cast<double>(d_code_freq_chips);
 | 
				
			||||||
            T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
 | 
					            T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
 | 
				
			||||||
            T_prn_samples = T_prn_seconds * static_cast<double>(d_fs_in);
 | 
					            T_prn_samples = T_prn_seconds * static_cast<double>(d_fs_in);
 | 
				
			||||||
            K_blk_samples = T_prn_samples + d_rem_code_phase_samples + code_error_filt_secs * static_cast<double>(d_fs_in);
 | 
					            K_blk_samples = T_prn_samples + d_rem_code_phase_samples + static_cast<double>(code_error_filt_secs) * static_cast<double>(d_fs_in);
 | 
				
			||||||
            d_current_prn_length_samples = round(K_blk_samples); //round to a discrete samples
 | 
					 | 
				
			||||||
            //d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
 | 
					            //d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // ####### CN0 ESTIMATION AND LOCK DETECTORS ######
 | 
					            // ####### CN0 ESTIMATION AND LOCK DETECTORS ######
 | 
				
			||||||
@@ -591,7 +536,8 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
                    // carrier and code frequency
 | 
					                    // carrier and code frequency
 | 
				
			||||||
                    d_dump_file.write(reinterpret_cast<char*>(&d_carrier_doppler_hz), sizeof(float));
 | 
					                    d_dump_file.write(reinterpret_cast<char*>(&d_carrier_doppler_hz), sizeof(float));
 | 
				
			||||||
                    d_dump_file.write(reinterpret_cast<char*>(&d_code_freq_chips), sizeof(float));
 | 
					                    tmp_float=d_code_freq_chips;
 | 
				
			||||||
 | 
					                    d_dump_file.write(reinterpret_cast<char*>(&tmp_float), sizeof(float));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    //PLL commands
 | 
					                    //PLL commands
 | 
				
			||||||
                    d_dump_file.write(reinterpret_cast<char*>(&carr_error_hz), sizeof(float));
 | 
					                    d_dump_file.write(reinterpret_cast<char*>(&carr_error_hz), sizeof(float));
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -130,7 +130,7 @@ private:
 | 
				
			|||||||
    gr_complex* in_gpu;
 | 
					    gr_complex* in_gpu;
 | 
				
			||||||
    gr_complex* d_carr_sign_gpu;
 | 
					    gr_complex* d_carr_sign_gpu;
 | 
				
			||||||
    gr_complex* d_local_codes_gpu;
 | 
					    gr_complex* d_local_codes_gpu;
 | 
				
			||||||
	int* d_local_code_shift_samples;
 | 
						float* d_local_code_shift_chips;
 | 
				
			||||||
    gr_complex* d_corr_outs_gpu;
 | 
					    gr_complex* d_corr_outs_gpu;
 | 
				
			||||||
    cuda_multicorrelator *multicorrelator_gpu;
 | 
					    cuda_multicorrelator *multicorrelator_gpu;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -33,7 +33,6 @@ if(ENABLE_CUDA)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	SET(LIB_TYPE STATIC) #set the lib type
 | 
						SET(LIB_TYPE STATIC) #set the lib type
 | 
				
			||||||
	CUDA_ADD_LIBRARY(CUDA_CORRELATOR_LIB ${LIB_TYPE} cuda_multicorrelator.h cuda_multicorrelator.cu)
 | 
						CUDA_ADD_LIBRARY(CUDA_CORRELATOR_LIB ${LIB_TYPE} cuda_multicorrelator.h cuda_multicorrelator.cu)
 | 
				
			||||||
    
 | 
					 | 
				
			||||||
endif(ENABLE_CUDA)
 | 
					endif(ENABLE_CUDA)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -53,7 +53,83 @@
 | 
				
			|||||||
#include <helper_cuda.h>
 | 
					#include <helper_cuda.h>
 | 
				
			||||||
#include <helper_functions.h>
 | 
					#include <helper_functions.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define ACCUM_N 1024
 | 
					#define ACCUM_N 256
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__global__ void scalarProdGPUCPXxN_shifts_chips(
 | 
				
			||||||
 | 
					    GPU_Complex *d_corr_out,
 | 
				
			||||||
 | 
					    GPU_Complex *d_sig_in,
 | 
				
			||||||
 | 
					    GPU_Complex *d_local_code_in,
 | 
				
			||||||
 | 
					    float *d_shifts_chips,
 | 
				
			||||||
 | 
					    float code_length_chips,
 | 
				
			||||||
 | 
					    float code_phase_step_chips,
 | 
				
			||||||
 | 
					    float rem_code_phase_chips,
 | 
				
			||||||
 | 
					    int vectorN,
 | 
				
			||||||
 | 
					    int elementN
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    //Accumulators cache
 | 
				
			||||||
 | 
					    __shared__ GPU_Complex accumResult[ACCUM_N];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    // Cycle through every pair of vectors,
 | 
				
			||||||
 | 
					    // taking into account that vector counts can be different
 | 
				
			||||||
 | 
					    // from total number of thread blocks
 | 
				
			||||||
 | 
					    ////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					    for (int vec = blockIdx.x; vec < vectorN; vec += gridDim.x)
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					        //int vectorBase = IMUL(elementN, vec);
 | 
				
			||||||
 | 
					        //int vectorEnd  = elementN;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					        // Each accumulator cycles through vectors with
 | 
				
			||||||
 | 
					        // stride equal to number of total number of accumulators ACCUM_N
 | 
				
			||||||
 | 
					        // At this stage ACCUM_N is only preferred be a multiple of warp size
 | 
				
			||||||
 | 
					        // to meet memory coalescing alignment constraints.
 | 
				
			||||||
 | 
					        ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					        for (int iAccum = threadIdx.x; iAccum < ACCUM_N; iAccum += blockDim.x)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					        	GPU_Complex sum = GPU_Complex(0,0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            for (int pos = iAccum; pos < elementN; pos += ACCUM_N)
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                //sum = sum + d_sig_in[pos-vectorBase] * d_nco_in[pos-vectorBase] * d_local_codes_in[pos];
 | 
				
			||||||
 | 
					            	//sum = sum + d_sig_in[pos-vectorBase] * d_local_codes_in[pos];
 | 
				
			||||||
 | 
					            	//sum.multiply_acc(d_sig_in[pos],d_local_codes_in[pos+d_shifts_samples[vec]]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            	// 1.resample local code for the current shift
 | 
				
			||||||
 | 
					            	float local_code_chip_index= fmod(code_phase_step_chips*(float)pos + d_shifts_chips[vec] - rem_code_phase_chips, code_length_chips);
 | 
				
			||||||
 | 
					            	//TODO: Take into account that in multitap correlators, the shifts can be negative!
 | 
				
			||||||
 | 
					            	if (local_code_chip_index<0.0) local_code_chip_index+=code_length_chips;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            	// 2.correlate
 | 
				
			||||||
 | 
					            	sum.multiply_acc(d_sig_in[pos],d_local_code_in[__float2int_rd(local_code_chip_index)]);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            accumResult[iAccum] = sum;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					        // Perform tree-like reduction of accumulators' results.
 | 
				
			||||||
 | 
					        // ACCUM_N has to be power of two at this stage
 | 
				
			||||||
 | 
					        ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
 | 
					        for (int stride = ACCUM_N / 2; stride > 0; stride >>= 1)
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            __syncthreads();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            for (int iAccum = threadIdx.x; iAccum < stride; iAccum += blockDim.x)
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                accumResult[iAccum] += accumResult[stride + iAccum];
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if (threadIdx.x == 0)
 | 
				
			||||||
 | 
					        	{
 | 
				
			||||||
 | 
					        		d_corr_out[vec] = accumResult[0];
 | 
				
			||||||
 | 
					        	}
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
///////////////////////////////////////////////////////////////////////////////
 | 
					///////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
// Calculate scalar products of VectorN vectors of ElementN elements on GPU
 | 
					// Calculate scalar products of VectorN vectors of ElementN elements on GPU
 | 
				
			||||||
@@ -145,8 +221,9 @@ __global__ void scalarProdGPUCPXxN(
 | 
				
			|||||||
    ////////////////////////////////////////////////////////////////////////////
 | 
					    ////////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
    for (int vec = blockIdx.x; vec < vectorN; vec += gridDim.x)
 | 
					    for (int vec = blockIdx.x; vec < vectorN; vec += gridDim.x)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        int vectorBase = IMUL(elementN, vec);
 | 
					        //int vectorBase = IMUL(elementN, vec);
 | 
				
			||||||
        int vectorEnd  = vectorBase + elementN;
 | 
					        //int vectorEnd  = vectorBase + elementN;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        ////////////////////////////////////////////////////////////////////////
 | 
					        ////////////////////////////////////////////////////////////////////////
 | 
				
			||||||
        // Each accumulator cycles through vectors with
 | 
					        // Each accumulator cycles through vectors with
 | 
				
			||||||
@@ -158,11 +235,13 @@ __global__ void scalarProdGPUCPXxN(
 | 
				
			|||||||
        {
 | 
					        {
 | 
				
			||||||
        	GPU_Complex sum = GPU_Complex(0,0);
 | 
					        	GPU_Complex sum = GPU_Complex(0,0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            for (int pos = vectorBase + iAccum; pos < vectorEnd; pos += ACCUM_N)
 | 
					            //for (int pos = vectorBase + iAccum; pos < vectorEnd; pos += ACCUM_N)
 | 
				
			||||||
 | 
					        	for (int pos = iAccum; pos < elementN; pos += ACCUM_N)
 | 
				
			||||||
            {
 | 
					            {
 | 
				
			||||||
                //sum = sum + d_sig_in[pos-vectorBase] * d_nco_in[pos-vectorBase] * d_local_codes_in[pos];
 | 
					                //sum = sum + d_sig_in[pos-vectorBase] * d_nco_in[pos-vectorBase] * d_local_codes_in[pos];
 | 
				
			||||||
            	//sum = sum + d_sig_in[pos-vectorBase] * d_local_codes_in[pos];
 | 
					            	//sum = sum + d_sig_in[pos-vectorBase] * d_local_codes_in[pos];
 | 
				
			||||||
            	sum.multiply_acc(d_sig_in[pos-vectorBase],d_local_codes_in[pos]);
 | 
					            	//sum.multiply_acc(d_sig_in[pos-vectorBase],d_local_codes_in[pos]);
 | 
				
			||||||
 | 
					        		sum.multiply_acc(d_sig_in[pos],d_local_codes_in[pos]);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            accumResult[iAccum] = sum;
 | 
					            accumResult[iAccum] = sum;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
@@ -200,9 +279,9 @@ __global__ void scalarProdGPUCPXxN(
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
__global__ void CUDA_32fc_x2_multiply_32fc(  GPU_Complex *A,   GPU_Complex  *B, GPU_Complex  *C, int numElements)
 | 
					__global__ void CUDA_32fc_x2_multiply_32fc(  GPU_Complex *A,   GPU_Complex  *B, GPU_Complex  *C, int numElements)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    int i = blockDim.x * blockIdx.x + threadIdx.x;
 | 
					    for (int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
				
			||||||
 | 
					         i < numElements;
 | 
				
			||||||
    if (i < numElements)
 | 
					         i += blockDim.x * gridDim.x)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        C[i] =  A[i] * B[i];
 | 
					        C[i] =  A[i] * B[i];
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -232,10 +311,11 @@ CUDA_32fc_Doppler_wipeoff(  GPU_Complex *sig_out, GPU_Complex *sig_in, float rem
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	// CUDA version of floating point NCO and vector dot product integrated
 | 
						// CUDA version of floating point NCO and vector dot product integrated
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    int i = blockDim.x * blockIdx.x + threadIdx.x;
 | 
					 | 
				
			||||||
    float sin;
 | 
					    float sin;
 | 
				
			||||||
    float cos;
 | 
					    float cos;
 | 
				
			||||||
    if (i < numElements)
 | 
					    for (int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
				
			||||||
 | 
					         i < numElements;
 | 
				
			||||||
 | 
					         i += blockDim.x * gridDim.x)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
    	__sincosf(rem_carrier_phase_in_rad + i*phase_step_rad, &sin, &cos);
 | 
					    	__sincosf(rem_carrier_phase_in_rad + i*phase_step_rad, &sin, &cos);
 | 
				
			||||||
    	sig_out[i] =  sig_in[i] * GPU_Complex(cos,-sin);
 | 
					    	sig_out[i] =  sig_in[i] * GPU_Complex(cos,-sin);
 | 
				
			||||||
@@ -252,11 +332,11 @@ CUDA_32fc_Doppler_wipeoff(  GPU_Complex *sig_out, GPU_Complex *sig_in, float rem
 | 
				
			|||||||
__global__ void
 | 
					__global__ void
 | 
				
			||||||
CUDA_32fc_x2_add_32fc(  GPU_Complex *A,   GPU_Complex  *B, GPU_Complex  *C, int numElements)
 | 
					CUDA_32fc_x2_add_32fc(  GPU_Complex *A,   GPU_Complex  *B, GPU_Complex  *C, int numElements)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    int i = blockDim.x * blockIdx.x + threadIdx.x;
 | 
					    for (int i = blockIdx.x * blockDim.x + threadIdx.x;
 | 
				
			||||||
 | 
					         i < numElements;
 | 
				
			||||||
    if (i < numElements)
 | 
					         i += blockDim.x * gridDim.x)
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        C[i] =  A[i] * B[i];
 | 
					        C[i] =  A[i] + B[i];
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -264,23 +344,53 @@ CUDA_32fc_x2_add_32fc(  GPU_Complex *A,   GPU_Complex  *B, GPU_Complex  *C, int
 | 
				
			|||||||
bool cuda_multicorrelator::init_cuda(const int argc, const char **argv, int signal_length_samples, int local_codes_length_samples, int n_correlators)
 | 
					bool cuda_multicorrelator::init_cuda(const int argc, const char **argv, int signal_length_samples, int local_codes_length_samples, int n_correlators)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	// use command-line specified CUDA device, otherwise use device with highest Gflops/s
 | 
						// use command-line specified CUDA device, otherwise use device with highest Gflops/s
 | 
				
			||||||
	findCudaDevice(argc, (const char **)argv);
 | 
					//	findCudaDevice(argc, (const char **)argv);
 | 
				
			||||||
 | 
					//      cudaDeviceProp  prop;
 | 
				
			||||||
    cudaDeviceProp  prop;
 | 
					//    int num_devices, device;
 | 
				
			||||||
    int whichDevice;
 | 
					//    cudaGetDeviceCount(&num_devices);
 | 
				
			||||||
    cudaGetDevice( &whichDevice );
 | 
					//    if (num_devices > 1) {
 | 
				
			||||||
    cudaGetDeviceProperties( &prop, whichDevice );
 | 
					//          int max_multiprocessors = 0, max_device = 0;
 | 
				
			||||||
    //debug code
 | 
					//          for (device = 0; device < num_devices; device++) {
 | 
				
			||||||
    if (prop.canMapHostMemory != 1) {
 | 
					//                  cudaDeviceProp properties;
 | 
				
			||||||
        printf( "Device can not map memory.\n" );
 | 
					//                  cudaGetDeviceProperties(&properties, device);
 | 
				
			||||||
    }
 | 
					//                  if (max_multiprocessors < properties.multiProcessorCount) {
 | 
				
			||||||
 | 
					//                          max_multiprocessors = properties.multiProcessorCount;
 | 
				
			||||||
    printf("L2 Cache size= %u \n",prop.l2CacheSize);
 | 
					//                          max_device = device;
 | 
				
			||||||
    printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
 | 
					//                  }
 | 
				
			||||||
    printf("maxGridSize= %i \n",prop.maxGridSize[0]);
 | 
					//                  printf("Found GPU device # %i\n",device);
 | 
				
			||||||
    printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
 | 
					//          }
 | 
				
			||||||
    printf("deviceOverlap= %i \n",prop.deviceOverlap);
 | 
					//          //cudaSetDevice(max_device);
 | 
				
			||||||
    //end debug code
 | 
					//
 | 
				
			||||||
 | 
					//          //set random device!
 | 
				
			||||||
 | 
					//          cudaSetDevice(rand() % num_devices); //generates a random number between 0 and num_devices to split the threads between GPUs
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//          cudaGetDeviceProperties( &prop, max_device );
 | 
				
			||||||
 | 
					//          //debug code
 | 
				
			||||||
 | 
					//          if (prop.canMapHostMemory != 1) {
 | 
				
			||||||
 | 
					//              printf( "Device can not map memory.\n" );
 | 
				
			||||||
 | 
					//          }
 | 
				
			||||||
 | 
					//          printf("L2 Cache size= %u \n",prop.l2CacheSize);
 | 
				
			||||||
 | 
					//          printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
 | 
				
			||||||
 | 
					//          printf("maxGridSize= %i \n",prop.maxGridSize[0]);
 | 
				
			||||||
 | 
					//          printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
 | 
				
			||||||
 | 
					//          printf("deviceOverlap= %i \n",prop.deviceOverlap);
 | 
				
			||||||
 | 
					//  	    printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
 | 
				
			||||||
 | 
					//    }else{
 | 
				
			||||||
 | 
					//    	    int whichDevice;
 | 
				
			||||||
 | 
					//    	    cudaGetDevice( &whichDevice );
 | 
				
			||||||
 | 
					//    	    cudaGetDeviceProperties( &prop, whichDevice );
 | 
				
			||||||
 | 
					//    	    //debug code
 | 
				
			||||||
 | 
					//    	    if (prop.canMapHostMemory != 1) {
 | 
				
			||||||
 | 
					//    	        printf( "Device can not map memory.\n" );
 | 
				
			||||||
 | 
					//    	    }
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//    	    printf("L2 Cache size= %u \n",prop.l2CacheSize);
 | 
				
			||||||
 | 
					//    	    printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
 | 
				
			||||||
 | 
					//    	    printf("maxGridSize= %i \n",prop.maxGridSize[0]);
 | 
				
			||||||
 | 
					//    	    printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
 | 
				
			||||||
 | 
					//    	    printf("deviceOverlap= %i \n",prop.deviceOverlap);
 | 
				
			||||||
 | 
					//    	    printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
 | 
				
			||||||
 | 
					//    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	//checkCudaErrors(cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
 | 
						//checkCudaErrors(cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -300,7 +410,7 @@ bool cuda_multicorrelator::init_cuda(const int argc, const char **argv, int sign
 | 
				
			|||||||
	// Required: The last correlator tap in d_shifts_samples has the largest sample shift
 | 
						// Required: The last correlator tap in d_shifts_samples has the largest sample shift
 | 
				
			||||||
    size_t size_local_code_bytes = local_codes_length_samples * sizeof(GPU_Complex);
 | 
					    size_t size_local_code_bytes = local_codes_length_samples * sizeof(GPU_Complex);
 | 
				
			||||||
	checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, size_local_code_bytes));
 | 
						checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, size_local_code_bytes));
 | 
				
			||||||
	checkCudaErrors(cudaMalloc((void **)&d_shifts_samples, size+sizeof(int)*n_correlators));
 | 
						checkCudaErrors(cudaMalloc((void **)&d_shifts_samples, sizeof(int)*n_correlators));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	//scalars
 | 
						//scalars
 | 
				
			||||||
	checkCudaErrors(cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators));
 | 
						checkCudaErrors(cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators));
 | 
				
			||||||
@@ -315,6 +425,116 @@ bool cuda_multicorrelator::init_cuda(const int argc, const char **argv, int sign
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool cuda_multicorrelator::init_cuda_integrated_resampler(
 | 
				
			||||||
 | 
							const int argc, const char **argv,
 | 
				
			||||||
 | 
							int signal_length_samples,
 | 
				
			||||||
 | 
							int code_length_chips,
 | 
				
			||||||
 | 
							int n_correlators
 | 
				
			||||||
 | 
							)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						// use command-line specified CUDA device, otherwise use device with highest Gflops/s
 | 
				
			||||||
 | 
					//	findCudaDevice(argc, (const char **)argv);
 | 
				
			||||||
 | 
					//      cudaDeviceProp  prop;
 | 
				
			||||||
 | 
					//    int num_devices, device;
 | 
				
			||||||
 | 
					//    cudaGetDeviceCount(&num_devices);
 | 
				
			||||||
 | 
					//    if (num_devices > 1) {
 | 
				
			||||||
 | 
					//          int max_multiprocessors = 0, max_device = 0;
 | 
				
			||||||
 | 
					//          for (device = 0; device < num_devices; device++) {
 | 
				
			||||||
 | 
					//                  cudaDeviceProp properties;
 | 
				
			||||||
 | 
					//                  cudaGetDeviceProperties(&properties, device);
 | 
				
			||||||
 | 
					//                  if (max_multiprocessors < properties.multiProcessorCount) {
 | 
				
			||||||
 | 
					//                          max_multiprocessors = properties.multiProcessorCount;
 | 
				
			||||||
 | 
					//                          max_device = device;
 | 
				
			||||||
 | 
					//                  }
 | 
				
			||||||
 | 
					//                  printf("Found GPU device # %i\n",device);
 | 
				
			||||||
 | 
					//          }
 | 
				
			||||||
 | 
					//          //cudaSetDevice(max_device);
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//          //set random device!
 | 
				
			||||||
 | 
					//          cudaSetDevice(rand() % num_devices); //generates a random number between 0 and num_devices to split the threads between GPUs
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//          cudaGetDeviceProperties( &prop, max_device );
 | 
				
			||||||
 | 
					//          //debug code
 | 
				
			||||||
 | 
					//          if (prop.canMapHostMemory != 1) {
 | 
				
			||||||
 | 
					//              printf( "Device can not map memory.\n" );
 | 
				
			||||||
 | 
					//          }
 | 
				
			||||||
 | 
					//          printf("L2 Cache size= %u \n",prop.l2CacheSize);
 | 
				
			||||||
 | 
					//          printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
 | 
				
			||||||
 | 
					//          printf("maxGridSize= %i \n",prop.maxGridSize[0]);
 | 
				
			||||||
 | 
					//          printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
 | 
				
			||||||
 | 
					//          printf("deviceOverlap= %i \n",prop.deviceOverlap);
 | 
				
			||||||
 | 
					//  	    printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
 | 
				
			||||||
 | 
					//    }else{
 | 
				
			||||||
 | 
					//    	    int whichDevice;
 | 
				
			||||||
 | 
					//    	    cudaGetDevice( &whichDevice );
 | 
				
			||||||
 | 
					//    	    cudaGetDeviceProperties( &prop, whichDevice );
 | 
				
			||||||
 | 
					//    	    //debug code
 | 
				
			||||||
 | 
					//    	    if (prop.canMapHostMemory != 1) {
 | 
				
			||||||
 | 
					//    	        printf( "Device can not map memory.\n" );
 | 
				
			||||||
 | 
					//    	    }
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//    	    printf("L2 Cache size= %u \n",prop.l2CacheSize);
 | 
				
			||||||
 | 
					//    	    printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
 | 
				
			||||||
 | 
					//    	    printf("maxGridSize= %i \n",prop.maxGridSize[0]);
 | 
				
			||||||
 | 
					//    	    printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
 | 
				
			||||||
 | 
					//    	    printf("deviceOverlap= %i \n",prop.deviceOverlap);
 | 
				
			||||||
 | 
					//    	    printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
 | 
				
			||||||
 | 
					//    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						//checkCudaErrors(cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // ALLOCATE GPU MEMORY FOR INPUT/OUTPUT and INTERNAL vectors
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    size_t size = signal_length_samples * sizeof(GPU_Complex);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						checkCudaErrors(cudaMalloc((void **)&d_sig_in, size));
 | 
				
			||||||
 | 
						checkCudaErrors(cudaMemset(d_sig_in,0,size));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						//checkCudaErrors(cudaMalloc((void **)&d_nco_in, size));
 | 
				
			||||||
 | 
						checkCudaErrors(cudaMalloc((void **)&d_sig_doppler_wiped, size));
 | 
				
			||||||
 | 
						checkCudaErrors(cudaMemset(d_sig_doppler_wiped,0,size));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, sizeof(std::complex<float>)*code_length_chips));
 | 
				
			||||||
 | 
						checkCudaErrors(cudaMemset(d_local_codes_in,0,sizeof(std::complex<float>)*code_length_chips));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    d_code_length_chips=code_length_chips;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						checkCudaErrors(cudaMalloc((void **)&d_shifts_chips, sizeof(float)*n_correlators));
 | 
				
			||||||
 | 
						checkCudaErrors(cudaMemset(d_shifts_chips,0,sizeof(float)*n_correlators));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						//scalars
 | 
				
			||||||
 | 
						checkCudaErrors(cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators));
 | 
				
			||||||
 | 
						checkCudaErrors(cudaMemset(d_corr_out,0,sizeof(std::complex<float>)*n_correlators));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Launch the Vector Add CUDA Kernel
 | 
				
			||||||
 | 
						threadsPerBlock = 256;
 | 
				
			||||||
 | 
					    blocksPerGrid =(int)(signal_length_samples+threadsPerBlock-1)/threadsPerBlock;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						cudaStreamCreate (&stream1) ;
 | 
				
			||||||
 | 
						cudaStreamCreate (&stream2) ;
 | 
				
			||||||
 | 
						return true;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool cuda_multicorrelator::set_local_code_and_taps(
 | 
				
			||||||
 | 
							int code_length_chips,
 | 
				
			||||||
 | 
							const std::complex<float>* local_codes_in,
 | 
				
			||||||
 | 
							float *shifts_chips,
 | 
				
			||||||
 | 
							int n_correlators
 | 
				
			||||||
 | 
							)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    // local code CPU -> GPU copy memory
 | 
				
			||||||
 | 
					    checkCudaErrors(cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex)*code_length_chips, cudaMemcpyHostToDevice,stream1));
 | 
				
			||||||
 | 
					    d_code_length_chips=(float)code_length_chips;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!)
 | 
				
			||||||
 | 
					    checkCudaErrors(cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float)*n_correlators,
 | 
				
			||||||
 | 
					                                    cudaMemcpyHostToDevice,stream1));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return true;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_cuda(
 | 
					bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_cuda(
 | 
				
			||||||
		std::complex<float>* corr_out,
 | 
							std::complex<float>* corr_out,
 | 
				
			||||||
		const std::complex<float>* sig_in,
 | 
							const std::complex<float>* sig_in,
 | 
				
			||||||
@@ -396,13 +616,88 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_cuda(
 | 
				
			|||||||
    return true;
 | 
					    return true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
 | 
				
			||||||
 | 
							std::complex<float>* corr_out,
 | 
				
			||||||
 | 
							const std::complex<float>* sig_in,
 | 
				
			||||||
 | 
							float rem_carrier_phase_in_rad,
 | 
				
			||||||
 | 
							float phase_step_rad,
 | 
				
			||||||
 | 
					        float code_phase_step_chips,
 | 
				
			||||||
 | 
					        float rem_code_phase_chips,
 | 
				
			||||||
 | 
							int signal_length_samples,
 | 
				
			||||||
 | 
							int n_correlators)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						size_t memSize = signal_length_samples * sizeof(std::complex<float>);
 | 
				
			||||||
 | 
						// input signal CPU -> GPU copy memory
 | 
				
			||||||
 | 
					    checkCudaErrors(cudaMemcpyAsync(d_sig_in, sig_in, memSize,
 | 
				
			||||||
 | 
					                                    cudaMemcpyHostToDevice, stream2));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //***** NOTICE: NCO is computed on-the-fly, not need to copy NCO into GPU! ****
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //Launch carrier wipe-off kernel here, while local codes are being copied to GPU!
 | 
				
			||||||
 | 
					    checkCudaErrors(cudaStreamSynchronize(stream2));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    CUDA_32fc_Doppler_wipeoff<<<blocksPerGrid, threadsPerBlock,0, stream2>>>(d_sig_doppler_wiped, d_sig_in,rem_carrier_phase_in_rad,phase_step_rad, signal_length_samples);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //wait for Doppler wipeoff end...
 | 
				
			||||||
 | 
					    checkCudaErrors(cudaStreamSynchronize(stream1));
 | 
				
			||||||
 | 
					    checkCudaErrors(cudaStreamSynchronize(stream2));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //launch the multitap correlator with integrated local code resampler!
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    scalarProdGPUCPXxN_shifts_chips<<<blocksPerGrid, threadsPerBlock,0 ,stream1>>>(
 | 
				
			||||||
 | 
								d_corr_out,
 | 
				
			||||||
 | 
								d_sig_doppler_wiped,
 | 
				
			||||||
 | 
								d_local_codes_in,
 | 
				
			||||||
 | 
								d_shifts_chips,
 | 
				
			||||||
 | 
								d_code_length_chips,
 | 
				
			||||||
 | 
						        code_phase_step_chips,
 | 
				
			||||||
 | 
						        rem_code_phase_chips,
 | 
				
			||||||
 | 
								n_correlators,
 | 
				
			||||||
 | 
								signal_length_samples
 | 
				
			||||||
 | 
							);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    checkCudaErrors(cudaGetLastError());
 | 
				
			||||||
 | 
					    //wait for correlators end...
 | 
				
			||||||
 | 
					    checkCudaErrors(cudaStreamSynchronize(stream1));
 | 
				
			||||||
 | 
					    // Copy the device result vector in device memory to the host result vector
 | 
				
			||||||
 | 
					    // in host memory.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    //scalar products (correlators outputs)
 | 
				
			||||||
 | 
					    checkCudaErrors(cudaMemcpyAsync(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
 | 
				
			||||||
 | 
					            cudaMemcpyDeviceToHost,stream1));
 | 
				
			||||||
 | 
					    checkCudaErrors(cudaStreamSynchronize(stream1));
 | 
				
			||||||
 | 
					    return true;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cuda_multicorrelator::cuda_multicorrelator()
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						d_sig_in=NULL;
 | 
				
			||||||
 | 
						d_nco_in=NULL;
 | 
				
			||||||
 | 
						d_sig_doppler_wiped=NULL;
 | 
				
			||||||
 | 
						d_local_codes_in=NULL;
 | 
				
			||||||
 | 
						d_shifts_samples=NULL;
 | 
				
			||||||
 | 
						d_shifts_chips=NULL;
 | 
				
			||||||
 | 
						d_corr_out=NULL;
 | 
				
			||||||
 | 
						threadsPerBlock=0;
 | 
				
			||||||
 | 
						blocksPerGrid=0;
 | 
				
			||||||
 | 
						d_code_length_chips=0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bool cuda_multicorrelator::free_cuda()
 | 
					bool cuda_multicorrelator::free_cuda()
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	// Free device global memory
 | 
						// Free device global memory
 | 
				
			||||||
	cudaFree(d_sig_in);
 | 
						if (d_sig_in!=NULL) cudaFree(d_sig_in);
 | 
				
			||||||
	//cudaFree(d_nco_in);
 | 
						if (d_nco_in!=NULL) cudaFree(d_nco_in);
 | 
				
			||||||
	cudaFree(d_local_codes_in);
 | 
						if (d_sig_doppler_wiped!=NULL) cudaFree(d_sig_doppler_wiped);
 | 
				
			||||||
	cudaFree(d_corr_out);
 | 
						if (d_local_codes_in!=NULL) cudaFree(d_local_codes_in);
 | 
				
			||||||
 | 
						if (d_corr_out!=NULL) cudaFree(d_corr_out);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (d_shifts_samples!=NULL) cudaFree(d_shifts_samples);
 | 
				
			||||||
 | 
						if (d_shifts_chips!=NULL) cudaFree(d_shifts_chips);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	cudaStreamDestroy(stream1) ;
 | 
						cudaStreamDestroy(stream1) ;
 | 
				
			||||||
	cudaStreamDestroy(stream2) ;
 | 
						cudaStreamDestroy(stream2) ;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -113,8 +113,20 @@ struct GPU_Complex_Short {
 | 
				
			|||||||
class cuda_multicorrelator
 | 
					class cuda_multicorrelator
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
 | 
						cuda_multicorrelator();
 | 
				
			||||||
	bool init_cuda(const int argc, const char **argv, int signal_length_samples, int local_codes_length_samples, int n_correlators);
 | 
						bool init_cuda(const int argc, const char **argv, int signal_length_samples, int local_codes_length_samples, int n_correlators);
 | 
				
			||||||
 | 
						bool init_cuda_integrated_resampler(
 | 
				
			||||||
 | 
								const int argc, const char **argv,
 | 
				
			||||||
 | 
								int signal_length_samples,
 | 
				
			||||||
 | 
								int code_length_chips,
 | 
				
			||||||
 | 
								int n_correlators
 | 
				
			||||||
 | 
								);
 | 
				
			||||||
 | 
						bool set_local_code_and_taps(
 | 
				
			||||||
 | 
								int code_length_chips,
 | 
				
			||||||
 | 
								const std::complex<float>* local_codes_in,
 | 
				
			||||||
 | 
								float *shifts_chips,
 | 
				
			||||||
 | 
								int n_correlators
 | 
				
			||||||
 | 
								);
 | 
				
			||||||
	bool free_cuda();
 | 
						bool free_cuda();
 | 
				
			||||||
	bool Carrier_wipeoff_multicorrelator_cuda(
 | 
						bool Carrier_wipeoff_multicorrelator_cuda(
 | 
				
			||||||
			std::complex<float>* corr_out,
 | 
								std::complex<float>* corr_out,
 | 
				
			||||||
@@ -125,6 +137,15 @@ public:
 | 
				
			|||||||
			const int *shifts_samples,
 | 
								const int *shifts_samples,
 | 
				
			||||||
			int signal_length_samples,
 | 
								int signal_length_samples,
 | 
				
			||||||
			int n_correlators);
 | 
								int n_correlators);
 | 
				
			||||||
 | 
						bool Carrier_wipeoff_multicorrelator_resampler_cuda(
 | 
				
			||||||
 | 
								std::complex<float>* corr_out,
 | 
				
			||||||
 | 
								const std::complex<float>* sig_in,
 | 
				
			||||||
 | 
								float rem_carrier_phase_in_rad,
 | 
				
			||||||
 | 
								float phase_step_rad,
 | 
				
			||||||
 | 
						        float code_phase_step_chips,
 | 
				
			||||||
 | 
						        float rem_code_phase_chips,
 | 
				
			||||||
 | 
								int signal_length_samples,
 | 
				
			||||||
 | 
								int n_correlators);
 | 
				
			||||||
private:
 | 
					private:
 | 
				
			||||||
	// Allocate the device input vectors
 | 
						// Allocate the device input vectors
 | 
				
			||||||
	GPU_Complex *d_sig_in;
 | 
						GPU_Complex *d_sig_in;
 | 
				
			||||||
@@ -133,6 +154,9 @@ private:
 | 
				
			|||||||
	GPU_Complex *d_local_codes_in;
 | 
						GPU_Complex *d_local_codes_in;
 | 
				
			||||||
	GPU_Complex *d_corr_out;
 | 
						GPU_Complex *d_corr_out;
 | 
				
			||||||
	int *d_shifts_samples;
 | 
						int *d_shifts_samples;
 | 
				
			||||||
 | 
						float *d_shifts_chips;
 | 
				
			||||||
 | 
						float d_code_length_chips;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	int threadsPerBlock;
 | 
						int threadsPerBlock;
 | 
				
			||||||
	int blocksPerGrid;
 | 
						int blocksPerGrid;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -94,7 +94,7 @@ Tracking_2nd_PLL_filter::Tracking_2nd_PLL_filter ()
 | 
				
			|||||||
{
 | 
					{
 | 
				
			||||||
    //--- PLL variables --------------------------------------------------------
 | 
					    //--- PLL variables --------------------------------------------------------
 | 
				
			||||||
    d_pdi_carr = 0.001;// Summation interval for carrier
 | 
					    d_pdi_carr = 0.001;// Summation interval for carrier
 | 
				
			||||||
    d_plldampingratio = 0.65;
 | 
					    d_plldampingratio = 0.7;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -18,6 +18,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
if(ENABLE_CUDA)
 | 
					if(ENABLE_CUDA)
 | 
				
			||||||
	FIND_PACKAGE(CUDA REQUIRED)
 | 
						FIND_PACKAGE(CUDA REQUIRED)
 | 
				
			||||||
 | 
						add_definitions(-DCUDA_GPU_ACCEL=1)
 | 
				
			||||||
endif(ENABLE_CUDA)
 | 
					endif(ENABLE_CUDA)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
set(GNSS_RECEIVER_SOURCES
 | 
					set(GNSS_RECEIVER_SOURCES
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1610,12 +1610,14 @@ std::unique_ptr<TrackingInterface> GNSSBlockFactory::GetTrkBlock(
 | 
				
			|||||||
                    out_streams, queue));
 | 
					                    out_streams, queue));
 | 
				
			||||||
            block = std::move(block_);
 | 
					            block = std::move(block_);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					#if CUDA_GPU_ACCEL
 | 
				
			||||||
    else if (implementation.compare("GPS_L1_CA_DLL_PLL_Tracking_GPU") == 0)
 | 
					    else if (implementation.compare("GPS_L1_CA_DLL_PLL_Tracking_GPU") == 0)
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            std::unique_ptr<TrackingInterface> block_(new GpsL1CaDllPllTrackingGPU(configuration.get(), role, in_streams,
 | 
					            std::unique_ptr<TrackingInterface> block_(new GpsL1CaDllPllTrackingGPU(configuration.get(), role, in_streams,
 | 
				
			||||||
                    out_streams, queue));
 | 
					                    out_streams, queue));
 | 
				
			||||||
            block = std::move(block_);
 | 
					            block = std::move(block_);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
    else
 | 
					    else
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            // Log fatal. This causes execution to stop.
 | 
					            // Log fatal. This causes execution to stop.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -33,6 +33,12 @@ if(ENABLE_UHD)
 | 
				
			|||||||
    set(GNSS_SDR_OPTIONAL_HEADERS ${GNSS_SDR_OPTIONAL_HEADERS} ${UHD_INCLUDE_DIRS})
 | 
					    set(GNSS_SDR_OPTIONAL_HEADERS ${GNSS_SDR_OPTIONAL_HEADERS} ${UHD_INCLUDE_DIRS})
 | 
				
			||||||
endif(ENABLE_UHD)
 | 
					endif(ENABLE_UHD)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if(ENABLE_CUDA)
 | 
				
			||||||
 | 
						FIND_PACKAGE(CUDA REQUIRED)
 | 
				
			||||||
 | 
					    add_definitions(-DCUDA_GPU_ACCEL=1)
 | 
				
			||||||
 | 
					endif(ENABLE_CUDA)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
include_directories(
 | 
					include_directories(
 | 
				
			||||||
     ${CMAKE_SOURCE_DIR}/src/core/system_parameters
 | 
					     ${CMAKE_SOURCE_DIR}/src/core/system_parameters
 | 
				
			||||||
     ${CMAKE_SOURCE_DIR}/src/core/interfaces
 | 
					     ${CMAKE_SOURCE_DIR}/src/core/interfaces
 | 
				
			||||||
@@ -48,6 +54,7 @@ include_directories(
 | 
				
			|||||||
     ${GNURADIO_RUNTIME_INCLUDE_DIRS}
 | 
					     ${GNURADIO_RUNTIME_INCLUDE_DIRS}
 | 
				
			||||||
     ${GNSS_SDR_OPTIONAL_HEADERS}
 | 
					     ${GNSS_SDR_OPTIONAL_HEADERS}
 | 
				
			||||||
     ${VOLK_GNSSSDR_INCLUDE_DIRS}
 | 
					     ${VOLK_GNSSSDR_INCLUDE_DIRS}
 | 
				
			||||||
 | 
					     ${CUDA_INCLUDE_DIRS}
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
add_definitions( -DGNSS_SDR_VERSION="${VERSION}" )
 | 
					add_definitions( -DGNSS_SDR_VERSION="${VERSION}" )
 | 
				
			||||||
@@ -79,6 +86,7 @@ target_link_libraries(gnss-sdr ${MAC_LIBRARIES}
 | 
				
			|||||||
                               ${GNSS_SDR_OPTIONAL_LIBS}
 | 
					                               ${GNSS_SDR_OPTIONAL_LIBS}
 | 
				
			||||||
                               gnss_sp_libs
 | 
					                               gnss_sp_libs
 | 
				
			||||||
                               gnss_rx
 | 
					                               gnss_rx
 | 
				
			||||||
 | 
					                               ${CUDA_LIBRARIES}
 | 
				
			||||||
                               )
 | 
					                               )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -68,6 +68,11 @@
 | 
				
			|||||||
#include "sbas_ephemeris.h"
 | 
					#include "sbas_ephemeris.h"
 | 
				
			||||||
#include "sbas_time.h"
 | 
					#include "sbas_time.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#if CUDA_GPU_ACCEL
 | 
				
			||||||
 | 
						// For the CUDA runtime routines (prefixed with "cuda_")
 | 
				
			||||||
 | 
						#include <cuda_runtime.h>
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
using google::LogMessage;
 | 
					using google::LogMessage;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -143,6 +148,17 @@ int main(int argc, char** argv)
 | 
				
			|||||||
    google::ParseCommandLineFlags(&argc, &argv, true);
 | 
					    google::ParseCommandLineFlags(&argc, &argv, true);
 | 
				
			||||||
    std::cout << "Initializing GNSS-SDR v" << gnss_sdr_version << " ... Please wait." << std::endl;
 | 
					    std::cout << "Initializing GNSS-SDR v" << gnss_sdr_version << " ... Please wait." << std::endl;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						#if CUDA_GPU_ACCEL
 | 
				
			||||||
 | 
							// Reset the device
 | 
				
			||||||
 | 
							// cudaDeviceReset causes the driver to clean up all state. While
 | 
				
			||||||
 | 
							// not mandatory in normal operation, it is good practice.  It is also
 | 
				
			||||||
 | 
							// needed to ensure correct operation when the application is being
 | 
				
			||||||
 | 
							// profiled. Calling cudaDeviceReset causes all profile data to be
 | 
				
			||||||
 | 
							// flushed before the application exits
 | 
				
			||||||
 | 
							cudaDeviceReset();
 | 
				
			||||||
 | 
							 std::cout << "Reset CUDA device done " << std::endl;
 | 
				
			||||||
 | 
						#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(GOOGLE_STRIP_LOG == 0)
 | 
					    if(GOOGLE_STRIP_LOG == 0)
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            google::InitGoogleLogging(argv[0]);
 | 
					            google::InitGoogleLogging(argv[0]);
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user