1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2024-12-13 19:50:34 +00:00

Merge branch 'next' of https://github.com/Arribas/gnss-sdr into next

Working with GPUs
This commit is contained in:
Carles Fernandez 2015-08-25 15:54:02 +02:00
commit e38cb40d4f
30 changed files with 8089 additions and 25 deletions

View File

@ -955,6 +955,20 @@ else(ENABLE_OSMOSDR)
message(STATUS "Enable it with 'cmake -DENABLE_OSMOSDR=ON ../' to add support for OsmoSDR and other front-ends (HackRF, bladeRF, Realtek's RTL2832U-based USB dongles, etc.)" )
endif(ENABLE_OSMOSDR)
if($ENV{CUDA_GPU_ACCEL})
message(STATUS "CUDA_GPU_ACCEL environment variable found." )
set(ENABLE_CUDA ON)
endif($ENV{CUDA_GPU_ACCEL})
if(ENABLE_CUDA)
message(STATUS "NVIDIA CUDA GPU Acceleration will be enabled." )
message(STATUS "You can disable it with 'cmake -DENABLE_CUDA=OFF ../'" )
else(ENABLE_CUDA)
message(STATUS "NVIDIA CUDA GPU Acceleration will is not enabled." )
message(STATUS "Enable it with 'cmake -DENABLE_CUDA=ON ../' to add support for the Teleorbit Flexiband front-end." )
endif(ENABLE_CUDA)
if($ENV{FLEXIBAND_DRIVER})
message(STATUS "FLEXIBAND_DRIVER environment variable found." )
set(ENABLE_FLEXIBAND ON)

View File

@ -0,0 +1,305 @@
; Default configuration file
; You can define your own receiver and invoke it by doing
; gnss-sdr --config_file=my_GNSS_SDR_configuration.conf
;
[GNSS-SDR]
;######### GLOBAL OPTIONS ##################
;internal_fs_hz: Internal signal sampling frequency after the signal conditioning stage [Hz].
GNSS-SDR.internal_fs_hz=4000000
;######### CONTROL_THREAD CONFIG ############
ControlThread.wait_for_flowgraph=false
;######### SIGNAL_SOURCE CONFIG ############
;#implementation: Use [File_Signal_Source] or [UHD_Signal_Source] or [GN3S_Signal_Source] (experimental)
SignalSource.implementation=File_Signal_Source
;#filename: path to file with the captured GNSS signal samples to be processed
SignalSource.filename=/home/javier/signals/4msps.dat
;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
SignalSource.item_type=gr_complex
;#sampling_frequency: Original Signal sampling frequency in [Hz]
SignalSource.sampling_frequency=4000000
;#freq: RF front-end center frequency in [Hz]
SignalSource.freq=1575420000
;#gain: Front-end Gain in [dB]
SignalSource.gain=60
;#subdevice: UHD subdevice specification (for USRP1 use A:0 or B:0)
SignalSource.subdevice=B:0
;#samples: Number of samples to be processed. Notice that 0 indicates the entire file.
SignalSource.samples=0
;#repeat: Repeat the processing file. Disable this option in this version
SignalSource.repeat=false
;#dump: Dump the Signal source data to a file. Disable this option in this version
SignalSource.dump=false
SignalSource.dump_filename=../data/signal_source.dat
;#enable_throttle_control: Enabling this option tells the signal source to keep the delay between samples in post processing.
; it helps to not overload the CPU, but the processing time will be longer.
SignalSource.enable_throttle_control=false
;######### SIGNAL_CONDITIONER CONFIG ############
;## It holds blocks to change data type, filter and resample input data.
;#implementation: Use [Pass_Through] or [Signal_Conditioner]
;#[Pass_Through] disables this block and the [DataTypeAdapter], [InputFilter] and [Resampler] blocks
;#[Signal_Conditioner] enables this block. Then you have to configure [DataTypeAdapter], [InputFilter] and [Resampler] blocks
;SignalConditioner.implementation=Signal_Conditioner
SignalConditioner.implementation=Pass_Through
;######### DATA_TYPE_ADAPTER CONFIG ############
;## Changes the type of input data. Please disable it in this version.
;#implementation: [Pass_Through] disables this block
DataTypeAdapter.implementation=Pass_Through
;######### INPUT_FILTER CONFIG ############
;## Filter the input data. Can be combined with frequency translation for IF signals
;#implementation: Use [Pass_Through] or [Fir_Filter] or [Freq_Xlating_Fir_Filter]
;#[Pass_Through] disables this block
;#[Fir_Filter] enables a FIR Filter
;#[Freq_Xlating_Fir_Filter] enables FIR filter and a composite frequency translation that shifts IF down to zero Hz.
;InputFilter.implementation=Fir_Filter
;InputFilter.implementation=Freq_Xlating_Fir_Filter
InputFilter.implementation=Pass_Through
;#dump: Dump the filtered data to a file.
InputFilter.dump=false
;#dump_filename: Log path and filename.
InputFilter.dump_filename=../data/input_filter.dat
;#The following options are used in the filter design of Fir_Filter and Freq_Xlating_Fir_Filter implementation.
;#These options are based on parameters of gnuradio's function: gr_remez.
;#These function calculates the optimal (in the Chebyshev/minimax sense) FIR filter inpulse reponse given a set of band edges, the desired reponse on those bands, and the weight given to the error in those bands.
;#input_item_type: Type and resolution for input signal samples. Use only gr_complex in this version.
InputFilter.input_item_type=gr_complex
;#outut_item_type: Type and resolution for output filtered signal samples. Use only gr_complex in this version.
InputFilter.output_item_type=gr_complex
;#taps_item_type: Type and resolution for the taps of the filter. Use only float in this version.
InputFilter.taps_item_type=float
;#number_of_taps: Number of taps in the filter. Increasing this parameter increases the processing time
InputFilter.number_of_taps=5
;#number_of _bands: Number of frequency bands in the filter.
InputFilter.number_of_bands=2
;#bands: frequency at the band edges [ b1 e1 b2 e2 b3 e3 ...].
;#Frequency is in the range [0, 1], with 1 being the Nyquist frequency (Fs/2)
;#The number of band_begin and band_end elements must match the number of bands
InputFilter.band1_begin=0.0
InputFilter.band1_end=0.45
InputFilter.band2_begin=0.55
InputFilter.band2_end=1.0
;#ampl: desired amplitude at the band edges [ a(b1) a(e1) a(b2) a(e2) ...].
;#The number of ampl_begin and ampl_end elements must match the number of bands
InputFilter.ampl1_begin=1.0
InputFilter.ampl1_end=1.0
InputFilter.ampl2_begin=0.0
InputFilter.ampl2_end=0.0
;#band_error: weighting applied to each band (usually 1).
;#The number of band_error elements must match the number of bands
InputFilter.band1_error=1.0
InputFilter.band2_error=1.0
;#filter_type: one of "bandpass", "hilbert" or "differentiator"
InputFilter.filter_type=bandpass
;#grid_density: determines how accurately the filter will be constructed.
;The minimum value is 16; higher values are slower to compute the filter.
InputFilter.grid_density=16
;#The following options are used only in Freq_Xlating_Fir_Filter implementation.
;#InputFilter.IF is the intermediate frequency (in Hz) shifted down to zero Hz
InputFilter.sampling_frequency=4000000
InputFilter.IF=0
;######### RESAMPLER CONFIG ############
;## Resamples the input data.
;#implementation: Use [Pass_Through] or [Direct_Resampler]
;#[Pass_Through] disables this block
;#[Direct_Resampler] enables a resampler that implements a nearest neigbourhood interpolation
;Resampler.implementation=Direct_Resampler
Resampler.implementation=Pass_Through
;#dump: Dump the resamplered data to a file.
Resampler.dump=false
;#dump_filename: Log path and filename.
Resampler.dump_filename=../data/resampler.dat
;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
Resampler.item_type=gr_complex
;#sample_freq_in: the sample frequency of the input signal
Resampler.sample_freq_in=8000000
;#sample_freq_out: the desired sample frequency of the output signal
Resampler.sample_freq_out=4000000
;######### CHANNELS GLOBAL CONFIG ############
;#count: Number of available GPS satellite channels.
Channels_GPS.count=1
;#count: Number of available Galileo satellite channels.
Channels_Galileo.count=0
;#in_acquisition: Number of channels simultaneously acquiring for the whole receiver
Channels.in_acquisition=1
;#system: GPS, GLONASS, GALILEO, SBAS or COMPASS
;#if the option is disabled by default is assigned GPS
Channel.system=GPS
;#if the option is disabled by default is assigned "1C" GPS L1 C/A
Channel.signal=1C
;######### SPECIFIC CHANNELS CONFIG ######
;#The following options are specific to each channel and overwrite the generic options
;######### CHANNEL 0 CONFIG ############
;Channel0.system=GPS
;Channel0.signal=1C
;#satellite: Satellite PRN ID for this channel. Disable this option to random search
;Channel0.satellite=11
;######### CHANNEL 1 CONFIG ############
;Channel1.system=GPS
;Channel1.signal=1C
;Channel1.satellite=18
;######### ACQUISITION GLOBAL CONFIG ############
;#dump: Enable or disable the acquisition internal data file logging [true] or [false]
Acquisition_GPS.dump=false
;#filename: Log path and filename
Acquisition_GPS.dump_filename=./acq_dump.dat
;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
Acquisition_GPS.item_type=gr_complex
;#if: Signal intermediate frequency in [Hz]
Acquisition_GPS.if=0
;#sampled_ms: Signal block duration for the acquisition signal detection [ms]
Acquisition_GPS.sampled_ms=1
;#implementation: Acquisition algorithm selection for this channel: [GPS_L1_CA_PCPS_Acquisition] or [Galileo_E1_PCPS_Ambiguous_Acquisition]
Acquisition_GPS.implementation=GPS_L1_CA_PCPS_Acquisition
;#threshold: Acquisition threshold
Acquisition_GPS.threshold=0.005
;#pfa: Acquisition false alarm probability. This option overrides the threshold option. Only use with implementations: [GPS_L1_CA_PCPS_Acquisition] or [Galileo_E1_PCPS_Ambiguous_Acquisition]
;Acquisition_GPS.pfa=0.01
;#doppler_max: Maximum expected Doppler shift [Hz]
Acquisition_GPS.doppler_max=10000
;#doppler_max: Doppler step in the grid search [Hz]
Acquisition_GPS.doppler_step=500
;######### TRACKING GLOBAL CONFIG ############
;#implementation: Selected tracking algorithm: [GPS_L1_CA_DLL_PLL_Tracking] or [GPS_L1_CA_DLL_FLL_PLL_Tracking] or [GPS_L1_CA_TCP_CONNECTOR_Tracking] or [Galileo_E1_DLL_PLL_VEML_Tracking]
Tracking_GPS.implementation=GPS_L1_CA_DLL_PLL_Tracking_GPU
;#item_type: Type and resolution for each of the signal samples. Use only [gr_complex] in this version.
Tracking_GPS.item_type=gr_complex
;#sampling_frequency: Signal Intermediate Frequency in [Hz]
Tracking_GPS.if=0
;#dump: Enable or disable the Tracking internal binary data file logging [true] or [false]
Tracking_GPS.dump=true
;#dump_filename: Log path and filename. Notice that the tracking channel will add "x.dat" where x is the channel number.
Tracking_GPS.dump_filename=../data/epl_tracking_ch_
;#pll_bw_hz: PLL loop filter bandwidth [Hz]
Tracking_GPS.pll_bw_hz=55.0;
;#dll_bw_hz: DLL loop filter bandwidth [Hz]
Tracking_GPS.dll_bw_hz=1.5
;#fll_bw_hz: FLL loop filter bandwidth [Hz]
Tracking_GPS.fll_bw_hz=10.0;
;#order: PLL/DLL loop filter order [2] or [3]
Tracking_GPS.order=3;
;######### TELEMETRY DECODER GPS CONFIG ############
;#implementation: Use [GPS_L1_CA_Telemetry_Decoder] for GPS L1 C/A
TelemetryDecoder_GPS.implementation=GPS_L1_CA_Telemetry_Decoder
TelemetryDecoder_GPS.dump=false
;#decimation factor
TelemetryDecoder_GPS.decimation_factor=1;
;######### OBSERVABLES CONFIG ############
;#implementation: Use [GPS_L1_CA_Observables] for GPS L1 C/A.
Observables.implementation=GPS_L1_CA_Observables
;#dump: Enable or disable the Observables internal binary data file logging [true] or [false]
Observables.dump=false
;#dump_filename: Log path and filename.
Observables.dump_filename=./observables.dat
;######### PVT CONFIG ############
;#implementation: Position Velocity and Time (PVT) implementation algorithm: Use [GPS_L1_CA_PVT] in this version.
PVT.implementation=GPS_L1_CA_PVT
;#averaging_depth: Number of PVT observations in the moving average algorithm
PVT.averaging_depth=100
;#flag_average: Enables the PVT averaging between output intervals (arithmetic mean) [true] or [false]
PVT.flag_averaging=false
;#output_rate_ms: Period between two PVT outputs. Notice that the minimum period is equal to the tracking integration time (for GPS CA L1 is 1ms) [ms]
PVT.output_rate_ms=10
;#display_rate_ms: Position console print (std::out) interval [ms]. Notice that output_rate_ms<=display_rate_ms.
PVT.display_rate_ms=500
;# RINEX, KML, and NMEA output configuration
;#dump_filename: Log path and filename without extension. Notice that PVT will add ".dat" to the binary dump and ".kml" to GoogleEarth dump.
PVT.dump_filename=./PVT
;#nmea_dump_filename: NMEA log path and filename
PVT.nmea_dump_filename=./gnss_sdr_pvt.nmea;
;#flag_nmea_tty_port: Enable or disable the NMEA log to a serial TTY port (Can be used with real hardware or virtual one)
PVT.flag_nmea_tty_port=false;
;#nmea_dump_devname: serial device descriptor for NMEA logging
PVT.nmea_dump_devname=/dev/pts/4
;#dump: Enable or disable the PVT internal binary data file logging [true] or [false]
PVT.dump=false
;######### OUTPUT_FILTER CONFIG ############
;# Receiver output filter: Leave this block disabled in this version
OutputFilter.implementation=Null_Sink_Output_Filter
OutputFilter.filename=data/gnss-sdr.dat
OutputFilter.item_type=gr_complex

View File

@ -29,13 +29,13 @@ GNSS-SDR.SUPL_CI=0x31b0
SignalSource.implementation=Flexiband_Signal_Source
SignalSource.flag_read_file=true
SignalSource.signal_file=/datalogger/captures/eclipse/eclipse_IIIa_2.bin
SignalSource.signal_file=/datalogger/L125_III1b_210s.usb
;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
SignalSource.item_type=gr_complex
;# FPGA firmware file
SignalSource.firmware_file=flexiband_III-1a.bit
SignalSource.firmware_file=flexiband_III-1b.bit
;#RF_channels: Number of RF channels present in the frontend device, must agree the FPGA firmware file
SignalSource.RF_channels=1

View File

@ -28,9 +28,9 @@ GNSS-SDR.SUPL_CI=0x31b0
;#implementation: Use [File_Signal_Source] or [UHD_Signal_Source] or [GN3S_Signal_Source] (experimental)
SignalSource.implementation=Flexiband_Signal_Source
SignalSource.flag_read_file=false
#SignalSource.signal_file=/datalogger/signals/Fraunhofer/L125_III1b_210s.usb
SignalSource.signal_file=/datalogger/captures/flexiband_III_1b_cap1.usb
SignalSource.flag_read_file=true
SignalSource.signal_file=/datalogger/L125_III1b_210s.usb
#SignalSource.signal_file=/datalogger/captures/flexiband_III_1b_cap1.usb
;#item_type: Type and resolution for each of the signal samples. Use only gr_complex in this version.
SignalSource.item_type=gr_complex
@ -136,8 +136,8 @@ InputFilter0.grid_density=16
InputFilter0.sampling_frequency=20000000
;# IF deviation due to front-end LO inaccuracies [HZ]
;# WARNING: Fraunhofer front-end hardwareconfigurations can difer. Signals available on http://www.iis.fraunhofer.de/de/ff/lok/leist/test/flexiband.html are centered on 0 Hz, ALL BANDS.
InputFilter0.IF=-205000
;#InputFilter0.IF=0
;#InputFilter0.IF=-205000
InputFilter0.IF=0
;# Decimation factor after the frequency tranaslating block
InputFilter0.decimation_factor=8
@ -230,8 +230,8 @@ InputFilter1.grid_density=16
InputFilter1.sampling_frequency=20000000
;# IF deviation due to front-end LO inaccuracies [HZ]
;# WARNING: Fraunhofer front-end hardwareconfigurations can difer. Signals available on http://www.iis.fraunhofer.de/de/ff/lok/leist/test/flexiband.html are centered on 0 Hz, ALL BANDS.
InputFilter1.IF=100000
;#InputFilter1.IF=0
;#InputFilter1.IF=100000
InputFilter1.IF=0
;# Decimation factor after the frequency tranaslating block
InputFilter1.decimation_factor=8
@ -272,7 +272,7 @@ Resampler2.implementation=Pass_Through
;#count: Number of available GPS satellite channels.
Channels_1C.count=8
Channels_1B.count=1
Channels_2S.count=8
Channels_2S.count=1
;#count: Number of available Galileo satellite channels.
;Channels_Galileo.count=0
;#in_acquisition: Number of channels simultaneously acquiring for the whole receiver
@ -378,13 +378,13 @@ Acquisition_1C.max_dwells=1
;#implementation: Selected tracking algorithm: [GPS_L1_CA_DLL_PLL_Tracking] or [GPS_L1_CA_DLL_FLL_PLL_Tracking]
Tracking_1C.implementation=GPS_L1_CA_DLL_PLL_Tracking
Tracking_1C.implementation=GPS_L1_CA_DLL_PLL_Tracking_GPU
Tracking_1C.item_type=gr_complex
Tracking_1C.if=0
Tracking_1C.dump=true
Tracking_1C.dump_filename=./tracking_ch_
Tracking_1C.dump=false
Tracking_1C.dump_filename=../data/epl_tracking_ch_
Tracking_1C.pll_bw_hz=40.0;
Tracking_1C.dll_bw_hz=3.0;
Tracking_1C.dll_bw_hz=1.5;
Tracking_1C.fll_bw_hz=10.0;
Tracking_1C.order=3;
Tracking_1C.early_late_space_chips=0.5;
@ -405,7 +405,7 @@ Acquisition_2S.max_dwells=1
Tracking_2S.implementation=GPS_L2_M_DLL_PLL_Tracking
Tracking_2S.item_type=gr_complex
Tracking_2S.if=0
Tracking_2S.dump=true
Tracking_2S.dump=false
Tracking_2S.dump_filename=./tracking_ch_
Tracking_2S.pll_bw_hz=1.5;
Tracking_2S.dll_bw_hz=0.3;
@ -447,7 +447,7 @@ Tracking_1B.item_type=gr_complex
Tracking_1B.if=0
;#dump: Enable or disable the Tracking internal binary data file logging [true] or [false]
Tracking_1B.dump=true
Tracking_1B.dump=false
;#dump_filename: Log path and filename. Notice that the tracking channel will add "x.dat" where x is the channel number.
Tracking_1B.dump_filename=./veml_tracking_ch_
@ -497,7 +497,7 @@ TelemetryDecoder_1B.decimation_factor=5;
Observables.implementation=Mixed_Observables
;#dump: Enable or disable the Observables internal binary data file logging [true] or [false]
Observables.dump=true
Observables.dump=false
;#dump_filename: Log path and filename.
Observables.dump_filename=./observables.dat

View File

@ -135,7 +135,8 @@ InputFilter0.grid_density=16
; i.e. using front-end-cal as reported here:http://www.cttc.es/publication/turning-a-television-into-a-gnss-receiver/
InputFilter0.sampling_frequency=20000000
;# IF deviation due to front-end LO inaccuracies [HZ]
InputFilter0.IF=-205000
;#InputFilter0.IF=-205000
InputFilter0.IF=0
;# Decimation factor after the frequency tranaslating block
InputFilter0.decimation_factor=4

View File

@ -58,7 +58,7 @@ if(ENABLE_FLEXIBAND)
if(OS_IS_MACOSX)
set(MACOSX_ARGS "-DCMAKE_CXX_COMPILER=/usr/bin/clang++")
endif(OS_IS_MACOSX)
find_package(teleorbit REQUIRED)
find_package(Teleorbit REQUIRED)
if(NOT TELEORBIT_FOUND)
message(FATAL_ERROR "Teleorbit Flexiband GNURadio driver required to build gnss-sdr with the optional FLEXIBAND adapter")
endif(NOT TELEORBIT_FOUND)

View File

@ -16,6 +16,10 @@
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
if(ENABLE_CUDA)
FIND_PACKAGE(CUDA REQUIRED)
set(OPT_TRACKING_ADAPTERS ${OPT_TRACKING_ADAPTERS} gps_l1_ca_dll_pll_tracking_gpu.cc)
endif(ENABLE_CUDA)
set(TRACKING_ADAPTER_SOURCES
galileo_e1_dll_pll_veml_tracking.cc
@ -27,6 +31,7 @@ set(TRACKING_ADAPTER_SOURCES
gps_l1_ca_tcp_connector_tracking.cc
galileo_e5a_dll_pll_tracking.cc
gps_l2_m_dll_pll_tracking.cc
${OPT_TRACKING_ADAPTERS}
)
include_directories(
@ -40,6 +45,7 @@ include_directories(
${GLOG_INCLUDE_DIRS}
${GFlags_INCLUDE_DIRS}
${GNURADIO_RUNTIME_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS}
)
file(GLOB TRACKING_ADAPTER_HEADERS "*.h")

View File

@ -0,0 +1,158 @@
/*!
* \file gps_l1_ca_dll_pll_tracking_gpu.cc
* \brief Implementation of an adapter of a DLL+PLL tracking loop block using GPU accelerated functions
* for GPS L1 C/A to a TrackingInterface
* \author Javier Arribas, 2015. jarribas(at)cttc.es
*
* Code DLL + carrier PLL according to the algorithms described in:
* K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
* A Software-Defined GPS and Galileo Receiver. A Single-Frequency
* Approach, Birkhauser, 2007
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#include "gps_l1_ca_dll_pll_tracking_gpu.h"
#include <glog/logging.h>
#include "GPS_L1_CA.h"
#include "configuration_interface.h"
using google::LogMessage;
GpsL1CaDllPllTrackingGPU::GpsL1CaDllPllTrackingGPU(
ConfigurationInterface* configuration, std::string role,
unsigned int in_streams, unsigned int out_streams,
boost::shared_ptr<gr::msg_queue> queue) :
role_(role), in_streams_(in_streams), out_streams_(out_streams),
queue_(queue)
{
DLOG(INFO) << "role " << role;
//################# CONFIGURATION PARAMETERS ########################
int fs_in;
int vector_length;
int f_if;
bool dump;
std::string dump_filename;
std::string item_type;
std::string default_item_type = "gr_complex";
float pll_bw_hz;
float dll_bw_hz;
float early_late_space_chips;
item_type = configuration->property(role + ".item_type", default_item_type);
//vector_length = configuration->property(role + ".vector_length", 2048);
fs_in = configuration->property("GNSS-SDR.internal_fs_hz", 2048000);
f_if = configuration->property(role + ".if", 0);
dump = configuration->property(role + ".dump", false);
pll_bw_hz = configuration->property(role + ".pll_bw_hz", 50.0);
dll_bw_hz = configuration->property(role + ".dll_bw_hz", 2.0);
early_late_space_chips = configuration->property(role + ".early_late_space_chips", 0.5);
std::string default_dump_filename = "./track_ch";
dump_filename = configuration->property(role + ".dump_filename",
default_dump_filename); //unused!
vector_length = std::round(fs_in / (GPS_L1_CA_CODE_RATE_HZ / GPS_L1_CA_CODE_LENGTH_CHIPS));
//################# MAKE TRACKING GNURadio object ###################
if (item_type.compare("gr_complex") == 0)
{
item_size_ = sizeof(gr_complex);
tracking_ = gps_l1_ca_dll_pll_make_tracking_gpu_cc(
f_if,
fs_in,
vector_length,
queue_,
dump,
dump_filename,
pll_bw_hz,
dll_bw_hz,
early_late_space_chips);
}
else
{
item_size_ = sizeof(gr_complex);
LOG(WARNING) << item_type << " unknown tracking item type.";
}
channel_ = 0;
channel_internal_queue_ = 0;
DLOG(INFO) << "tracking(" << tracking_->unique_id() << ")";
}
GpsL1CaDllPllTrackingGPU::~GpsL1CaDllPllTrackingGPU()
{}
void GpsL1CaDllPllTrackingGPU::start_tracking()
{
tracking_->start_tracking();
}
/*
* Set tracking channel unique ID
*/
void GpsL1CaDllPllTrackingGPU::set_channel(unsigned int channel)
{
channel_ = channel;
tracking_->set_channel(channel);
}
/*
* Set tracking channel internal queue
*/
void GpsL1CaDllPllTrackingGPU::set_channel_queue(
concurrent_queue<int> *channel_internal_queue)
{
channel_internal_queue_ = channel_internal_queue;
tracking_->set_channel_queue(channel_internal_queue_);
}
void GpsL1CaDllPllTrackingGPU::set_gnss_synchro(Gnss_Synchro* p_gnss_synchro)
{
tracking_->set_gnss_synchro(p_gnss_synchro);
}
void GpsL1CaDllPllTrackingGPU::connect(gr::top_block_sptr top_block)
{
if(top_block) { /* top_block is not null */};
//nothing to connect, now the tracking uses gr_sync_decimator
}
void GpsL1CaDllPllTrackingGPU::disconnect(gr::top_block_sptr top_block)
{
if(top_block) { /* top_block is not null */};
//nothing to disconnect, now the tracking uses gr_sync_decimator
}
gr::basic_block_sptr GpsL1CaDllPllTrackingGPU::get_left_block()
{
return tracking_;
}
gr::basic_block_sptr GpsL1CaDllPllTrackingGPU::get_right_block()
{
return tracking_;
}

View File

@ -0,0 +1,113 @@
/*!
* \file gps_l1_ca_dll_pll_tracking_gpu.h
* \brief Implementation of an adapter of a DLL+PLL tracking loop block using GPU accelerated functions
* for GPS L1 C/A to a TrackingInterface
* \author Javier Arribas, 2015. jarribas(at)cttc.es
*
* Code DLL + carrier PLL according to the algorithms described in:
* K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
* A Software-Defined GPS and Galileo Receiver. A Single-Frequency
* Approach, Birkha user, 2007
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef GNSS_SDR_GPS_L1_CA_DLL_PLL_TRACKING_GPU_H_
#define GNSS_SDR_GPS_L1_CA_DLL_PLL_TRACKING_GPU_H_
#include <string>
#include <gnuradio/msg_queue.h>
#include "tracking_interface.h"
#include "gps_l1_ca_dll_pll_tracking_gpu_cc.h"
class ConfigurationInterface;
/*!
* \brief This class implements a code DLL + carrier PLL tracking loop using GPU accelerated functions
*/
class GpsL1CaDllPllTrackingGPU : public TrackingInterface
{
public:
GpsL1CaDllPllTrackingGPU(ConfigurationInterface* configuration,
std::string role,
unsigned int in_streams,
unsigned int out_streams,
boost::shared_ptr<gr::msg_queue> queue);
virtual ~GpsL1CaDllPllTrackingGPU();
std::string role()
{
return role_;
}
//! Returns "GPS_L1_CA_DLL_PLL_Tracking"
std::string implementation()
{
return "GPS_L1_CA_DLL_PLL_Tracking_GPU";
}
size_t item_size()
{
return item_size_;
}
void connect(gr::top_block_sptr top_block);
void disconnect(gr::top_block_sptr top_block);
gr::basic_block_sptr get_left_block();
gr::basic_block_sptr get_right_block();
/*!
* \brief Set tracking channel unique ID
*/
void set_channel(unsigned int channel);
/*!
* \brief Set acquisition/tracking common Gnss_Synchro object pointer
* to efficiently exchange synchronization data between acquisition and tracking blocks
*/
void set_gnss_synchro(Gnss_Synchro* p_gnss_synchro);
/*!
* \brief Set tracking channel internal queue
*/
void set_channel_queue(concurrent_queue<int> *channel_internal_queue);
void start_tracking();
private:
gps_l1_ca_dll_pll_tracking_gpu_cc_sptr tracking_;
size_t item_size_;
unsigned int channel_;
std::string role_;
unsigned int in_streams_;
unsigned int out_streams_;
boost::shared_ptr<gr::msg_queue> queue_;
concurrent_queue<int> *channel_internal_queue_;
};
#endif // GNSS_SDR_GPS_L1_CA_DLL_PLL_TRACKING_GPU_H_

View File

@ -16,6 +16,12 @@
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
if(ENABLE_CUDA)
FIND_PACKAGE(CUDA REQUIRED)
set(OPT_TRACKING_BLOCKS ${OPT_TRACKING_BLOCKS} gps_l1_ca_dll_pll_tracking_gpu_cc.cc)
endif(ENABLE_CUDA)
set(TRACKING_GR_BLOCKS_SOURCES
galileo_e1_dll_pll_veml_tracking_cc.cc
galileo_volk_e1_dll_pll_veml_tracking_cc.cc
@ -26,6 +32,7 @@ set(TRACKING_GR_BLOCKS_SOURCES
gps_l1_ca_tcp_connector_tracking_cc.cc
galileo_e5a_dll_pll_tracking_cc.cc
gps_l2_m_dll_pll_tracking_cc.cc
${OPT_TRACKING_BLOCKS}
)
include_directories(
@ -40,6 +47,8 @@ include_directories(
${Boost_INCLUDE_DIRS}
${GNURADIO_RUNTIME_INCLUDE_DIRS}
${VOLK_GNSSSDR_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS}
${CMAKE_SOURCE_DIR}/src/algorithms/tracking/libs/cudahelpers
)
if(ENABLE_GENERIC_ARCH)
@ -49,7 +58,8 @@ endif(ENABLE_GENERIC_ARCH)
file(GLOB TRACKING_GR_BLOCKS_HEADERS "*.h")
add_library(tracking_gr_blocks ${TRACKING_GR_BLOCKS_SOURCES} ${TRACKING_GR_BLOCKS_HEADERS})
source_group(Headers FILES ${TRACKING_GR_BLOCKS_HEADERS})
target_link_libraries(tracking_gr_blocks tracking_lib ${GNURADIO_RUNTIME_LIBRARIES} gnss_sp_libs ${Boost_LIBRARIES} ${VOLK_GNSSSDR_LIBRARIES} ${ORC_LIBRARIES} )
target_link_libraries(tracking_gr_blocks tracking_lib ${GNURADIO_RUNTIME_LIBRARIES} gnss_sp_libs ${Boost_LIBRARIES} ${VOLK_GNSSSDR_LIBRARIES} ${ORC_LIBRARIES} ${CUDA_LIBRARIES})
if(NOT VOLK_GNSSSDR_FOUND)
add_dependencies(tracking_gr_blocks volk_gnsssdr_module)
endif(NOT VOLK_GNSSSDR_FOUND)

View File

@ -594,7 +594,8 @@ int Gps_L1_Ca_Dll_Pll_Tracking_cc::general_work (int noutput_items, gr_vector_in
// carrier and code frequency
d_dump_file.write(reinterpret_cast<char*>(&d_carrier_doppler_hz), sizeof(float));
d_dump_file.write(reinterpret_cast<char*>(&d_code_freq_chips), sizeof(float));
tmp_float=d_code_freq_chips;
d_dump_file.write(reinterpret_cast<char*>(&tmp_float), sizeof(float));
//PLL commands
d_dump_file.write(reinterpret_cast<char*>(&carr_error_hz), sizeof(float));

View File

@ -0,0 +1,610 @@
/*!
* \file gps_l1_ca_dll_pll_tracking_gpu_cc.cc
* \brief Implementation of a code DLL + carrier PLL tracking block, GPU ACCELERATED
* \author Javier Arribas, 2015. jarribas(at)cttc.es
*
* Code DLL + carrier PLL according to the algorithms described in:
* [1] K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
* A Software-Defined GPS and Galileo Receiver. A Single-Frequency
* Approach, Birkhauser, 2007
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#include "gps_l1_ca_dll_pll_tracking_gpu_cc.h"
#include <cmath>
#include <iostream>
#include <memory>
#include <sstream>
#include <boost/lexical_cast.hpp>
#include <gnuradio/io_signature.h>
#include <glog/logging.h>
#include "gnss_synchro.h"
#include "gps_sdr_signal_processing.h"
#include "tracking_discriminators.h"
#include "lock_detectors.h"
#include "GPS_L1_CA.h"
#include "control_message_factory.h"
#include <volk/volk.h> //volk_alignement
// includes
#include <cuda_profiler_api.h>
#include <helper_functions.h> // helper for shared functions common to CUDA Samples
#include <helper_cuda.h> // helper functions for CUDA error checking and initialization
/*!
* \todo Include in definition header file
*/
#define CN0_ESTIMATION_SAMPLES 20
#define MINIMUM_VALID_CN0 25
#define MAXIMUM_LOCK_FAIL_COUNTER 50
#define CARRIER_LOCK_THRESHOLD 0.85
using google::LogMessage;
gps_l1_ca_dll_pll_tracking_gpu_cc_sptr
gps_l1_ca_dll_pll_make_tracking_gpu_cc(
long if_freq,
long fs_in,
unsigned int vector_length,
boost::shared_ptr<gr::msg_queue> queue,
bool dump,
std::string dump_filename,
float pll_bw_hz,
float dll_bw_hz,
float early_late_space_chips)
{
return gps_l1_ca_dll_pll_tracking_gpu_cc_sptr(new Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(if_freq,
fs_in, vector_length, queue, dump, dump_filename, pll_bw_hz, dll_bw_hz, early_late_space_chips));
}
void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::forecast (int noutput_items,
gr_vector_int &ninput_items_required)
{
ninput_items_required[0] = static_cast<int>(d_vector_length) * 2; //set the required available samples in each call
}
Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
long if_freq,
long fs_in,
unsigned int vector_length,
boost::shared_ptr<gr::msg_queue> queue,
bool dump,
std::string dump_filename,
float pll_bw_hz,
float dll_bw_hz,
float early_late_space_chips) :
gr::block("Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc", gr::io_signature::make(1, 1, sizeof(gr_complex)),
gr::io_signature::make(1, 1, sizeof(Gnss_Synchro)))
{
// initialize internal vars
d_queue = queue;
d_dump = dump;
d_if_freq = if_freq;
d_fs_in = fs_in;
d_vector_length = vector_length;
d_dump_filename = dump_filename;
// Initialize tracking ==========================================
d_code_loop_filter.set_DLL_BW(dll_bw_hz);
d_carrier_loop_filter.set_PLL_BW(pll_bw_hz);
//--- DLL variables --------------------------------------------------------
d_early_late_spc_chips = early_late_space_chips; // Define early-late offset (in chips)
// Initialization of local code replica
// Get space for a vector with the C/A code replica sampled 1x/chip
//d_ca_code = static_cast<gr_complex*>(volk_malloc((GPS_L1_CA_CODE_LENGTH_CHIPS + 2) * sizeof(gr_complex), volk_get_alignment()));
d_ca_code = static_cast<gr_complex*>(volk_malloc((GPS_L1_CA_CODE_LENGTH_CHIPS) * sizeof(gr_complex), volk_get_alignment()));
multicorrelator_gpu = new cuda_multicorrelator();
int N_CORRELATORS=3;
//local code resampler on CPU (old)
//multicorrelator_gpu->init_cuda(0, NULL, 2 * d_vector_length , 2 * d_vector_length , N_CORRELATORS);
//local code resampler on GPU (new)
multicorrelator_gpu->init_cuda_integrated_resampler(0, NULL, 2 * d_vector_length , GPS_L1_CA_CODE_LENGTH_CHIPS , N_CORRELATORS);
// Get space for the resampled early / prompt / late local replicas
checkCudaErrors(cudaHostAlloc((void**)&d_local_code_shift_chips, N_CORRELATORS * sizeof(float), cudaHostAllocMapped ));
//allocate host memory
//pinned memory mode - use special function to get OS-pinned memory
checkCudaErrors(cudaHostAlloc((void**)&in_gpu, 2 * d_vector_length * sizeof(gr_complex), cudaHostAllocMapped ));
//old local codes vector
//checkCudaErrors(cudaHostAlloc((void**)&d_local_codes_gpu, (V_LEN * sizeof(gr_complex))*N_CORRELATORS, cudaHostAllocWriteCombined ));
//new integrated shifts
//checkCudaErrors(cudaHostAlloc((void**)&d_local_codes_gpu, (2 * d_vector_length * sizeof(gr_complex)), cudaHostAllocWriteCombined ));
// correlator outputs (scalar)
checkCudaErrors(cudaHostAlloc((void**)&d_corr_outs_gpu ,sizeof(gr_complex)*N_CORRELATORS, cudaHostAllocWriteCombined ));
//map to EPL pointers
d_Early = &d_corr_outs_gpu[0];
d_Prompt = &d_corr_outs_gpu[1];
d_Late = &d_corr_outs_gpu[2];
//--- Perform initializations ------------------------------
// define initial code frequency basis of NCO
d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ;
// define residual code phase (in chips)
d_rem_code_phase_samples = 0.0;
// define residual carrier phase
d_rem_carr_phase_rad = 0.0;
// sample synchronization
d_sample_counter = 0;
//d_sample_counter_seconds = 0;
d_acq_sample_stamp = 0;
d_enable_tracking = false;
d_pull_in = false;
d_last_seg = 0;
d_current_prn_length_samples = static_cast<int>(d_vector_length);
// CN0 estimation and lock detector buffers
d_cn0_estimation_counter = 0;
d_Prompt_buffer = new gr_complex[CN0_ESTIMATION_SAMPLES];
d_carrier_lock_test = 1;
d_CN0_SNV_dB_Hz = 0;
d_carrier_lock_fail_counter = 0;
d_carrier_lock_threshold = CARRIER_LOCK_THRESHOLD;
systemName["G"] = std::string("GPS");
systemName["S"] = std::string("SBAS");
set_relative_rate(1.0/((double)d_vector_length*2));
d_channel_internal_queue = 0;
d_acquisition_gnss_synchro = 0;
d_channel = 0;
d_acq_code_phase_samples = 0.0;
d_acq_carrier_doppler_hz = 0.0;
d_carrier_doppler_hz = 0.0;
d_acc_carrier_phase_rad = 0.0;
d_code_phase_samples = 0.0;
d_acc_code_phase_secs = 0.0;
//set_min_output_buffer((long int)300);
}
void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::start_tracking()
{
/*
* correct the code phase according to the delay between acq and trk
*/
d_acq_code_phase_samples = d_acquisition_gnss_synchro->Acq_delay_samples;
d_acq_carrier_doppler_hz = d_acquisition_gnss_synchro->Acq_doppler_hz;
d_acq_sample_stamp = d_acquisition_gnss_synchro->Acq_samplestamp_samples;
long int acq_trk_diff_samples;
float acq_trk_diff_seconds;
acq_trk_diff_samples = static_cast<long int>(d_sample_counter) - static_cast<long int>(d_acq_sample_stamp);//-d_vector_length;
DLOG(INFO) << "Number of samples between Acquisition and Tracking =" << acq_trk_diff_samples;
acq_trk_diff_seconds = static_cast<float>(acq_trk_diff_samples) / static_cast<float>(d_fs_in);
//doppler effect
// Fd=(C/(C+Vr))*F
float radial_velocity = (GPS_L1_FREQ_HZ + d_acq_carrier_doppler_hz) / GPS_L1_FREQ_HZ;
// new chip and prn sequence periods based on acq Doppler
float T_chip_mod_seconds;
float T_prn_mod_seconds;
float T_prn_mod_samples;
d_code_freq_chips = radial_velocity * GPS_L1_CA_CODE_RATE_HZ;
T_chip_mod_seconds = 1/d_code_freq_chips;
T_prn_mod_seconds = T_chip_mod_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
T_prn_mod_samples = T_prn_mod_seconds * static_cast<float>(d_fs_in);
d_current_prn_length_samples = round(T_prn_mod_samples);
float T_prn_true_seconds = GPS_L1_CA_CODE_LENGTH_CHIPS / GPS_L1_CA_CODE_RATE_HZ;
float T_prn_true_samples = T_prn_true_seconds * static_cast<float>(d_fs_in);
float T_prn_diff_seconds= T_prn_true_seconds - T_prn_mod_seconds;
float N_prn_diff = acq_trk_diff_seconds / T_prn_true_seconds;
float corrected_acq_phase_samples, delay_correction_samples;
corrected_acq_phase_samples = fmod((d_acq_code_phase_samples + T_prn_diff_seconds * N_prn_diff * static_cast<float>(d_fs_in)), T_prn_true_samples);
if (corrected_acq_phase_samples < 0)
{
corrected_acq_phase_samples = T_prn_mod_samples + corrected_acq_phase_samples;
}
delay_correction_samples = d_acq_code_phase_samples - corrected_acq_phase_samples;
d_acq_code_phase_samples = corrected_acq_phase_samples;
d_carrier_doppler_hz = d_acq_carrier_doppler_hz;
// DLL/PLL filter initialization
d_carrier_loop_filter.initialize(); // initialize the carrier filter
d_code_loop_filter.initialize(); // initialize the code filter
// generate local reference ALWAYS starting at chip 1 (1 sample per chip)
gps_l1_ca_code_gen_complex(d_ca_code, d_acquisition_gnss_synchro->PRN, 0);
d_local_code_shift_chips[0]=-d_early_late_spc_chips;
d_local_code_shift_chips[1]=0.0;
d_local_code_shift_chips[2]=d_early_late_spc_chips;
multicorrelator_gpu->set_local_code_and_taps(GPS_L1_CA_CODE_LENGTH_CHIPS,d_ca_code, d_local_code_shift_chips,3);
d_carrier_lock_fail_counter = 0;
d_rem_code_phase_samples = 0;
d_rem_carr_phase_rad = 0;
d_acc_carrier_phase_rad = 0;
d_acc_code_phase_secs = 0;
d_code_phase_samples = d_acq_code_phase_samples;
std::string sys_ = &d_acquisition_gnss_synchro->System;
sys = sys_.substr(0,1);
// DEBUG OUTPUT
std::cout << "Tracking start on channel " << d_channel << " for satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN) << std::endl;
LOG(INFO) << "Starting tracking of satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN) << " on channel " << d_channel;
// enable tracking
d_pull_in = true;
d_enable_tracking = true;
LOG(INFO) << "PULL-IN Doppler [Hz]=" << d_carrier_doppler_hz
<< " Code Phase correction [samples]=" << delay_correction_samples
<< " PULL-IN Code Phase [samples]=" << d_acq_code_phase_samples;
}
Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::~Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc()
{
d_dump_file.close();
cudaFreeHost(in_gpu);
cudaFreeHost(d_carr_sign_gpu);
cudaFreeHost(d_corr_outs_gpu);
cudaFreeHost(d_local_code_shift_chips);
multicorrelator_gpu->free_cuda();
delete(multicorrelator_gpu);
volk_free(d_ca_code);
delete[] d_Prompt_buffer;
}
int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vector_int &ninput_items,
gr_vector_const_void_star &input_items, gr_vector_void_star &output_items)
{
// process vars
float carr_error_hz=0.0;
float carr_error_filt_hz=0.0;
float code_error_chips=0.0;
float code_error_filt_chips=0.0;
// Block input data and block output stream pointers
const gr_complex* in = (gr_complex*) input_items[0];
Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0];
// GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder
Gnss_Synchro current_synchro_data = Gnss_Synchro();
if (d_enable_tracking == true)
{
// Receiver signal alignment
if (d_pull_in == true)
{
int samples_offset;
int acq_to_trk_delay_samples;
acq_to_trk_delay_samples = d_sample_counter - d_acq_sample_stamp;
samples_offset = round(d_acq_code_phase_samples)+d_current_prn_length_samples - acq_to_trk_delay_samples%d_current_prn_length_samples;
d_sample_counter = d_sample_counter + samples_offset; //count for the processed samples
d_pull_in = false;
// Fill the acquisition data
current_synchro_data = *d_acquisition_gnss_synchro;
*out[0] = current_synchro_data;
consume_each(samples_offset); //shift input to perform alignment with local replica
return 1;
}
// Fill the acquisition data
current_synchro_data = *d_acquisition_gnss_synchro;
// UPDATE NCO COMMAND
float phase_step_rad = static_cast<float>(GPS_TWO_PI) * d_carrier_doppler_hz / static_cast<float>(d_fs_in);
//code resampler on GPU (new)
float code_phase_step_chips = static_cast<float>(d_code_freq_chips) / static_cast<float>(d_fs_in);
float rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in);
cudaProfilerStart();
multicorrelator_gpu->Carrier_wipeoff_multicorrelator_resampler_cuda(
d_corr_outs_gpu,
in,
d_rem_carr_phase_rad,
phase_step_rad,
code_phase_step_chips,
rem_code_phase_chips,
d_current_prn_length_samples,
3);
cudaProfilerStop();
// ################## PLL ##########################################################
// PLL discriminator
carr_error_hz = pll_cloop_two_quadrant_atan(*d_Prompt) / static_cast<float>(GPS_TWO_PI);
// Carrier discriminator filter
carr_error_filt_hz = d_carrier_loop_filter.get_carrier_nco(carr_error_hz);
// New carrier Doppler frequency estimation
d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_error_filt_hz;
// New code Doppler frequency estimation
d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ + ((d_carrier_doppler_hz * GPS_L1_CA_CODE_RATE_HZ) / GPS_L1_FREQ_HZ);
//carrier phase accumulator for (K) doppler estimation
d_acc_carrier_phase_rad = d_acc_carrier_phase_rad + GPS_TWO_PI * d_carrier_doppler_hz * GPS_L1_CA_CODE_PERIOD;
//remanent carrier phase to prevent overflow in the code NCO
d_rem_carr_phase_rad = d_rem_carr_phase_rad + GPS_TWO_PI * d_carrier_doppler_hz * GPS_L1_CA_CODE_PERIOD;
d_rem_carr_phase_rad = fmod(d_rem_carr_phase_rad, GPS_TWO_PI);
// ################## DLL ##########################################################
// DLL discriminator
code_error_chips = dll_nc_e_minus_l_normalized(*d_Early, *d_Late); //[chips/Ti]
// Code discriminator filter
code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips); //[chips/second]
//Code phase accumulator
float code_error_filt_secs;
code_error_filt_secs = (GPS_L1_CA_CODE_PERIOD * code_error_filt_chips) / GPS_L1_CA_CODE_RATE_HZ; //[seconds]
d_acc_code_phase_secs = d_acc_code_phase_secs + code_error_filt_secs;
// ################## CARRIER AND CODE NCO BUFFER ALIGNEMENT #######################
// keep alignment parameters for the next input buffer
double T_chip_seconds;
double T_prn_seconds;
double T_prn_samples;
double K_blk_samples;
// Compute the next buffer length based in the new period of the PRN sequence and the code phase error estimation
T_chip_seconds = 1 / static_cast<double>(d_code_freq_chips);
T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
T_prn_samples = T_prn_seconds * static_cast<double>(d_fs_in);
K_blk_samples = T_prn_samples + d_rem_code_phase_samples + static_cast<double>(code_error_filt_secs) * static_cast<double>(d_fs_in);
//d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
// ####### CN0 ESTIMATION AND LOCK DETECTORS ######
if (d_cn0_estimation_counter < CN0_ESTIMATION_SAMPLES)
{
// fill buffer with prompt correlator output values
d_Prompt_buffer[d_cn0_estimation_counter] = *d_Prompt;
d_cn0_estimation_counter++;
}
else
{
d_cn0_estimation_counter = 0;
// Code lock indicator
d_CN0_SNV_dB_Hz = cn0_svn_estimator(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES, d_fs_in, GPS_L1_CA_CODE_LENGTH_CHIPS);
// Carrier lock indicator
d_carrier_lock_test = carrier_lock_detector(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES);
// Loss of lock detection
if (d_carrier_lock_test < d_carrier_lock_threshold or d_CN0_SNV_dB_Hz < MINIMUM_VALID_CN0)
{
d_carrier_lock_fail_counter++;
}
else
{
if (d_carrier_lock_fail_counter > 0) d_carrier_lock_fail_counter--;
}
if (d_carrier_lock_fail_counter > MAXIMUM_LOCK_FAIL_COUNTER)
{
std::cout << "Loss of lock in channel " << d_channel << "!" << std::endl;
LOG(INFO) << "Loss of lock in channel " << d_channel << "!";
std::unique_ptr<ControlMessageFactory> cmf(new ControlMessageFactory());
if (d_queue != gr::msg_queue::sptr())
{
d_queue->handle(cmf->GetQueueMessage(d_channel, 2));
}
d_carrier_lock_fail_counter = 0;
d_enable_tracking = false; // TODO: check if disabling tracking is consistent with the channel state machine
}
}
// ########### Output the tracking data to navigation and PVT ##########
current_synchro_data.Prompt_I = static_cast<double>((*d_Prompt).real());
current_synchro_data.Prompt_Q = static_cast<double>((*d_Prompt).imag());
// Tracking_timestamp_secs is aligned with the NEXT PRN start sample (Hybridization problem!)
//compute remnant code phase samples BEFORE the Tracking timestamp
//d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
//current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter + (double)d_current_prn_length_samples + (double)d_rem_code_phase_samples)/static_cast<double>(d_fs_in);
// Tracking_timestamp_secs is aligned with the CURRENT PRN start sample (Hybridization OK!, but some glitches??)
current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + static_cast<double>(d_rem_code_phase_samples)) / static_cast<double>(d_fs_in);
//compute remnant code phase samples AFTER the Tracking timestamp
d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
//current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter)/static_cast<double>(d_fs_in);
// This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0
current_synchro_data.Code_phase_secs = 0;
current_synchro_data.Carrier_phase_rads = static_cast<double>(d_acc_carrier_phase_rad);
current_synchro_data.Carrier_Doppler_hz = static_cast<double>(d_carrier_doppler_hz);
current_synchro_data.CN0_dB_hz = static_cast<double>(d_CN0_SNV_dB_Hz);
current_synchro_data.Flag_valid_pseudorange = false;
*out[0] = current_synchro_data;
// ########## DEBUG OUTPUT
/*!
* \todo The stop timer has to be moved to the signal source!
*/
// debug: Second counter in channel 0
if (d_channel == 0)
{
if (floor(d_sample_counter / d_fs_in) != d_last_seg)
{
d_last_seg = floor(d_sample_counter / d_fs_in);
std::cout << "Current input signal time = " << d_last_seg << " [s]" << std::endl;
DLOG(INFO) << "GPS L1 C/A Tracking CH " << d_channel << ": Satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN)
<< ", CN0 = " << d_CN0_SNV_dB_Hz << " [dB-Hz]" << std::endl;
//if (d_last_seg==5) d_carrier_lock_fail_counter=500; //DEBUG: force unlock!
}
}
else
{
if (floor(d_sample_counter / d_fs_in) != d_last_seg)
{
d_last_seg = floor(d_sample_counter / d_fs_in);
DLOG(INFO) << "Tracking CH " << d_channel << ": Satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN)
<< ", CN0 = " << d_CN0_SNV_dB_Hz << " [dB-Hz]";
}
}
}
else
{
// ########## DEBUG OUTPUT (TIME ONLY for channel 0 when tracking is disabled)
/*!
* \todo The stop timer has to be moved to the signal source!
*/
// stream to collect cout calls to improve thread safety
std::stringstream tmp_str_stream;
if (floor(d_sample_counter / d_fs_in) != d_last_seg)
{
d_last_seg = floor(d_sample_counter / d_fs_in);
if (d_channel == 0)
{
// debug: Second counter in channel 0
tmp_str_stream << "Current input signal time = " << d_last_seg << " [s]" << std::endl << std::flush;
std::cout << tmp_str_stream.rdbuf() << std::flush;
}
}
*d_Early = gr_complex(0,0);
*d_Prompt = gr_complex(0,0);
*d_Late = gr_complex(0,0);
current_synchro_data.System = {'G'};
current_synchro_data.Flag_valid_pseudorange = false;
*out[0] = current_synchro_data;
}
if(d_dump)
{
// MULTIPLEXED FILE RECORDING - Record results to file
float prompt_I;
float prompt_Q;
float tmp_E, tmp_P, tmp_L;
float tmp_float;
double tmp_double;
prompt_I = (*d_Prompt).real();
prompt_Q = (*d_Prompt).imag();
tmp_E = std::abs<float>(*d_Early);
tmp_P = std::abs<float>(*d_Prompt);
tmp_L = std::abs<float>(*d_Late);
try
{
// EPR
d_dump_file.write(reinterpret_cast<char*>(&tmp_E), sizeof(float));
d_dump_file.write(reinterpret_cast<char*>(&tmp_P), sizeof(float));
d_dump_file.write(reinterpret_cast<char*>(&tmp_L), sizeof(float));
// PROMPT I and Q (to analyze navigation symbols)
d_dump_file.write(reinterpret_cast<char*>(&prompt_I), sizeof(float));
d_dump_file.write(reinterpret_cast<char*>(&prompt_Q), sizeof(float));
// PRN start sample stamp
//tmp_float=(float)d_sample_counter;
d_dump_file.write(reinterpret_cast<char*>(&d_sample_counter), sizeof(unsigned long int));
// accumulated carrier phase
d_dump_file.write(reinterpret_cast<char*>(&d_acc_carrier_phase_rad), sizeof(float));
// carrier and code frequency
d_dump_file.write(reinterpret_cast<char*>(&d_carrier_doppler_hz), sizeof(float));
tmp_float=d_code_freq_chips;
d_dump_file.write(reinterpret_cast<char*>(&tmp_float), sizeof(float));
//PLL commands
d_dump_file.write(reinterpret_cast<char*>(&carr_error_hz), sizeof(float));
d_dump_file.write(reinterpret_cast<char*>(&carr_error_filt_hz), sizeof(float));
//DLL commands
d_dump_file.write(reinterpret_cast<char*>(&code_error_chips), sizeof(float));
d_dump_file.write(reinterpret_cast<char*>(&code_error_filt_chips), sizeof(float));
// CN0 and carrier lock test
d_dump_file.write(reinterpret_cast<char*>(&d_CN0_SNV_dB_Hz), sizeof(float));
d_dump_file.write(reinterpret_cast<char*>(&d_carrier_lock_test), sizeof(float));
// AUX vars (for debug purposes)
tmp_float = d_rem_code_phase_samples;
d_dump_file.write(reinterpret_cast<char*>(&tmp_float), sizeof(float));
tmp_double = static_cast<double>(d_sample_counter + d_current_prn_length_samples);
d_dump_file.write(reinterpret_cast<char*>(&tmp_double), sizeof(double));
}
catch (std::ifstream::failure e)
{
LOG(WARNING) << "Exception writing trk dump file " << e.what();
}
}
consume_each(d_current_prn_length_samples); // this is necessary in gr::block derivates
d_sample_counter += d_current_prn_length_samples; //count for the processed samples
//LOG(INFO)<<"GPS tracking output end on CH="<<this->d_channel << " SAMPLE STAMP="<<d_sample_counter<<std::endl;
return 1; //output tracking result ALWAYS even in the case of d_enable_tracking==false
}
void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::set_channel(unsigned int channel)
{
d_channel = channel;
LOG(INFO) << "Tracking Channel set to " << d_channel;
// ############# ENABLE DATA FILE LOG #################
if (d_dump == true)
{
if (d_dump_file.is_open() == false)
{
try
{
d_dump_filename.append(boost::lexical_cast<std::string>(d_channel));
d_dump_filename.append(".dat");
d_dump_file.exceptions (std::ifstream::failbit | std::ifstream::badbit);
d_dump_file.open(d_dump_filename.c_str(), std::ios::out | std::ios::binary);
LOG(INFO) << "Tracking dump enabled on channel " << d_channel << " Log file: " << d_dump_filename.c_str() << std::endl;
}
catch (std::ifstream::failure e)
{
LOG(WARNING) << "channel " << d_channel << " Exception opening trk dump file " << e.what() << std::endl;
}
}
}
}
void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::set_channel_queue(concurrent_queue<int> *channel_internal_queue)
{
d_channel_internal_queue = channel_internal_queue;
}
void Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::set_gnss_synchro(Gnss_Synchro* p_gnss_synchro)
{
d_acquisition_gnss_synchro = p_gnss_synchro;
}

View File

@ -0,0 +1,191 @@
/*!
* \file gps_l1_ca_dll_pll_tracking_gpu_cc.h
* \brief Implementation of a code DLL + carrier PLL tracking block, GPU ACCELERATED
* \author Javier Arribas, 2015. jarribas(at)cttc.es
*
* Code DLL + carrier PLL according to the algorithms described in:
* K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
* A Software-Defined GPS and Galileo Receiver. A Single-Frequency Approach,
* Birkhauser, 2007
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef GNSS_SDR_GPS_L1_CA_DLL_PLL_TRACKING_GPU_CC_H
#define GNSS_SDR_GPS_L1_CA_DLL_PLL_TRACKING_GPU_CC_H
#include <fstream>
#include <queue>
#include <map>
#include <string>
#include <boost/thread/mutex.hpp>
#include <boost/thread/thread.hpp>
#include <gnuradio/block.h>
#include <gnuradio/msg_queue.h>
#include "concurrent_queue.h"
#include "gps_sdr_signal_processing.h"
#include "gnss_synchro.h"
#include "tracking_2nd_DLL_filter.h"
#include "tracking_2nd_PLL_filter.h"
#include "cuda_multicorrelator.h"
class Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc;
typedef boost::shared_ptr<Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc>
gps_l1_ca_dll_pll_tracking_gpu_cc_sptr;
gps_l1_ca_dll_pll_tracking_gpu_cc_sptr
gps_l1_ca_dll_pll_make_tracking_gpu_cc(long if_freq,
long fs_in, unsigned
int vector_length,
boost::shared_ptr<gr::msg_queue> queue,
bool dump,
std::string dump_filename,
float pll_bw_hz,
float dll_bw_hz,
float early_late_space_chips);
/*!
* \brief This class implements a DLL + PLL tracking loop block
*/
class Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc: public gr::block
{
public:
~Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc();
void set_channel(unsigned int channel);
void set_gnss_synchro(Gnss_Synchro* p_gnss_synchro);
void start_tracking();
void set_channel_queue(concurrent_queue<int> *channel_internal_queue);
int general_work (int noutput_items, gr_vector_int &ninput_items,
gr_vector_const_void_star &input_items, gr_vector_void_star &output_items);
void forecast (int noutput_items, gr_vector_int &ninput_items_required);
private:
friend gps_l1_ca_dll_pll_tracking_gpu_cc_sptr
gps_l1_ca_dll_pll_make_tracking_gpu_cc(long if_freq,
long fs_in, unsigned
int vector_length,
boost::shared_ptr<gr::msg_queue> queue,
bool dump,
std::string dump_filename,
float pll_bw_hz,
float dll_bw_hz,
float early_late_space_chips);
Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(long if_freq,
long fs_in, unsigned
int vector_length,
boost::shared_ptr<gr::msg_queue> queue,
bool dump,
std::string dump_filename,
float pll_bw_hz,
float dll_bw_hz,
float early_late_space_chips);
void update_local_code();
void update_local_carrier();
// tracking configuration vars
boost::shared_ptr<gr::msg_queue> d_queue;
concurrent_queue<int> *d_channel_internal_queue;
unsigned int d_vector_length;
bool d_dump;
Gnss_Synchro* d_acquisition_gnss_synchro;
unsigned int d_channel;
int d_last_seg;
long d_if_freq;
long d_fs_in;
double d_early_late_spc_chips;
//GPU HOST PINNED MEMORY IN/OUT VECTORS
gr_complex* in_gpu;
gr_complex* d_carr_sign_gpu;
gr_complex* d_local_codes_gpu;
float* d_local_code_shift_chips;
gr_complex* d_corr_outs_gpu;
cuda_multicorrelator *multicorrelator_gpu;
gr_complex* d_ca_code;
gr_complex *d_Early;
gr_complex *d_Prompt;
gr_complex *d_Late;
// remaining code phase and carrier phase between tracking loops
double d_rem_code_phase_samples;
float d_rem_carr_phase_rad;
// PLL and DLL filter library
Tracking_2nd_DLL_filter d_code_loop_filter;
Tracking_2nd_PLL_filter d_carrier_loop_filter;
// acquisition
float d_acq_code_phase_samples;
float d_acq_carrier_doppler_hz;
// tracking vars
double d_code_freq_chips;
float d_carrier_doppler_hz;
float d_acc_carrier_phase_rad;
float d_code_phase_samples;
float d_acc_code_phase_secs;
//PRN period in samples
int d_current_prn_length_samples;
//processing samples counters
unsigned long int d_sample_counter;
unsigned long int d_acq_sample_stamp;
// CN0 estimation and lock detector
int d_cn0_estimation_counter;
gr_complex* d_Prompt_buffer;
float d_carrier_lock_test;
float d_CN0_SNV_dB_Hz;
float d_carrier_lock_threshold;
int d_carrier_lock_fail_counter;
// control vars
bool d_enable_tracking;
bool d_pull_in;
// file dump
std::string d_dump_filename;
std::ofstream d_dump_file;
std::map<std::string, std::string> systemName;
std::string sys;
};
#endif //GNSS_SDR_GPS_L1_CA_DLL_PLL_TRACKING_GPU_CC_H

View File

@ -16,6 +16,26 @@
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
if(ENABLE_CUDA)
FIND_PACKAGE(CUDA REQUIRED)
# Append current NVCC flags by something, eg comput capability
# set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --gpu-architecture sm_30)
list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_30,code=sm_30; -std=c++11;-O3; -use_fast_math -default-stream per-thread")
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
CUDA_INCLUDE_DIRECTORIES(
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/cudahelpers
)
SET(LIB_TYPE STATIC) #set the lib type
CUDA_ADD_LIBRARY(CUDA_CORRELATOR_LIB ${LIB_TYPE} cuda_multicorrelator.h cuda_multicorrelator.cu)
endif(ENABLE_CUDA)
set(TRACKING_LIB_SOURCES
correlator.cc
lock_detectors.cc
@ -24,7 +44,7 @@ set(TRACKING_LIB_SOURCES
tracking_2nd_DLL_filter.cc
tracking_2nd_PLL_filter.cc
tracking_discriminators.cc
tracking_FLL_PLL_filter.cc
tracking_FLL_PLL_filter.cc
)
include_directories(
@ -33,6 +53,7 @@ include_directories(
${CMAKE_SOURCE_DIR}/src/core/interfaces
${CMAKE_SOURCE_DIR}/src/core/receiver
${VOLK_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS}
)
if(ENABLE_GENERIC_ARCH)
@ -43,7 +64,8 @@ if (SSE3_AVAILABLE)
add_definitions( -DHAVE_SSE3=1 )
endif(SSE3_AVAILABLE)
file(GLOB TRACKING_LIB_HEADERS "*.h")
add_library(tracking_lib ${TRACKING_LIB_SOURCES} ${TRACKING_LIB_HEADERS})
source_group(Headers FILES ${TRACKING_LIB_HEADERS})
target_link_libraries(tracking_lib ${VOLK_LIBRARIES} ${GNURADIO_RUNTIME_LIBRARIES})
target_link_libraries(tracking_lib CUDA_CORRELATOR_LIB ${VOLK_LIBRARIES} ${GNURADIO_RUNTIME_LIBRARIES})

View File

@ -0,0 +1,714 @@
/*!
* \file cuda_multicorrelator.cu
* \brief High optimized CUDA GPU vector multiTAP correlator class
* \authors <ul>
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
* </ul>
*
* Class that implements a high optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
///////////////////////////////////////////////////////////////////////////////
// On G80-class hardware 24-bit multiplication takes 4 clocks per warp
// (the same as for floating point multiplication and addition),
// whereas full 32-bit multiplication takes 16 clocks per warp.
// So if integer multiplication operands are guaranteed to fit into 24 bits
// (always lie withtin [-8M, 8M - 1] range in signed case),
// explicit 24-bit multiplication is preferred for performance.
///////////////////////////////////////////////////////////////////////////////
#define IMUL(a, b) __mul24(a, b)
#include "cuda_multicorrelator.h"
#include <stdio.h>
// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>
// helper functions and utilities to work with CUDA
#include <helper_cuda.h>
#include <helper_functions.h>
#define ACCUM_N 256
__global__ void scalarProdGPUCPXxN_shifts_chips(
GPU_Complex *d_corr_out,
GPU_Complex *d_sig_in,
GPU_Complex *d_local_code_in,
float *d_shifts_chips,
float code_length_chips,
float code_phase_step_chips,
float rem_code_phase_chips,
int vectorN,
int elementN
)
{
//Accumulators cache
__shared__ GPU_Complex accumResult[ACCUM_N];
////////////////////////////////////////////////////////////////////////////
// Cycle through every pair of vectors,
// taking into account that vector counts can be different
// from total number of thread blocks
////////////////////////////////////////////////////////////////////////////
for (int vec = blockIdx.x; vec < vectorN; vec += gridDim.x)
{
//int vectorBase = IMUL(elementN, vec);
//int vectorEnd = elementN;
////////////////////////////////////////////////////////////////////////
// Each accumulator cycles through vectors with
// stride equal to number of total number of accumulators ACCUM_N
// At this stage ACCUM_N is only preferred be a multiple of warp size
// to meet memory coalescing alignment constraints.
////////////////////////////////////////////////////////////////////////
for (int iAccum = threadIdx.x; iAccum < ACCUM_N; iAccum += blockDim.x)
{
GPU_Complex sum = GPU_Complex(0,0);
for (int pos = iAccum; pos < elementN; pos += ACCUM_N)
{
//sum = sum + d_sig_in[pos-vectorBase] * d_nco_in[pos-vectorBase] * d_local_codes_in[pos];
//sum = sum + d_sig_in[pos-vectorBase] * d_local_codes_in[pos];
//sum.multiply_acc(d_sig_in[pos],d_local_codes_in[pos+d_shifts_samples[vec]]);
// 1.resample local code for the current shift
float local_code_chip_index= fmod(code_phase_step_chips*(float)pos + d_shifts_chips[vec] - rem_code_phase_chips, code_length_chips);
//TODO: Take into account that in multitap correlators, the shifts can be negative!
if (local_code_chip_index<0.0) local_code_chip_index+=code_length_chips;
// 2.correlate
sum.multiply_acc(d_sig_in[pos],d_local_code_in[__float2int_rd(local_code_chip_index)]);
}
accumResult[iAccum] = sum;
}
////////////////////////////////////////////////////////////////////////
// Perform tree-like reduction of accumulators' results.
// ACCUM_N has to be power of two at this stage
////////////////////////////////////////////////////////////////////////
for (int stride = ACCUM_N / 2; stride > 0; stride >>= 1)
{
__syncthreads();
for (int iAccum = threadIdx.x; iAccum < stride; iAccum += blockDim.x)
{
accumResult[iAccum] += accumResult[stride + iAccum];
}
}
if (threadIdx.x == 0)
{
d_corr_out[vec] = accumResult[0];
}
}
}
///////////////////////////////////////////////////////////////////////////////
// Calculate scalar products of VectorN vectors of ElementN elements on GPU
// Parameters restrictions:
// 1) ElementN is strongly preferred to be a multiple of warp size to
// meet alignment constraints of memory coalescing.
// 2) ACCUM_N must be a power of two.
///////////////////////////////////////////////////////////////////////////////
__global__ void scalarProdGPUCPXxN_shifts(
GPU_Complex *d_corr_out,
GPU_Complex *d_sig_in,
GPU_Complex *d_local_codes_in,
int *d_shifts_samples,
int vectorN,
int elementN
)
{
//Accumulators cache
__shared__ GPU_Complex accumResult[ACCUM_N];
////////////////////////////////////////////////////////////////////////////
// Cycle through every pair of vectors,
// taking into account that vector counts can be different
// from total number of thread blocks
////////////////////////////////////////////////////////////////////////////
for (int vec = blockIdx.x; vec < vectorN; vec += gridDim.x)
{
int vectorBase = IMUL(elementN, vec);
int vectorEnd = vectorBase + elementN;
////////////////////////////////////////////////////////////////////////
// Each accumulator cycles through vectors with
// stride equal to number of total number of accumulators ACCUM_N
// At this stage ACCUM_N is only preferred be a multiple of warp size
// to meet memory coalescing alignment constraints.
////////////////////////////////////////////////////////////////////////
for (int iAccum = threadIdx.x; iAccum < ACCUM_N; iAccum += blockDim.x)
{
GPU_Complex sum = GPU_Complex(0,0);
for (int pos = vectorBase + iAccum; pos < vectorEnd; pos += ACCUM_N)
{
//sum = sum + d_sig_in[pos-vectorBase] * d_nco_in[pos-vectorBase] * d_local_codes_in[pos];
//sum = sum + d_sig_in[pos-vectorBase] * d_local_codes_in[pos];
sum.multiply_acc(d_sig_in[pos-vectorBase],d_local_codes_in[pos-vectorBase+d_shifts_samples[vec]]);
}
accumResult[iAccum] = sum;
}
////////////////////////////////////////////////////////////////////////
// Perform tree-like reduction of accumulators' results.
// ACCUM_N has to be power of two at this stage
////////////////////////////////////////////////////////////////////////
for (int stride = ACCUM_N / 2; stride > 0; stride >>= 1)
{
__syncthreads();
for (int iAccum = threadIdx.x; iAccum < stride; iAccum += blockDim.x)
{
accumResult[iAccum] += accumResult[stride + iAccum];
}
}
if (threadIdx.x == 0)
{
d_corr_out[vec] = accumResult[0];
}
}
}
__global__ void scalarProdGPUCPXxN(
GPU_Complex *d_corr_out,
GPU_Complex *d_sig_in,
GPU_Complex *d_local_codes_in,
int vectorN,
int elementN
)
{
//Accumulators cache
__shared__ GPU_Complex accumResult[ACCUM_N];
////////////////////////////////////////////////////////////////////////////
// Cycle through every pair of vectors,
// taking into account that vector counts can be different
// from total number of thread blocks
////////////////////////////////////////////////////////////////////////////
for (int vec = blockIdx.x; vec < vectorN; vec += gridDim.x)
{
//int vectorBase = IMUL(elementN, vec);
//int vectorEnd = vectorBase + elementN;
////////////////////////////////////////////////////////////////////////
// Each accumulator cycles through vectors with
// stride equal to number of total number of accumulators ACCUM_N
// At this stage ACCUM_N is only preferred be a multiple of warp size
// to meet memory coalescing alignment constraints.
////////////////////////////////////////////////////////////////////////
for (int iAccum = threadIdx.x; iAccum < ACCUM_N; iAccum += blockDim.x)
{
GPU_Complex sum = GPU_Complex(0,0);
//for (int pos = vectorBase + iAccum; pos < vectorEnd; pos += ACCUM_N)
for (int pos = iAccum; pos < elementN; pos += ACCUM_N)
{
//sum = sum + d_sig_in[pos-vectorBase] * d_nco_in[pos-vectorBase] * d_local_codes_in[pos];
//sum = sum + d_sig_in[pos-vectorBase] * d_local_codes_in[pos];
//sum.multiply_acc(d_sig_in[pos-vectorBase],d_local_codes_in[pos]);
sum.multiply_acc(d_sig_in[pos],d_local_codes_in[pos]);
}
accumResult[iAccum] = sum;
}
////////////////////////////////////////////////////////////////////////
// Perform tree-like reduction of accumulators' results.
// ACCUM_N has to be power of two at this stage
////////////////////////////////////////////////////////////////////////
for (int stride = ACCUM_N / 2; stride > 0; stride >>= 1)
{
__syncthreads();
for (int iAccum = threadIdx.x; iAccum < stride; iAccum += blockDim.x)
{
accumResult[iAccum] += accumResult[stride + iAccum];
}
}
if (threadIdx.x == 0)
{
d_corr_out[vec] = accumResult[0];
}
}
}
//*********** CUDA processing **************
// Treads: a minimal parallel execution code on GPU
// Blocks: a set of N threads
/**
* CUDA Kernel Device code
*
* Computes the vectorial product of A and B into C. The 3 vectors have the same
* number of elements numElements.
*/
__global__ void CUDA_32fc_x2_multiply_32fc( GPU_Complex *A, GPU_Complex *B, GPU_Complex *C, int numElements)
{
for (int i = blockIdx.x * blockDim.x + threadIdx.x;
i < numElements;
i += blockDim.x * gridDim.x)
{
C[i] = A[i] * B[i];
}
}
/**
* CUDA Kernel Device code
*
* Computes the carrier Doppler wipe-off by integrating the NCO in the CUDA kernel
*/
__global__ void
CUDA_32fc_Doppler_wipeoff( GPU_Complex *sig_out, GPU_Complex *sig_in, float rem_carrier_phase_in_rad, float phase_step_rad, int numElements)
{
//*** NCO CPU code (GNURadio FXP NCO)
//float sin_f, cos_f;
//float phase_step_rad = static_cast<float>(2 * GALILEO_PI) * d_carrier_doppler_hz / static_cast<float>(d_fs_in);
//int phase_step_rad_i = gr::fxpt::float_to_fixed(phase_step_rad);
//int phase_rad_i = gr::fxpt::float_to_fixed(d_rem_carr_phase_rad);
//
//for(int i = 0; i < d_current_prn_length_samples; i++)
// {
// gr::fxpt::sincos(phase_rad_i, &sin_f, &cos_f);
// d_carr_sign[i] = std::complex<float>(cos_f, -sin_f);
// phase_rad_i += phase_step_rad_i;
// }
// CUDA version of floating point NCO and vector dot product integrated
float sin;
float cos;
for (int i = blockIdx.x * blockDim.x + threadIdx.x;
i < numElements;
i += blockDim.x * gridDim.x)
{
__sincosf(rem_carrier_phase_in_rad + i*phase_step_rad, &sin, &cos);
sig_out[i] = sig_in[i] * GPU_Complex(cos,-sin);
}
}
/**
* CUDA Kernel Device code
*
* Computes the vectorial product of A and B into C. The 3 vectors have the same
* number of elements numElements.
*/
__global__ void
CUDA_32fc_x2_add_32fc( GPU_Complex *A, GPU_Complex *B, GPU_Complex *C, int numElements)
{
for (int i = blockIdx.x * blockDim.x + threadIdx.x;
i < numElements;
i += blockDim.x * gridDim.x)
{
C[i] = A[i] + B[i];
}
}
bool cuda_multicorrelator::init_cuda(const int argc, const char **argv, int signal_length_samples, int local_codes_length_samples, int n_correlators)
{
// use command-line specified CUDA device, otherwise use device with highest Gflops/s
// findCudaDevice(argc, (const char **)argv);
// cudaDeviceProp prop;
// int num_devices, device;
// cudaGetDeviceCount(&num_devices);
// if (num_devices > 1) {
// int max_multiprocessors = 0, max_device = 0;
// for (device = 0; device < num_devices; device++) {
// cudaDeviceProp properties;
// cudaGetDeviceProperties(&properties, device);
// if (max_multiprocessors < properties.multiProcessorCount) {
// max_multiprocessors = properties.multiProcessorCount;
// max_device = device;
// }
// printf("Found GPU device # %i\n",device);
// }
// //cudaSetDevice(max_device);
//
// //set random device!
// cudaSetDevice(rand() % num_devices); //generates a random number between 0 and num_devices to split the threads between GPUs
//
// cudaGetDeviceProperties( &prop, max_device );
// //debug code
// if (prop.canMapHostMemory != 1) {
// printf( "Device can not map memory.\n" );
// }
// printf("L2 Cache size= %u \n",prop.l2CacheSize);
// printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
// printf("maxGridSize= %i \n",prop.maxGridSize[0]);
// printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
// printf("deviceOverlap= %i \n",prop.deviceOverlap);
// printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
// }else{
// int whichDevice;
// cudaGetDevice( &whichDevice );
// cudaGetDeviceProperties( &prop, whichDevice );
// //debug code
// if (prop.canMapHostMemory != 1) {
// printf( "Device can not map memory.\n" );
// }
//
// printf("L2 Cache size= %u \n",prop.l2CacheSize);
// printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
// printf("maxGridSize= %i \n",prop.maxGridSize[0]);
// printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
// printf("deviceOverlap= %i \n",prop.deviceOverlap);
// printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
// }
//checkCudaErrors(cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
// ALLOCATE GPU MEMORY FOR INPUT/OUTPUT and INTERNAL vectors
size_t size = signal_length_samples * sizeof(GPU_Complex);
checkCudaErrors(cudaMalloc((void **)&d_sig_in, size));
//checkCudaErrors(cudaMalloc((void **)&d_nco_in, size));
checkCudaErrors(cudaMalloc((void **)&d_sig_doppler_wiped, size));
// old version: all local codes are independent vectors
//checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, size*n_correlators));
// new version: only one vector with extra samples to shift the local code for the correlator set
// Required: The last correlator tap in d_shifts_samples has the largest sample shift
size_t size_local_code_bytes = local_codes_length_samples * sizeof(GPU_Complex);
checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, size_local_code_bytes));
checkCudaErrors(cudaMalloc((void **)&d_shifts_samples, sizeof(int)*n_correlators));
//scalars
checkCudaErrors(cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators));
// Launch the Vector Add CUDA Kernel
threadsPerBlock = 256;
blocksPerGrid =(int)(signal_length_samples+threadsPerBlock-1)/threadsPerBlock;
cudaStreamCreate (&stream1) ;
cudaStreamCreate (&stream2) ;
return true;
}
bool cuda_multicorrelator::init_cuda_integrated_resampler(
const int argc, const char **argv,
int signal_length_samples,
int code_length_chips,
int n_correlators
)
{
// use command-line specified CUDA device, otherwise use device with highest Gflops/s
// findCudaDevice(argc, (const char **)argv);
// cudaDeviceProp prop;
// int num_devices, device;
// cudaGetDeviceCount(&num_devices);
// if (num_devices > 1) {
// int max_multiprocessors = 0, max_device = 0;
// for (device = 0; device < num_devices; device++) {
// cudaDeviceProp properties;
// cudaGetDeviceProperties(&properties, device);
// if (max_multiprocessors < properties.multiProcessorCount) {
// max_multiprocessors = properties.multiProcessorCount;
// max_device = device;
// }
// printf("Found GPU device # %i\n",device);
// }
// //cudaSetDevice(max_device);
//
// //set random device!
// cudaSetDevice(rand() % num_devices); //generates a random number between 0 and num_devices to split the threads between GPUs
//
// cudaGetDeviceProperties( &prop, max_device );
// //debug code
// if (prop.canMapHostMemory != 1) {
// printf( "Device can not map memory.\n" );
// }
// printf("L2 Cache size= %u \n",prop.l2CacheSize);
// printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
// printf("maxGridSize= %i \n",prop.maxGridSize[0]);
// printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
// printf("deviceOverlap= %i \n",prop.deviceOverlap);
// printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
// }else{
// int whichDevice;
// cudaGetDevice( &whichDevice );
// cudaGetDeviceProperties( &prop, whichDevice );
// //debug code
// if (prop.canMapHostMemory != 1) {
// printf( "Device can not map memory.\n" );
// }
//
// printf("L2 Cache size= %u \n",prop.l2CacheSize);
// printf("maxThreadsPerBlock= %u \n",prop.maxThreadsPerBlock);
// printf("maxGridSize= %i \n",prop.maxGridSize[0]);
// printf("sharedMemPerBlock= %lu \n",prop.sharedMemPerBlock);
// printf("deviceOverlap= %i \n",prop.deviceOverlap);
// printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
// }
//checkCudaErrors(cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
// ALLOCATE GPU MEMORY FOR INPUT/OUTPUT and INTERNAL vectors
size_t size = signal_length_samples * sizeof(GPU_Complex);
checkCudaErrors(cudaMalloc((void **)&d_sig_in, size));
checkCudaErrors(cudaMemset(d_sig_in,0,size));
//checkCudaErrors(cudaMalloc((void **)&d_nco_in, size));
checkCudaErrors(cudaMalloc((void **)&d_sig_doppler_wiped, size));
checkCudaErrors(cudaMemset(d_sig_doppler_wiped,0,size));
checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, sizeof(std::complex<float>)*code_length_chips));
checkCudaErrors(cudaMemset(d_local_codes_in,0,sizeof(std::complex<float>)*code_length_chips));
d_code_length_chips=code_length_chips;
checkCudaErrors(cudaMalloc((void **)&d_shifts_chips, sizeof(float)*n_correlators));
checkCudaErrors(cudaMemset(d_shifts_chips,0,sizeof(float)*n_correlators));
//scalars
checkCudaErrors(cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators));
checkCudaErrors(cudaMemset(d_corr_out,0,sizeof(std::complex<float>)*n_correlators));
// Launch the Vector Add CUDA Kernel
threadsPerBlock = 256;
blocksPerGrid =(int)(signal_length_samples+threadsPerBlock-1)/threadsPerBlock;
cudaStreamCreate (&stream1) ;
cudaStreamCreate (&stream2) ;
return true;
}
bool cuda_multicorrelator::set_local_code_and_taps(
int code_length_chips,
const std::complex<float>* local_codes_in,
float *shifts_chips,
int n_correlators
)
{
// local code CPU -> GPU copy memory
checkCudaErrors(cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex)*code_length_chips, cudaMemcpyHostToDevice,stream1));
d_code_length_chips=(float)code_length_chips;
// Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!)
checkCudaErrors(cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float)*n_correlators,
cudaMemcpyHostToDevice,stream1));
return true;
}
bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_cuda(
std::complex<float>* corr_out,
const std::complex<float>* sig_in,
const std::complex<float>* local_codes_in,
float rem_carrier_phase_in_rad,
float phase_step_rad,
const int *shifts_samples,
int signal_length_samples,
int n_correlators)
{
size_t memSize = signal_length_samples * sizeof(std::complex<float>);
// input signal CPU -> GPU copy memory
checkCudaErrors(cudaMemcpyAsync(d_sig_in, sig_in, memSize,
cudaMemcpyHostToDevice, stream1));
//***** NOTICE: NCO is computed on-the-fly, not need to copy NCO into GPU! ****
//checkCudaErrors(cudaMemcpyAsync(d_nco_in, nco_in, memSize,
// cudaMemcpyHostToDevice, stream1));
// old version: all local codes are independent vectors
//checkCudaErrors(cudaMemcpyAsync(d_local_codes_in, local_codes_in, memSize*n_correlators,
// cudaMemcpyHostToDevice, stream2));
// new version: only one vector with extra samples to shift the local code for the correlator set
// Required: The last correlator tap in d_shifts_samples has the largest sample shift
// local code CPU -> GPU copy memory
checkCudaErrors(cudaMemcpyAsync(d_local_codes_in, local_codes_in, memSize+sizeof(std::complex<float>)*shifts_samples[n_correlators-1],
cudaMemcpyHostToDevice, stream2));
// Correlator shifts vector CPU -> GPU copy memory
checkCudaErrors(cudaMemcpyAsync(d_shifts_samples, shifts_samples, sizeof(int)*n_correlators,
cudaMemcpyHostToDevice, stream2));
//Launch carrier wipe-off kernel here, while local codes are being copied to GPU!
checkCudaErrors(cudaStreamSynchronize(stream1));
CUDA_32fc_Doppler_wipeoff<<<blocksPerGrid, threadsPerBlock,0, stream1>>>(d_sig_doppler_wiped, d_sig_in,rem_carrier_phase_in_rad,phase_step_rad, signal_length_samples);
//printf("CUDA kernel launch with %d blocks of %d threads\n", blocksPerGrid, threadsPerBlock);
//wait for Doppler wipeoff end...
checkCudaErrors(cudaStreamSynchronize(stream1));
checkCudaErrors(cudaStreamSynchronize(stream2));
//checkCudaErrors(cudaDeviceSynchronize());
//old
// scalarProdGPUCPXxN<<<blocksPerGrid, threadsPerBlock,0 ,stream2>>>(
// d_corr_out,
// d_sig_doppler_wiped,
// d_local_codes_in,
// 3,
// signal_length_samples
// );
//new
//launch the multitap correlator
scalarProdGPUCPXxN_shifts<<<blocksPerGrid, threadsPerBlock,0 ,stream2>>>(
d_corr_out,
d_sig_doppler_wiped,
d_local_codes_in,
d_shifts_samples,
n_correlators,
signal_length_samples
);
checkCudaErrors(cudaGetLastError());
//wait for correlators end...
checkCudaErrors(cudaStreamSynchronize(stream2));
// Copy the device result vector in device memory to the host result vector
// in host memory.
//scalar products (correlators outputs)
checkCudaErrors(cudaMemcpy(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
cudaMemcpyDeviceToHost));
return true;
}
bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
std::complex<float>* corr_out,
const std::complex<float>* sig_in,
float rem_carrier_phase_in_rad,
float phase_step_rad,
float code_phase_step_chips,
float rem_code_phase_chips,
int signal_length_samples,
int n_correlators)
{
size_t memSize = signal_length_samples * sizeof(std::complex<float>);
// input signal CPU -> GPU copy memory
checkCudaErrors(cudaMemcpyAsync(d_sig_in, sig_in, memSize,
cudaMemcpyHostToDevice, stream2));
//***** NOTICE: NCO is computed on-the-fly, not need to copy NCO into GPU! ****
//Launch carrier wipe-off kernel here, while local codes are being copied to GPU!
checkCudaErrors(cudaStreamSynchronize(stream2));
CUDA_32fc_Doppler_wipeoff<<<blocksPerGrid, threadsPerBlock,0, stream2>>>(d_sig_doppler_wiped, d_sig_in,rem_carrier_phase_in_rad,phase_step_rad, signal_length_samples);
//wait for Doppler wipeoff end...
checkCudaErrors(cudaStreamSynchronize(stream1));
checkCudaErrors(cudaStreamSynchronize(stream2));
//launch the multitap correlator with integrated local code resampler!
scalarProdGPUCPXxN_shifts_chips<<<blocksPerGrid, threadsPerBlock,0 ,stream1>>>(
d_corr_out,
d_sig_doppler_wiped,
d_local_codes_in,
d_shifts_chips,
d_code_length_chips,
code_phase_step_chips,
rem_code_phase_chips,
n_correlators,
signal_length_samples
);
checkCudaErrors(cudaGetLastError());
//wait for correlators end...
checkCudaErrors(cudaStreamSynchronize(stream1));
// Copy the device result vector in device memory to the host result vector
// in host memory.
//scalar products (correlators outputs)
checkCudaErrors(cudaMemcpyAsync(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
cudaMemcpyDeviceToHost,stream1));
checkCudaErrors(cudaStreamSynchronize(stream1));
return true;
}
cuda_multicorrelator::cuda_multicorrelator()
{
d_sig_in=NULL;
d_nco_in=NULL;
d_sig_doppler_wiped=NULL;
d_local_codes_in=NULL;
d_shifts_samples=NULL;
d_shifts_chips=NULL;
d_corr_out=NULL;
threadsPerBlock=0;
blocksPerGrid=0;
d_code_length_chips=0;
}
bool cuda_multicorrelator::free_cuda()
{
// Free device global memory
if (d_sig_in!=NULL) cudaFree(d_sig_in);
if (d_nco_in!=NULL) cudaFree(d_nco_in);
if (d_sig_doppler_wiped!=NULL) cudaFree(d_sig_doppler_wiped);
if (d_local_codes_in!=NULL) cudaFree(d_local_codes_in);
if (d_corr_out!=NULL) cudaFree(d_corr_out);
if (d_shifts_samples!=NULL) cudaFree(d_shifts_samples);
if (d_shifts_chips!=NULL) cudaFree(d_shifts_chips);
cudaStreamDestroy(stream1) ;
cudaStreamDestroy(stream2) ;
// Reset the device and exit
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
//checkCudaErrors(cudaDeviceReset());
return true;
}

View File

@ -0,0 +1,171 @@
/*!
* \file cuda_multicorrelator.h
* \brief High optimized CUDA GPU vector multiTAP correlator class
* \authors <ul>
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
* </ul>
*
* Class that implements a high optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef CUDA_MULTICORRELATOR_H_
#define CUDA_MULTICORRELATOR_H_
#ifdef __CUDACC__
#define CUDA_CALLABLE_MEMBER_GLOBAL __global__
#define CUDA_CALLABLE_MEMBER_DEVICE __device__
#else
#define CUDA_CALLABLE_MEMBER_GLOBAL
#define CUDA_CALLABLE_MEMBER_DEVICE
#endif
#include <complex>
#include <cuda.h>
// CUDA runtime
#include <cuda_runtime.h>
// GPU new internal data types for complex numbers
struct GPU_Complex {
float r;
float i;
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex() {};
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex( float a, float b ) : r(a), i(b) {}
CUDA_CALLABLE_MEMBER_DEVICE float magnitude2( void ) {
return r * r + i * i;
}
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator*(const GPU_Complex& a) {
#ifdef __CUDACC__
return GPU_Complex(__fmul_rn(r,a.r) - __fmul_rn(i,a.i), __fmul_rn(i,a.r) + __fmul_rn(r,a.i));
#else
return GPU_Complex(r*a.r - i*a.i, i*a.r + r*a.i);
#endif
}
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator+(const GPU_Complex& a) {
return GPU_Complex(r+a.r, i+a.i);
}
CUDA_CALLABLE_MEMBER_DEVICE void operator+=(const GPU_Complex& a) {
r+=a.r;
i+=a.i;
}
CUDA_CALLABLE_MEMBER_DEVICE void multiply_acc(const GPU_Complex& a, const GPU_Complex& b)
{
//c=a*b+c
//real part
//c.r=(a.r*b.r - a.i*b.i)+c.r
#ifdef __CUDACC__
r=__fmaf_rn(a.r,b.r,r);
r=__fmaf_rn(-a.i,b.i,r);
//imag part
i=__fmaf_rn(a.i,b.r,i);
i=__fmaf_rn(a.r,b.i,i);
#else
r=(a.r*b.r - a.i*b.i)+r;
i=(a.i*b.r - a.r*b.i)+i;
#endif
}
};
struct GPU_Complex_Short {
float r;
float i;
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short( short int a, short int b ) : r(a), i(b) {}
CUDA_CALLABLE_MEMBER_DEVICE float magnitude2( void ) {
return r * r + i * i;
}
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator*(const GPU_Complex_Short& a) {
return GPU_Complex_Short(r*a.r - i*a.i, i*a.r + r*a.i);
}
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator+(const GPU_Complex_Short& a) {
return GPU_Complex_Short(r+a.r, i+a.i);
}
};
/*!
* \brief Class that implements carrier wipe-off and correlators using NVIDIA CUDA GPU accelerators.
*/
class cuda_multicorrelator
{
public:
cuda_multicorrelator();
bool init_cuda(const int argc, const char **argv, int signal_length_samples, int local_codes_length_samples, int n_correlators);
bool init_cuda_integrated_resampler(
const int argc, const char **argv,
int signal_length_samples,
int code_length_chips,
int n_correlators
);
bool set_local_code_and_taps(
int code_length_chips,
const std::complex<float>* local_codes_in,
float *shifts_chips,
int n_correlators
);
bool free_cuda();
bool Carrier_wipeoff_multicorrelator_cuda(
std::complex<float>* corr_out,
const std::complex<float>* sig_in,
const std::complex<float>* local_codes_in,
float rem_carrier_phase_in_rad,
float phase_step_rad,
const int *shifts_samples,
int signal_length_samples,
int n_correlators);
bool Carrier_wipeoff_multicorrelator_resampler_cuda(
std::complex<float>* corr_out,
const std::complex<float>* sig_in,
float rem_carrier_phase_in_rad,
float phase_step_rad,
float code_phase_step_chips,
float rem_code_phase_chips,
int signal_length_samples,
int n_correlators);
private:
// Allocate the device input vectors
GPU_Complex *d_sig_in;
GPU_Complex *d_nco_in;
GPU_Complex *d_sig_doppler_wiped;
GPU_Complex *d_local_codes_in;
GPU_Complex *d_corr_out;
int *d_shifts_samples;
float *d_shifts_chips;
float d_code_length_chips;
int threadsPerBlock;
int blocksPerGrid;
cudaStream_t stream1;
cudaStream_t stream2;
int num_gpu_devices;
int selected_device;
};
#endif /* CUDA_MULTICORRELATOR_H_ */

View File

@ -0,0 +1,151 @@
/*
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
/* CUda UTility Library */
#ifndef _EXCEPTION_H_
#define _EXCEPTION_H_
// includes, system
#include <exception>
#include <stdexcept>
#include <iostream>
#include <stdlib.h>
//! Exception wrapper.
//! @param Std_Exception Exception out of namespace std for easy typing.
template<class Std_Exception>
class Exception : public Std_Exception
{
public:
//! @brief Static construction interface
//! @return Alwayss throws ( Located_Exception<Exception>)
//! @param file file in which the Exception occurs
//! @param line line in which the Exception occurs
//! @param detailed details on the code fragment causing the Exception
static void throw_it(const char *file,
const int line,
const char *detailed = "-");
//! Static construction interface
//! @return Alwayss throws ( Located_Exception<Exception>)
//! @param file file in which the Exception occurs
//! @param line line in which the Exception occurs
//! @param detailed details on the code fragment causing the Exception
static void throw_it(const char *file,
const int line,
const std::string &detailed);
//! Destructor
virtual ~Exception() throw();
private:
//! Constructor, default (private)
Exception();
//! Constructor, standard
//! @param str string returned by what()
Exception(const std::string &str);
};
////////////////////////////////////////////////////////////////////////////////
//! Exception handler function for arbitrary exceptions
//! @param ex exception to handle
////////////////////////////////////////////////////////////////////////////////
template<class Exception_Typ>
inline void
handleException(const Exception_Typ &ex)
{
std::cerr << ex.what() << std::endl;
exit(EXIT_FAILURE);
}
//! Convenience macros
//! Exception caused by dynamic program behavior, e.g. file does not exist
#define RUNTIME_EXCEPTION( msg) \
Exception<std::runtime_error>::throw_it( __FILE__, __LINE__, msg)
//! Logic exception in program, e.g. an assert failed
#define LOGIC_EXCEPTION( msg) \
Exception<std::logic_error>::throw_it( __FILE__, __LINE__, msg)
//! Out of range exception
#define RANGE_EXCEPTION( msg) \
Exception<std::range_error>::throw_it( __FILE__, __LINE__, msg)
////////////////////////////////////////////////////////////////////////////////
//! Implementation
// includes, system
#include <sstream>
////////////////////////////////////////////////////////////////////////////////
//! Static construction interface.
//! @param Exception causing code fragment (file and line) and detailed infos.
////////////////////////////////////////////////////////////////////////////////
/*static*/ template<class Std_Exception>
void
Exception<Std_Exception>::
throw_it(const char *file, const int line, const char *detailed)
{
std::stringstream s;
// Quiet heavy-weight but exceptions are not for
// performance / release versions
s << "Exception in file '" << file << "' in line " << line << "\n"
<< "Detailed description: " << detailed << "\n";
throw Exception(s.str());
}
////////////////////////////////////////////////////////////////////////////////
//! Static construction interface.
//! @param Exception causing code fragment (file and line) and detailed infos.
////////////////////////////////////////////////////////////////////////////////
/*static*/ template<class Std_Exception>
void
Exception<Std_Exception>::
throw_it(const char *file, const int line, const std::string &msg)
{
throw_it(file, line, msg.c_str());
}
////////////////////////////////////////////////////////////////////////////////
//! Constructor, default (private).
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::Exception() :
Std_Exception("Unknown Exception.\n")
{ }
////////////////////////////////////////////////////////////////////////////////
//! Constructor, standard (private).
//! String returned by what().
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::Exception(const std::string &s) :
Std_Exception(s)
{ }
////////////////////////////////////////////////////////////////////////////////
//! Destructor
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::~Exception() throw() { }
// functions, exported
#endif // #ifndef _EXCEPTION_H_

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,517 @@
/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// Helper functions for CUDA Driver API error handling (make sure that CUDA_H is included in your projects)
#ifndef HELPER_CUDA_DRVAPI_H
#define HELPER_CUDA_DRVAPI_H
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <helper_string.h>
#include <drvapi_error_string.h>
#ifndef MAX
#define MAX(a,b) (a > b ? a : b)
#endif
#ifndef HELPER_CUDA_H
inline int ftoi(float value)
{
return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5));
}
#endif
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
////////////////////////////////////////////////////////////////////////////////
// These are CUDA Helper functions
// add a level of protection to the CUDA SDK samples, let's force samples to explicitly include CUDA.H
#ifdef __cuda_cuda_h__
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
#ifndef checkCudaErrors
#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
// These are the inline versions for all of the SDK helper functions
inline void __checkCudaErrors(CUresult err, const char *file, const int line)
{
if (CUDA_SUCCESS != err)
{
fprintf(stderr, "checkCudaErrors() Driver API error = %04d \"%s\" from file <%s>, line %i.\n",
err, getCudaDrvErrorString(err), file, line);
exit(EXIT_FAILURE);
}
}
#endif
#ifdef getLastCudaDrvErrorMsg
#undef getLastCudaDrvErrorMsg
#endif
#define getLastCudaDrvErrorMsg(msg) __getLastCudaDrvErrorMsg (msg, __FILE__, __LINE__)
inline void __getLastCudaDrvErrorMsg(const char *msg, const char *file, const int line)
{
CUresult err = cuCtxSynchronize();
if (CUDA_SUCCESS != err)
{
fprintf(stderr, "getLastCudaDrvErrorMsg -> %s", msg);
fprintf(stderr, "getLastCudaDrvErrorMsg -> cuCtxSynchronize API error = %04d \"%s\" in file <%s>, line %i.\n",
err, getCudaDrvErrorString(err), file, line);
exit(EXIT_FAILURE);
}
}
// This function wraps the CUDA Driver API into a template function
template <class T>
inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
{
CUresult error_result = cuDeviceGetAttribute(attribute, device_attribute, device);
if (error_result != CUDA_SUCCESS)
{
printf("cuDeviceGetAttribute returned %d\n-> %s\n", (int)error_result, getCudaDrvErrorString(error_result));
exit(EXIT_SUCCESS);
}
}
#endif
// Beginning of GPU Architecture definitions
inline int _ConvertSMVer2CoresDRV(int major, int minor)
{
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
typedef struct
{
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
int Cores;
} sSMtoCores;
sSMtoCores nGpuArchCoresPerSM[] =
{
{ 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
{ 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
{ 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
{ 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
{ 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
{ 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
{ 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
{ 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
{ -1, -1 }
};
int index = 0;
while (nGpuArchCoresPerSM[index].SM != -1)
{
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
{
return nGpuArchCoresPerSM[index].Cores;
}
index++;
}
// If we don't find the values, we default use the previous one to run properly
printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
return nGpuArchCoresPerSM[index-1].Cores;
}
// end of GPU Architecture definitions
#ifdef __cuda_cuda_h__
// General GPU Device CUDA Initialization
inline int gpuDeviceInitDRV(int ARGC, const char **ARGV)
{
int cuDevice = 0;
int deviceCount = 0;
CUresult err = cuInit(0);
if (CUDA_SUCCESS == err)
{
checkCudaErrors(cuDeviceGetCount(&deviceCount));
}
if (deviceCount == 0)
{
fprintf(stderr, "cudaDeviceInit error: no devices supporting CUDA\n");
exit(EXIT_FAILURE);
}
int dev = 0;
dev = getCmdLineArgumentInt(ARGC, (const char **) ARGV, "device=");
if (dev < 0)
{
dev = 0;
}
if (dev > deviceCount-1)
{
fprintf(stderr, "\n");
fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
fprintf(stderr, ">> cudaDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
fprintf(stderr, "\n");
return -dev;
}
checkCudaErrors(cuDeviceGet(&cuDevice, dev));
char name[100];
cuDeviceGetName(name, 100, cuDevice);
int computeMode;
getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, dev);
if (computeMode == CU_COMPUTEMODE_PROHIBITED)
{
fprintf(stderr, "Error: device is running in <CU_COMPUTEMODE_PROHIBITED>, no threads can use this CUDA Device.\n");
return -1;
}
if (checkCmdLineFlag(ARGC, (const char **) ARGV, "quiet") == false)
{
printf("gpuDeviceInitDRV() Using CUDA Device [%d]: %s\n", dev, name);
}
return dev;
}
// This function returns the best GPU based on performance
inline int gpuGetMaxGflopsDeviceIdDRV()
{
CUdevice current_device = 0;
CUdevice max_perf_device = 0;
int device_count = 0;
int sm_per_multiproc = 0;
unsigned long long max_compute_perf = 0;
int best_SM_arch = 0;
int major = 0;
int minor = 0;
int multiProcessorCount;
int clockRate;
int devices_prohibited = 0;
cuInit(0);
checkCudaErrors(cuDeviceGetCount(&device_count));
if (device_count == 0)
{
fprintf(stderr, "gpuGetMaxGflopsDeviceIdDRV error: no devices supporting CUDA\n");
exit(EXIT_FAILURE);
}
// Find the best major SM Architecture GPU device
while (current_device < device_count)
{
checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
if (major > 0 && major < 9999)
{
best_SM_arch = MAX(best_SM_arch, major);
}
current_device++;
}
// Find the best CUDA capable GPU device
current_device = 0;
while (current_device < device_count)
{
checkCudaErrors(cuDeviceGetAttribute(&multiProcessorCount,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
current_device));
checkCudaErrors(cuDeviceGetAttribute(&clockRate,
CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
current_device));
checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
int computeMode;
getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device);
if (computeMode != CU_COMPUTEMODE_PROHIBITED)
{
if (major == 9999 && minor == 9999)
{
sm_per_multiproc = 1;
}
else
{
sm_per_multiproc = _ConvertSMVer2CoresDRV(major, minor);
}
unsigned long long compute_perf = (unsigned long long) (multiProcessorCount * sm_per_multiproc * clockRate);
if (compute_perf > max_compute_perf)
{
// If we find GPU with SM major > 2, search only these
if (best_SM_arch > 2)
{
// If our device==dest_SM_arch, choose this, or else pass
if (major == best_SM_arch)
{
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
}
else
{
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
}
}
else
{
devices_prohibited++;
}
++current_device;
}
if (devices_prohibited == device_count)
{
fprintf(stderr, "gpuGetMaxGflopsDeviceIdDRV error: all devices have compute mode prohibited.\n");
exit(EXIT_FAILURE);
}
return max_perf_device;
}
// This function returns the best Graphics GPU based on performance
inline int gpuGetMaxGflopsGLDeviceIdDRV()
{
CUdevice current_device = 0, max_perf_device = 0;
int device_count = 0, sm_per_multiproc = 0;
int max_compute_perf = 0, best_SM_arch = 0;
int major = 0, minor = 0, multiProcessorCount, clockRate;
int bTCC = 0;
int devices_prohibited = 0;
char deviceName[256];
cuInit(0);
checkCudaErrors(cuDeviceGetCount(&device_count));
if (device_count == 0)
{
fprintf(stderr, "gpuGetMaxGflopsGLDeviceIdDRV error: no devices supporting CUDA\n");
exit(EXIT_FAILURE);
}
// Find the best major SM Architecture GPU device that are graphics devices
while (current_device < device_count)
{
checkCudaErrors(cuDeviceGetName(deviceName, 256, current_device));
checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
#if CUDA_VERSION >= 3020
checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device));
#else
// Assume a Tesla GPU is running in TCC if we are running CUDA 3.1
if (deviceName[0] == 'T')
{
bTCC = 1;
}
#endif
int computeMode;
getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device);
if (computeMode != CU_COMPUTEMODE_PROHIBITED)
{
if (!bTCC)
{
if (major > 0 && major < 9999)
{
best_SM_arch = MAX(best_SM_arch, major);
}
}
}
else
{
devices_prohibited++;
}
current_device++;
}
if (devices_prohibited == device_count)
{
fprintf(stderr, "gpuGetMaxGflopsGLDeviceIdDRV error: all devices have compute mode prohibited.\n");
exit(EXIT_FAILURE);
}
// Find the best CUDA capable GPU device
current_device = 0;
while (current_device < device_count)
{
checkCudaErrors(cuDeviceGetAttribute(&multiProcessorCount,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
current_device));
checkCudaErrors(cuDeviceGetAttribute(&clockRate,
CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
current_device));
checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
#if CUDA_VERSION >= 3020
checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device));
#else
// Assume a Tesla GPU is running in TCC if we are running CUDA 3.1
if (deviceName[0] == 'T')
{
bTCC = 1;
}
#endif
int computeMode;
getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device);
if (computeMode != CU_COMPUTEMODE_PROHIBITED)
{
if (major == 9999 && minor == 9999)
{
sm_per_multiproc = 1;
}
else
{
sm_per_multiproc = _ConvertSMVer2CoresDRV(major, minor);
}
// If this is a Tesla based GPU and SM 2.0, and TCC is disabled, this is a contendor
if (!bTCC) // Is this GPU running the TCC driver? If so we pass on this
{
int compute_perf = multiProcessorCount * sm_per_multiproc * clockRate;
if (compute_perf > max_compute_perf)
{
// If we find GPU with SM major > 2, search only these
if (best_SM_arch > 2)
{
// If our device = dest_SM_arch, then we pick this one
if (major == best_SM_arch)
{
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
}
else
{
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
}
}
}
++current_device;
}
return max_perf_device;
}
// General initialization call to pick the best CUDA Device
inline CUdevice findCudaDeviceDRV(int argc, const char **argv)
{
CUdevice cuDevice;
int devID = 0;
// If the command-line has a device number specified, use it
if (checkCmdLineFlag(argc, (const char **)argv, "device"))
{
devID = gpuDeviceInitDRV(argc, argv);
if (devID < 0)
{
printf("exiting...\n");
exit(EXIT_SUCCESS);
}
}
else
{
// Otherwise pick the device with highest Gflops/s
char name[100];
devID = gpuGetMaxGflopsDeviceIdDRV();
checkCudaErrors(cuDeviceGet(&cuDevice, devID));
cuDeviceGetName(name, 100, cuDevice);
printf("> Using CUDA Device [%d]: %s\n", devID, name);
}
cuDeviceGet(&cuDevice, devID);
return cuDevice;
}
// This function will pick the best CUDA device available with OpenGL interop
inline CUdevice findCudaGLDeviceDRV(int argc, const char **argv)
{
CUdevice cuDevice;
int devID = 0;
// If the command-line has a device number specified, use it
if (checkCmdLineFlag(argc, (const char **)argv, "device"))
{
devID = gpuDeviceInitDRV(argc, (const char **)argv);
if (devID < 0)
{
printf("no CUDA capable devices found, exiting...\n");
exit(EXIT_SUCCESS);
}
}
else
{
char name[100];
// Otherwise pick the device with highest Gflops/s
devID = gpuGetMaxGflopsGLDeviceIdDRV();
checkCudaErrors(cuDeviceGet(&cuDevice, devID));
cuDeviceGetName(name, 100, cuDevice);
printf("> Using CUDA/GL Device [%d]: %s\n", devID, name);
}
return devID;
}
// General check for CUDA GPU SM Capabilities
inline bool checkCudaCapabilitiesDRV(int major_version, int minor_version, int devID)
{
CUdevice cuDevice;
char name[256];
int major = 0, minor = 0;
checkCudaErrors(cuDeviceGet(&cuDevice, devID));
checkCudaErrors(cuDeviceGetName(name, 100, cuDevice));
checkCudaErrors(cuDeviceComputeCapability(&major, &minor, devID));
if ((major > major_version) ||
(major == major_version && minor >= minor_version))
{
printf("> Device %d: <%16s >, Compute SM %d.%d detected\n", devID, name, major, minor);
return true;
}
else
{
printf("No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version);
return false;
}
}
#endif
// end of CUDA Helper Functions
#endif

View File

@ -0,0 +1,165 @@
/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#ifndef HELPER_CUDA_GL_H
#define HELPER_CUDA_GL_H
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// includes, graphics
#if defined (__APPLE__) || defined(MACOSX)
#include <OpenGL/gl.h>
#include <OpenGL/glu.h>
#else
#include <GL/gl.h>
#include <GL/glu.h>
#endif
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
#ifdef __DRIVER_TYPES_H__
#ifndef DEVICE_RESET
#define DEVICE_RESET cudaDeviceReset()
#endif
#else
#ifndef DEVICE_RESET
#define DEVICE_RESET
#endif
#endif
#ifdef __CUDA_GL_INTEROP_H__
////////////////////////////////////////////////////////////////////////////////
// These are CUDA OpenGL Helper functions
inline int gpuGLDeviceInit(int ARGC, const char **ARGV)
{
int deviceCount;
checkCudaErrors(cudaGetDeviceCount(&deviceCount));
if (deviceCount == 0)
{
fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
exit(EXIT_FAILURE);
}
int dev = 0;
dev = getCmdLineArgumentInt(ARGC, ARGV, "device=");
if (dev < 0)
{
dev = 0;
}
if (dev > deviceCount-1)
{
fprintf(stderr, "\n");
fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
fprintf(stderr, ">> gpuGLDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
fprintf(stderr, "\n");
return -dev;
}
cudaDeviceProp deviceProp;
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
if (deviceProp.computeMode == cudaComputeModeProhibited)
{
fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n");
return -1;
}
if (deviceProp.major < 1)
{
fprintf(stderr, "Error: device does not support CUDA.\n");
exit(EXIT_FAILURE);
}
if (checkCmdLineFlag(ARGC, ARGV, "quiet") == false)
{
fprintf(stderr, "Using device %d: %s\n", dev, deviceProp.name);
}
checkCudaErrors(cudaGLSetGLDevice(dev));
return dev;
}
// This function will pick the best CUDA device available with OpenGL interop
inline int findCudaGLDevice(int argc, const char **argv)
{
int devID = 0;
// If the command-line has a device number specified, use it
if (checkCmdLineFlag(argc, (const char **)argv, "device"))
{
devID = gpuGLDeviceInit(argc, (const char **)argv);
if (devID < 0)
{
printf("no CUDA capable devices found, exiting...\n");
DEVICE_RESET
exit(EXIT_SUCCESS);
}
}
else
{
// Otherwise pick the device with highest Gflops/s
devID = gpuGetMaxGflopsDeviceId();
cudaGLSetGLDevice(devID);
}
return devID;
}
////////////////////////////////////////////////////////////////////////////
//! Check for OpenGL error
//! @return bool if no GL error has been encountered, otherwise 0
//! @param file __FILE__ macro
//! @param line __LINE__ macro
//! @note The GL error is listed on stderr
//! @note This function should be used via the CHECK_ERROR_GL() macro
////////////////////////////////////////////////////////////////////////////
inline bool
sdkCheckErrorGL(const char *file, const int line)
{
bool ret_val = true;
// check for error
GLenum gl_error = glGetError();
if (gl_error != GL_NO_ERROR)
{
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
char tmpStr[512];
// NOTE: "%s(%i) : " allows Visual Studio to directly jump to the file at the right line
// when the user double clicks on the error line in the Output pane. Like any compile error.
sprintf_s(tmpStr, 255, "\n%s(%i) : GL Error : %s\n\n", file, line, gluErrorString(gl_error));
fprintf(stderr, "%s", tmpStr);
#endif
fprintf(stderr, "GL Error in file '%s' in line %d :\n", file, line);
fprintf(stderr, "%s\n", gluErrorString(gl_error));
ret_val = false;
}
return ret_val;
}
#define SDK_CHECK_ERROR_GL() \
if( false == sdkCheckErrorGL( __FILE__, __LINE__)) { \
DEVICE_RESET \
exit(EXIT_FAILURE); \
}
#endif
#endif

View File

@ -0,0 +1,42 @@
/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// These are helper functions for the SDK samples (string parsing, timers, image helpers, etc)
#ifndef HELPER_FUNCTIONS_H
#define HELPER_FUNCTIONS_H
#ifdef WIN32
#pragma warning(disable:4996)
#endif
// includes, project
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <assert.h>
#include <exception.h>
#include <math.h>
#include <fstream>
#include <vector>
#include <iostream>
#include <algorithm>
// includes, timer, string parsing, image helpers
#include <helper_timer.h> // helper functions for timers
#include <helper_string.h> // helper functions for string parsing
#include <helper_image.h> // helper functions for image compare, dump, data comparisons
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
#endif // HELPER_FUNCTIONS_H

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,516 @@
/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// These are helper functions for the SDK samples (string parsing, timers, etc)
#ifndef STRING_HELPER_H
#define STRING_HELPER_H
#include <stdio.h>
#include <stdlib.h>
#include <fstream>
#include <string>
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#ifndef _CRT_SECURE_NO_DEPRECATE
#define _CRT_SECURE_NO_DEPRECATE
#endif
#ifndef STRCASECMP
#define STRCASECMP _stricmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP _strnicmp
#endif
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
#endif
#ifndef FOPEN
#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result != 0)
#endif
#ifndef SSCANF
#define SSCANF sscanf_s
#endif
#ifndef SPRINTF
#define SPRINTF sprintf_s
#endif
#else // Linux Includes
#include <string.h>
#include <strings.h>
#ifndef STRCASECMP
#define STRCASECMP strcasecmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP strncasecmp
#endif
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
#endif
#ifndef FOPEN
#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result == NULL)
#endif
#ifndef SSCANF
#define SSCANF sscanf
#endif
#ifndef SPRINTF
#define SPRINTF sprintf
#endif
#endif
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
// CUDA Utility Helper Functions
inline int stringRemoveDelimiter(char delimiter, const char *string)
{
int string_start = 0;
while (string[string_start] == delimiter)
{
string_start++;
}
if (string_start >= (int)strlen(string)-1)
{
return 0;
}
return string_start;
}
inline int getFileExtension(char *filename, char **extension)
{
int string_length = (int)strlen(filename);
while (filename[string_length--] != '.')
{
if (string_length == 0)
break;
}
if (string_length > 0) string_length += 2;
if (string_length == 0)
*extension = NULL;
else
*extension = &filename[string_length];
return string_length;
}
inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
{
bool bFound = false;
if (argc >= 1)
{
for (int i=1; i < argc; i++)
{
int string_start = stringRemoveDelimiter('-', argv[i]);
const char *string_argv = &argv[i][string_start];
const char *equal_pos = strchr(string_argv, '=');
int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
int length = (int)strlen(string_ref);
if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length))
{
bFound = true;
continue;
}
}
}
return bFound;
}
// This function wraps the CUDA Driver API into a template function
template <class T>
inline bool getCmdLineArgumentValue(const int argc, const char **argv, const char *string_ref, T *value)
{
bool bFound = false;
if (argc >= 1)
{
for (int i=1; i < argc; i++)
{
int string_start = stringRemoveDelimiter('-', argv[i]);
const char *string_argv = &argv[i][string_start];
int length = (int)strlen(string_ref);
if (!STRNCASECMP(string_argv, string_ref, length))
{
if (length+1 <= (int)strlen(string_argv))
{
int auto_inc = (string_argv[length] == '=') ? 1 : 0;
*value = (T)atoi(&string_argv[length + auto_inc]);
}
bFound = true;
i=argc;
}
}
}
return bFound;
}
inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
{
bool bFound = false;
int value = -1;
if (argc >= 1)
{
for (int i=1; i < argc; i++)
{
int string_start = stringRemoveDelimiter('-', argv[i]);
const char *string_argv = &argv[i][string_start];
int length = (int)strlen(string_ref);
if (!STRNCASECMP(string_argv, string_ref, length))
{
if (length+1 <= (int)strlen(string_argv))
{
int auto_inc = (string_argv[length] == '=') ? 1 : 0;
value = atoi(&string_argv[length + auto_inc]);
}
else
{
value = 0;
}
bFound = true;
continue;
}
}
}
if (bFound)
{
return value;
}
else
{
return 0;
}
}
inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref)
{
bool bFound = false;
float value = -1;
if (argc >= 1)
{
for (int i=1; i < argc; i++)
{
int string_start = stringRemoveDelimiter('-', argv[i]);
const char *string_argv = &argv[i][string_start];
int length = (int)strlen(string_ref);
if (!STRNCASECMP(string_argv, string_ref, length))
{
if (length+1 <= (int)strlen(string_argv))
{
int auto_inc = (string_argv[length] == '=') ? 1 : 0;
value = (float)atof(&string_argv[length + auto_inc]);
}
else
{
value = 0.f;
}
bFound = true;
continue;
}
}
}
if (bFound)
{
return value;
}
else
{
return 0;
}
}
inline bool getCmdLineArgumentString(const int argc, const char **argv,
const char *string_ref, char **string_retval)
{
bool bFound = false;
if (argc >= 1)
{
for (int i=1; i < argc; i++)
{
int string_start = stringRemoveDelimiter('-', argv[i]);
char *string_argv = (char *)&argv[i][string_start];
int length = (int)strlen(string_ref);
if (!STRNCASECMP(string_argv, string_ref, length))
{
*string_retval = &string_argv[length+1];
bFound = true;
continue;
}
}
}
if (!bFound)
{
*string_retval = NULL;
}
return bFound;
}
//////////////////////////////////////////////////////////////////////////////
//! Find the path for a file assuming that
//! files are found in the searchPath.
//!
//! @return the path if succeeded, otherwise 0
//! @param filename name of the file
//! @param executable_path optional absolute path of the executable
//////////////////////////////////////////////////////////////////////////////
inline char *sdkFindFilePath(const char *filename, const char *executable_path)
{
// <executable_name> defines a variable that is replaced with the name of the executable
// Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files)
// The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc
const char *searchPath[] =
{
"./", // same dir
"./common/", // "/common/" subdir
"./common/data/", // "/common/data/" subdir
"./data/", // "/data/" subdir
"./src/", // "/src/" subdir
"./src/<executable_name>/data/", // "/src/<executable_name>/data/" subdir
"./inc/", // "/inc/" subdir
"./0_Simple/", // "/0_Simple/" subdir
"./1_Utilities/", // "/1_Utilities/" subdir
"./2_Graphics/", // "/2_Graphics/" subdir
"./3_Imaging/", // "/3_Imaging/" subdir
"./4_Finance/", // "/4_Finance/" subdir
"./5_Simulations/", // "/5_Simulations/" subdir
"./6_Advanced/", // "/6_Advanced/" subdir
"./7_CUDALibraries/", // "/7_CUDALibraries/" subdir
"./8_Android/", // "/8_Android/" subdir
"./samples/", // "/samples/" subdir
"../", // up 1 in tree
"../common/", // up 1 in tree, "/common/" subdir
"../common/data/", // up 1 in tree, "/common/data/" subdir
"../data/", // up 1 in tree, "/data/" subdir
"../src/", // up 1 in tree, "/src/" subdir
"../inc/", // up 1 in tree, "/inc/" subdir
"../0_Simple/<executable_name>/data/", // up 1 in tree, "/0_Simple/<executable_name>/" subdir
"../1_Utilities/<executable_name>/data/", // up 1 in tree, "/1_Utilities/<executable_name>/" subdir
"../2_Graphics/<executable_name>/data/", // up 1 in tree, "/2_Graphics/<executable_name>/" subdir
"../3_Imaging/<executable_name>/data/", // up 1 in tree, "/3_Imaging/<executable_name>/" subdir
"../4_Finance/<executable_name>/data/", // up 1 in tree, "/4_Finance/<executable_name>/" subdir
"../5_Simulations/<executable_name>/data/", // up 1 in tree, "/5_Simulations/<executable_name>/" subdir
"../6_Advanced/<executable_name>/data/", // up 1 in tree, "/6_Advanced/<executable_name>/" subdir
"../7_CUDALibraries/<executable_name>/data/",// up 1 in tree, "/7_CUDALibraries/<executable_name>/" subdir
"../8_Android/<executable_name>/data/", // up 1 in tree, "/8_Android/<executable_name>/" subdir
"../samples/<executable_name>/data/", // up 1 in tree, "/samples/<executable_name>/" subdir
"../../", // up 2 in tree
"../../common/", // up 2 in tree, "/common/" subdir
"../../common/data/", // up 2 in tree, "/common/data/" subdir
"../../data/", // up 2 in tree, "/data/" subdir
"../../src/", // up 2 in tree, "/src/" subdir
"../../inc/", // up 2 in tree, "/inc/" subdir
"../../sandbox/<executable_name>/data/", // up 2 in tree, "/sandbox/<executable_name>/" subdir
"../../0_Simple/<executable_name>/data/", // up 2 in tree, "/0_Simple/<executable_name>/" subdir
"../../1_Utilities/<executable_name>/data/", // up 2 in tree, "/1_Utilities/<executable_name>/" subdir
"../../2_Graphics/<executable_name>/data/", // up 2 in tree, "/2_Graphics/<executable_name>/" subdir
"../../3_Imaging/<executable_name>/data/", // up 2 in tree, "/3_Imaging/<executable_name>/" subdir
"../../4_Finance/<executable_name>/data/", // up 2 in tree, "/4_Finance/<executable_name>/" subdir
"../../5_Simulations/<executable_name>/data/", // up 2 in tree, "/5_Simulations/<executable_name>/" subdir
"../../6_Advanced/<executable_name>/data/", // up 2 in tree, "/6_Advanced/<executable_name>/" subdir
"../../7_CUDALibraries/<executable_name>/data/", // up 2 in tree, "/7_CUDALibraries/<executable_name>/" subdir
"../../8_Android/<executable_name>/data/", // up 2 in tree, "/8_Android/<executable_name>/" subdir
"../../samples/<executable_name>/data/", // up 2 in tree, "/samples/<executable_name>/" subdir
"../../../", // up 3 in tree
"../../../src/<executable_name>/", // up 3 in tree, "/src/<executable_name>/" subdir
"../../../src/<executable_name>/data/", // up 3 in tree, "/src/<executable_name>/data/" subdir
"../../../src/<executable_name>/src/", // up 3 in tree, "/src/<executable_name>/src/" subdir
"../../../src/<executable_name>/inc/", // up 3 in tree, "/src/<executable_name>/inc/" subdir
"../../../sandbox/<executable_name>/", // up 3 in tree, "/sandbox/<executable_name>/" subdir
"../../../sandbox/<executable_name>/data/", // up 3 in tree, "/sandbox/<executable_name>/data/" subdir
"../../../sandbox/<executable_name>/src/", // up 3 in tree, "/sandbox/<executable_name>/src/" subdir
"../../../sandbox/<executable_name>/inc/", // up 3 in tree, "/sandbox/<executable_name>/inc/" subdir
"../../../0_Simple/<executable_name>/data/", // up 3 in tree, "/0_Simple/<executable_name>/" subdir
"../../../1_Utilities/<executable_name>/data/", // up 3 in tree, "/1_Utilities/<executable_name>/" subdir
"../../../2_Graphics/<executable_name>/data/", // up 3 in tree, "/2_Graphics/<executable_name>/" subdir
"../../../3_Imaging/<executable_name>/data/", // up 3 in tree, "/3_Imaging/<executable_name>/" subdir
"../../../4_Finance/<executable_name>/data/", // up 3 in tree, "/4_Finance/<executable_name>/" subdir
"../../../5_Simulations/<executable_name>/data/", // up 3 in tree, "/5_Simulations/<executable_name>/" subdir
"../../../6_Advanced/<executable_name>/data/", // up 3 in tree, "/6_Advanced/<executable_name>/" subdir
"../../../7_CUDALibraries/<executable_name>/data/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir
"../../../8_Android/<executable_name>/data/", // up 3 in tree, "/8_Android/<executable_name>/" subdir
"../../../0_Simple/<executable_name>/", // up 3 in tree, "/0_Simple/<executable_name>/" subdir
"../../../1_Utilities/<executable_name>/", // up 3 in tree, "/1_Utilities/<executable_name>/" subdir
"../../../2_Graphics/<executable_name>/", // up 3 in tree, "/2_Graphics/<executable_name>/" subdir
"../../../3_Imaging/<executable_name>/", // up 3 in tree, "/3_Imaging/<executable_name>/" subdir
"../../../4_Finance/<executable_name>/", // up 3 in tree, "/4_Finance/<executable_name>/" subdir
"../../../5_Simulations/<executable_name>/", // up 3 in tree, "/5_Simulations/<executable_name>/" subdir
"../../../6_Advanced/<executable_name>/", // up 3 in tree, "/6_Advanced/<executable_name>/" subdir
"../../../7_CUDALibraries/<executable_name>/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir
"../../../8_Android/<executable_name>/", // up 3 in tree, "/8_Android/<executable_name>/" subdir
"../../../samples/<executable_name>/data/", // up 3 in tree, "/samples/<executable_name>/" subdir
"../../../common/", // up 3 in tree, "../../../common/" subdir
"../../../common/data/", // up 3 in tree, "../../../common/data/" subdir
"../../../data/", // up 3 in tree, "../../../data/" subdir
"../../../../", // up 4 in tree
"../../../../src/<executable_name>/", // up 4 in tree, "/src/<executable_name>/" subdir
"../../../../src/<executable_name>/data/", // up 4 in tree, "/src/<executable_name>/data/" subdir
"../../../../src/<executable_name>/src/", // up 4 in tree, "/src/<executable_name>/src/" subdir
"../../../../src/<executable_name>/inc/", // up 4 in tree, "/src/<executable_name>/inc/" subdir
"../../../../sandbox/<executable_name>/", // up 4 in tree, "/sandbox/<executable_name>/" subdir
"../../../../sandbox/<executable_name>/data/", // up 4 in tree, "/sandbox/<executable_name>/data/" subdir
"../../../../sandbox/<executable_name>/src/", // up 4 in tree, "/sandbox/<executable_name>/src/" subdir
"../../../../sandbox/<executable_name>/inc/", // up 4 in tree, "/sandbox/<executable_name>/inc/" subdir
"../../../../0_Simple/<executable_name>/data/", // up 4 in tree, "/0_Simple/<executable_name>/" subdir
"../../../../1_Utilities/<executable_name>/data/", // up 4 in tree, "/1_Utilities/<executable_name>/" subdir
"../../../../2_Graphics/<executable_name>/data/", // up 4 in tree, "/2_Graphics/<executable_name>/" subdir
"../../../../3_Imaging/<executable_name>/data/", // up 4 in tree, "/3_Imaging/<executable_name>/" subdir
"../../../../4_Finance/<executable_name>/data/", // up 4 in tree, "/4_Finance/<executable_name>/" subdir
"../../../../5_Simulations/<executable_name>/data/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir
"../../../../6_Advanced/<executable_name>/data/", // up 4 in tree, "/6_Advanced/<executable_name>/" subdir
"../../../../7_CUDALibraries/<executable_name>/data/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir
"../../../../8_Android/<executable_name>/data/", // up 4 in tree, "/8_Android/<executable_name>/" subdir
"../../../../0_Simple/<executable_name>/", // up 4 in tree, "/0_Simple/<executable_name>/" subdir
"../../../../1_Utilities/<executable_name>/", // up 4 in tree, "/1_Utilities/<executable_name>/" subdir
"../../../../2_Graphics/<executable_name>/", // up 4 in tree, "/2_Graphics/<executable_name>/" subdir
"../../../../3_Imaging/<executable_name>/", // up 4 in tree, "/3_Imaging/<executable_name>/" subdir
"../../../../4_Finance/<executable_name>/", // up 4 in tree, "/4_Finance/<executable_name>/" subdir
"../../../../5_Simulations/<executable_name>/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir
"../../../../6_Advanced/<executable_name>/", // up 4 in tree, "/6_Advanced/<executable_name>/" subdir
"../../../../7_CUDALibraries/<executable_name>/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir
"../../../../8_Android/<executable_name>/", // up 4 in tree, "/8_Android/<executable_name>/" subdir
"../../../../samples/<executable_name>/data/", // up 4 in tree, "/samples/<executable_name>/" subdir
"../../../../common/", // up 4 in tree, "../../../common/" subdir
"../../../../common/data/", // up 4 in tree, "../../../common/data/" subdir
"../../../../data/", // up 4 in tree, "../../../data/" subdir
"../../../../../", // up 5 in tree
"../../../../../src/<executable_name>/", // up 5 in tree, "/src/<executable_name>/" subdir
"../../../../../src/<executable_name>/data/", // up 5 in tree, "/src/<executable_name>/data/" subdir
"../../../../../src/<executable_name>/src/", // up 5 in tree, "/src/<executable_name>/src/" subdir
"../../../../../src/<executable_name>/inc/", // up 5 in tree, "/src/<executable_name>/inc/" subdir
"../../../../../sandbox/<executable_name>/", // up 5 in tree, "/sandbox/<executable_name>/" subdir
"../../../../../sandbox/<executable_name>/data/", // up 5 in tree, "/sandbox/<executable_name>/data/" subdir
"../../../../../sandbox/<executable_name>/src/", // up 5 in tree, "/sandbox/<executable_name>/src/" subdir
"../../../../../sandbox/<executable_name>/inc/", // up 5 in tree, "/sandbox/<executable_name>/inc/" subdir
"../../../../../0_Simple/<executable_name>/data/", // up 5 in tree, "/0_Simple/<executable_name>/" subdir
"../../../../../1_Utilities/<executable_name>/data/", // up 5 in tree, "/1_Utilities/<executable_name>/" subdir
"../../../../../2_Graphics/<executable_name>/data/", // up 5 in tree, "/2_Graphics/<executable_name>/" subdir
"../../../../../3_Imaging/<executable_name>/data/", // up 5 in tree, "/3_Imaging/<executable_name>/" subdir
"../../../../../4_Finance/<executable_name>/data/", // up 5 in tree, "/4_Finance/<executable_name>/" subdir
"../../../../../5_Simulations/<executable_name>/data/",// up 5 in tree, "/5_Simulations/<executable_name>/" subdir
"../../../../../6_Advanced/<executable_name>/data/", // up 5 in tree, "/6_Advanced/<executable_name>/" subdir
"../../../../../7_CUDALibraries/<executable_name>/data/", // up 5 in tree, "/7_CUDALibraries/<executable_name>/" subdir
"../../../../../8_Android/<executable_name>/data/", // up 5 in tree, "/8_Android/<executable_name>/" subdir
"../../../../../samples/<executable_name>/data/", // up 5 in tree, "/samples/<executable_name>/" subdir
"../../../../../common/", // up 5 in tree, "../../../common/" subdir
"../../../../../common/data/", // up 5 in tree, "../../../common/data/" subdir
};
// Extract the executable name
std::string executable_name;
if (executable_path != 0)
{
executable_name = std::string(executable_path);
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// Windows path delimiter
size_t delimiter_pos = executable_name.find_last_of('\\');
executable_name.erase(0, delimiter_pos + 1);
if (executable_name.rfind(".exe") != std::string::npos)
{
// we strip .exe, only if the .exe is found
executable_name.resize(executable_name.size() - 4);
}
#else
// Linux & OSX path delimiter
size_t delimiter_pos = executable_name.find_last_of('/');
executable_name.erase(0,delimiter_pos+1);
#endif
}
// Loop over all search paths and return the first hit
for (unsigned int i = 0; i < sizeof(searchPath)/sizeof(char *); ++i)
{
std::string path(searchPath[i]);
size_t executable_name_pos = path.find("<executable_name>");
// If there is executable_name variable in the searchPath
// replace it with the value
if (executable_name_pos != std::string::npos)
{
if (executable_path != 0)
{
path.replace(executable_name_pos, strlen("<executable_name>"), executable_name);
}
else
{
// Skip this path entry if no executable argument is given
continue;
}
}
#ifdef _DEBUG
printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str());
#endif
// Test if the file exists
path.append(filename);
FILE *fp;
FOPEN(fp, path.c_str(), "rb");
if (fp != NULL)
{
fclose(fp);
// File found
// returning an allocated array here for backwards compatibility reasons
char *file_path = (char *) malloc(path.length() + 1);
STRCPY(file_path, path.length() + 1, path.c_str());
return file_path;
}
if (fp)
{
fclose(fp);
}
}
// File not found
return 0;
}
#endif

View File

@ -0,0 +1,499 @@
/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// Helper Timing Functions
#ifndef HELPER_TIMER_H
#define HELPER_TIMER_H
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
// includes, system
#include <vector>
// includes, project
#include <exception.h>
// Definition of the StopWatch Interface, this is used if we don't want to use the CUT functions
// But rather in a self contained class interface
class StopWatchInterface
{
public:
StopWatchInterface() {};
virtual ~StopWatchInterface() {};
public:
//! Start time measurement
virtual void start() = 0;
//! Stop time measurement
virtual void stop() = 0;
//! Reset time counters to zero
virtual void reset() = 0;
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned, otherwise the
//! time between the last start() and stop call is returned
virtual float getTime() = 0;
//! Mean time to date based on the number of times the stopwatch has been
//! _stopped_ (ie finished sessions) and the current total time
virtual float getAverageTime() = 0;
};
//////////////////////////////////////////////////////////////////
// Begin Stopwatch timer class definitions for all OS platforms //
//////////////////////////////////////////////////////////////////
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// includes, system
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
#undef min
#undef max
//! Windows specific implementation of StopWatch
class StopWatchWin : public StopWatchInterface
{
public:
//! Constructor, default
StopWatchWin() :
start_time(), end_time(),
diff_time(0.0f), total_time(0.0f),
running(false), clock_sessions(0), freq(0), freq_set(false)
{
if (! freq_set)
{
// helper variable
LARGE_INTEGER temp;
// get the tick frequency from the OS
QueryPerformanceFrequency((LARGE_INTEGER *) &temp);
// convert to type in which it is needed
freq = ((double) temp.QuadPart) / 1000.0;
// rememeber query
freq_set = true;
}
};
// Destructor
~StopWatchWin() { };
public:
//! Start time measurement
inline void start();
//! Stop time measurement
inline void stop();
//! Reset time counters to zero
inline void reset();
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned, otherwise the
//! time between the last start() and stop call is returned
inline float getTime();
//! Mean time to date based on the number of times the stopwatch has been
//! _stopped_ (ie finished sessions) and the current total time
inline float getAverageTime();
private:
// member variables
//! Start of measurement
LARGE_INTEGER start_time;
//! End of measurement
LARGE_INTEGER end_time;
//! Time difference between the last start and stop
float diff_time;
//! TOTAL time difference between starts and stops
float total_time;
//! flag if the stop watch is running
bool running;
//! Number of times clock has been started
//! and stopped to allow averaging
int clock_sessions;
//! tick frequency
double freq;
//! flag if the frequency has been set
bool freq_set;
};
// functions, inlined
////////////////////////////////////////////////////////////////////////////////
//! Start time measurement
////////////////////////////////////////////////////////////////////////////////
inline void
StopWatchWin::start()
{
QueryPerformanceCounter((LARGE_INTEGER *) &start_time);
running = true;
}
////////////////////////////////////////////////////////////////////////////////
//! Stop time measurement and increment add to the current diff_time summation
//! variable. Also increment the number of times this clock has been run.
////////////////////////////////////////////////////////////////////////////////
inline void
StopWatchWin::stop()
{
QueryPerformanceCounter((LARGE_INTEGER *) &end_time);
diff_time = (float)
(((double) end_time.QuadPart - (double) start_time.QuadPart) / freq);
total_time += diff_time;
clock_sessions++;
running = false;
}
////////////////////////////////////////////////////////////////////////////////
//! Reset the timer to 0. Does not change the timer running state but does
//! recapture this point in time as the current start time if it is running.
////////////////////////////////////////////////////////////////////////////////
inline void
StopWatchWin::reset()
{
diff_time = 0;
total_time = 0;
clock_sessions = 0;
if (running)
{
QueryPerformanceCounter((LARGE_INTEGER *) &start_time);
}
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned added to the
//! current diff_time sum, otherwise the current summed time difference alone
//! is returned.
////////////////////////////////////////////////////////////////////////////////
inline float
StopWatchWin::getTime()
{
// Return the TOTAL time to date
float retval = total_time;
if (running)
{
LARGE_INTEGER temp;
QueryPerformanceCounter((LARGE_INTEGER *) &temp);
retval += (float)
(((double)(temp.QuadPart - start_time.QuadPart)) / freq);
}
return retval;
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. for a single run based on the total number of COMPLETED runs
//! and the total time.
////////////////////////////////////////////////////////////////////////////////
inline float
StopWatchWin::getAverageTime()
{
return (clock_sessions > 0) ? (total_time/clock_sessions) : 0.0f;
}
#else
// Declarations for Stopwatch on Linux and Mac OSX
// includes, system
#include <ctime>
#include <sys/time.h>
//! Windows specific implementation of StopWatch
class StopWatchLinux : public StopWatchInterface
{
public:
//! Constructor, default
StopWatchLinux() :
start_time(), diff_time(0.0), total_time(0.0),
running(false), clock_sessions(0)
{ };
// Destructor
virtual ~StopWatchLinux()
{ };
public:
//! Start time measurement
inline void start();
//! Stop time measurement
inline void stop();
//! Reset time counters to zero
inline void reset();
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned, otherwise the
//! time between the last start() and stop call is returned
inline float getTime();
//! Mean time to date based on the number of times the stopwatch has been
//! _stopped_ (ie finished sessions) and the current total time
inline float getAverageTime();
private:
// helper functions
//! Get difference between start time and current time
inline float getDiffTime();
private:
// member variables
//! Start of measurement
struct timeval start_time;
//! Time difference between the last start and stop
float diff_time;
//! TOTAL time difference between starts and stops
float total_time;
//! flag if the stop watch is running
bool running;
//! Number of times clock has been started
//! and stopped to allow averaging
int clock_sessions;
};
// functions, inlined
////////////////////////////////////////////////////////////////////////////////
//! Start time measurement
////////////////////////////////////////////////////////////////////////////////
inline void
StopWatchLinux::start()
{
gettimeofday(&start_time, 0);
running = true;
}
////////////////////////////////////////////////////////////////////////////////
//! Stop time measurement and increment add to the current diff_time summation
//! variable. Also increment the number of times this clock has been run.
////////////////////////////////////////////////////////////////////////////////
inline void
StopWatchLinux::stop()
{
diff_time = getDiffTime();
total_time += diff_time;
running = false;
clock_sessions++;
}
////////////////////////////////////////////////////////////////////////////////
//! Reset the timer to 0. Does not change the timer running state but does
//! recapture this point in time as the current start time if it is running.
////////////////////////////////////////////////////////////////////////////////
inline void
StopWatchLinux::reset()
{
diff_time = 0;
total_time = 0;
clock_sessions = 0;
if (running)
{
gettimeofday(&start_time, 0);
}
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned added to the
//! current diff_time sum, otherwise the current summed time difference alone
//! is returned.
////////////////////////////////////////////////////////////////////////////////
inline float
StopWatchLinux::getTime()
{
// Return the TOTAL time to date
float retval = total_time;
if (running)
{
retval += getDiffTime();
}
return retval;
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. for a single run based on the total number of COMPLETED runs
//! and the total time.
////////////////////////////////////////////////////////////////////////////////
inline float
StopWatchLinux::getAverageTime()
{
return (clock_sessions > 0) ? (total_time/clock_sessions) : 0.0f;
}
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
inline float
StopWatchLinux::getDiffTime()
{
struct timeval t_time;
gettimeofday(&t_time, 0);
// time difference in milli-seconds
return (float)(1000.0 * (t_time.tv_sec - start_time.tv_sec)
+ (0.001 * (t_time.tv_usec - start_time.tv_usec)));
}
#endif // WIN32
////////////////////////////////////////////////////////////////////////////////
//! Timer functionality exported
////////////////////////////////////////////////////////////////////////////////
//! Create a new timer
//! @return true if a time has been created, otherwise false
//! @param name of the new timer, 0 if the creation failed
////////////////////////////////////////////////////////////////////////////////
inline bool
sdkCreateTimer(StopWatchInterface **timer_interface)
{
//printf("sdkCreateTimer called object %08x\n", (void *)*timer_interface);
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
*timer_interface = (StopWatchInterface *)new StopWatchWin();
#else
*timer_interface = (StopWatchInterface *)new StopWatchLinux();
#endif
return (*timer_interface != NULL) ? true : false;
}
////////////////////////////////////////////////////////////////////////////////
//! Delete a timer
//! @return true if a time has been deleted, otherwise false
//! @param name of the timer to delete
////////////////////////////////////////////////////////////////////////////////
inline bool
sdkDeleteTimer(StopWatchInterface **timer_interface)
{
//printf("sdkDeleteTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface)
{
delete *timer_interface;
*timer_interface = NULL;
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Start the time with name \a name
//! @param name name of the timer to start
////////////////////////////////////////////////////////////////////////////////
inline bool
sdkStartTimer(StopWatchInterface **timer_interface)
{
//printf("sdkStartTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface)
{
(*timer_interface)->start();
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Stop the time with name \a name. Does not reset.
//! @param name name of the timer to stop
////////////////////////////////////////////////////////////////////////////////
inline bool
sdkStopTimer(StopWatchInterface **timer_interface)
{
// printf("sdkStopTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface)
{
(*timer_interface)->stop();
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Resets the timer's counter.
//! @param name name of the timer to reset.
////////////////////////////////////////////////////////////////////////////////
inline bool
sdkResetTimer(StopWatchInterface **timer_interface)
{
// printf("sdkResetTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface)
{
(*timer_interface)->reset();
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Return the average time for timer execution as the total time
//! for the timer dividied by the number of completed (stopped) runs the timer
//! has made.
//! Excludes the current running time if the timer is currently running.
//! @param name name of the timer to return the time of
////////////////////////////////////////////////////////////////////////////////
inline float
sdkGetAverageTimerValue(StopWatchInterface **timer_interface)
{
// printf("sdkGetAverageTimerValue called object %08x\n", (void *)*timer_interface);
if (*timer_interface)
{
return (*timer_interface)->getAverageTime();
}
else
{
return 0.0f;
}
}
////////////////////////////////////////////////////////////////////////////////
//! Total execution time for the timer over all runs since the last reset
//! or timer creation.
//! @param name name of the timer to obtain the value of.
////////////////////////////////////////////////////////////////////////////////
inline float
sdkGetTimerValue(StopWatchInterface **timer_interface)
{
// printf("sdkGetTimerValue called object %08x\n", (void *)*timer_interface);
if (*timer_interface)
{
return (*timer_interface)->getTime();
}
else
{
return 0.0f;
}
}
#endif // HELPER_TIMER_H

View File

@ -94,7 +94,7 @@ Tracking_2nd_PLL_filter::Tracking_2nd_PLL_filter ()
{
//--- PLL variables --------------------------------------------------------
d_pdi_carr = 0.001;// Summation interval for carrier
d_plldampingratio = 0.65;
d_plldampingratio = 0.7;
}

View File

@ -16,6 +16,11 @@
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
if(ENABLE_CUDA)
FIND_PACKAGE(CUDA REQUIRED)
add_definitions(-DCUDA_GPU_ACCEL=1)
endif(ENABLE_CUDA)
set(GNSS_RECEIVER_SOURCES
control_thread.cc
control_message_factory.cc
@ -70,8 +75,9 @@ include_directories(
${GFlags_INCLUDE_DIRS}
${Boost_INCLUDE_DIRS}
${GNURADIO_RUNTIME_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS}
)
if(Boost_VERSION LESS 105000)
add_definitions(-DOLD_BOOST=1)
endif(Boost_VERSION LESS 105000)

View File

@ -77,6 +77,7 @@
#include "galileo_e1_pcps_quicksync_ambiguous_acquisition.h"
#include "galileo_e5a_noncoherent_iq_acquisition_caf.h"
#include "gps_l1_ca_dll_pll_tracking.h"
#include "gps_l1_ca_dll_pll_tracking_gpu.h"
#include "gps_l1_ca_dll_pll_optim_tracking.h"
#include "gps_l1_ca_dll_fll_pll_tracking.h"
#include "gps_l1_ca_tcp_connector_tracking.h"
@ -1611,6 +1612,14 @@ std::unique_ptr<TrackingInterface> GNSSBlockFactory::GetTrkBlock(
out_streams, queue));
block = std::move(block_);
}
#if CUDA_GPU_ACCEL
else if (implementation.compare("GPS_L1_CA_DLL_PLL_Tracking_GPU") == 0)
{
std::unique_ptr<TrackingInterface> block_(new GpsL1CaDllPllTrackingGPU(configuration.get(), role, in_streams,
out_streams, queue));
block = std::move(block_);
}
#endif
else
{
// Log fatal. This causes execution to stop.

View File

@ -16,6 +16,7 @@
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
set(GNSS_SDR_OPTIONAL_LIBS "")
set(GNSS_SDR_OPTIONAL_HEADERS "")
@ -32,6 +33,12 @@ if(ENABLE_UHD)
set(GNSS_SDR_OPTIONAL_HEADERS ${GNSS_SDR_OPTIONAL_HEADERS} ${UHD_INCLUDE_DIRS})
endif(ENABLE_UHD)
if(ENABLE_CUDA)
FIND_PACKAGE(CUDA REQUIRED)
add_definitions(-DCUDA_GPU_ACCEL=1)
endif(ENABLE_CUDA)
include_directories(
${CMAKE_SOURCE_DIR}/src/core/system_parameters
${CMAKE_SOURCE_DIR}/src/core/interfaces
@ -47,6 +54,7 @@ include_directories(
${GNURADIO_RUNTIME_INCLUDE_DIRS}
${GNSS_SDR_OPTIONAL_HEADERS}
${VOLK_GNSSSDR_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS}
)
add_definitions( -DGNSS_SDR_VERSION="${VERSION}" )
@ -78,6 +86,7 @@ target_link_libraries(gnss-sdr ${MAC_LIBRARIES}
${GNSS_SDR_OPTIONAL_LIBS}
gnss_sp_libs
gnss_rx
${CUDA_LIBRARIES}
)

View File

@ -68,6 +68,11 @@
#include "sbas_ephemeris.h"
#include "sbas_time.h"
#if CUDA_GPU_ACCEL
// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>
#endif
using google::LogMessage;
@ -143,6 +148,17 @@ int main(int argc, char** argv)
google::ParseCommandLineFlags(&argc, &argv, true);
std::cout << "Initializing GNSS-SDR v" << gnss_sdr_version << " ... Please wait." << std::endl;
#if CUDA_GPU_ACCEL
// Reset the device
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
cudaDeviceReset();
std::cout << "Reset CUDA device done " << std::endl;
#endif
if(GOOGLE_STRIP_LOG == 0)
{
google::InitGoogleLogging(argv[0]);