1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2025-11-07 02:33:03 +00:00

First working version of the GPU GPS tracking block (it requires NVIDIA

CUDA
3.0 GPU hardware)
This commit is contained in:
Javier Arribas
2015-07-24 17:21:25 +02:00
parent f722f5f8f7
commit 26cf90cdd4
12 changed files with 631 additions and 36 deletions

View File

@@ -16,6 +16,9 @@
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
if(ENABLE_CUDA)
FIND_PACKAGE(CUDA REQUIRED)
endif(ENABLE_CUDA)
set(TRACKING_ADAPTER_SOURCES
galileo_e1_dll_pll_veml_tracking.cc
@@ -27,6 +30,7 @@ set(TRACKING_ADAPTER_SOURCES
gps_l1_ca_tcp_connector_tracking.cc
galileo_e5a_dll_pll_tracking.cc
gps_l2_m_dll_pll_tracking.cc
gps_l1_ca_dll_pll_tracking_gpu.cc
)
include_directories(
@@ -40,6 +44,7 @@ include_directories(
${GLOG_INCLUDE_DIRS}
${GFlags_INCLUDE_DIRS}
${GNURADIO_RUNTIME_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS}
)
file(GLOB TRACKING_ADAPTER_HEADERS "*.h")

View File

@@ -0,0 +1,158 @@
/*!
* \file gps_l1_ca_dll_pll_tracking_gpu.cc
* \brief Implementation of an adapter of a DLL+PLL tracking loop block using GPU accelerated functions
* for GPS L1 C/A to a TrackingInterface
* \author Javier Arribas, 2015. jarribas(at)cttc.es
*
* Code DLL + carrier PLL according to the algorithms described in:
* K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
* A Software-Defined GPS and Galileo Receiver. A Single-Frequency
* Approach, Birkhauser, 2007
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#include "gps_l1_ca_dll_pll_tracking_gpu.h"
#include <glog/logging.h>
#include "GPS_L1_CA.h"
#include "configuration_interface.h"
using google::LogMessage;
GpsL1CaDllPllTrackingGPU::GpsL1CaDllPllTrackingGPU(
ConfigurationInterface* configuration, std::string role,
unsigned int in_streams, unsigned int out_streams,
boost::shared_ptr<gr::msg_queue> queue) :
role_(role), in_streams_(in_streams), out_streams_(out_streams),
queue_(queue)
{
DLOG(INFO) << "role " << role;
//################# CONFIGURATION PARAMETERS ########################
int fs_in;
int vector_length;
int f_if;
bool dump;
std::string dump_filename;
std::string item_type;
std::string default_item_type = "gr_complex";
float pll_bw_hz;
float dll_bw_hz;
float early_late_space_chips;
item_type = configuration->property(role + ".item_type", default_item_type);
//vector_length = configuration->property(role + ".vector_length", 2048);
fs_in = configuration->property("GNSS-SDR.internal_fs_hz", 2048000);
f_if = configuration->property(role + ".if", 0);
dump = configuration->property(role + ".dump", false);
pll_bw_hz = configuration->property(role + ".pll_bw_hz", 50.0);
dll_bw_hz = configuration->property(role + ".dll_bw_hz", 2.0);
early_late_space_chips = configuration->property(role + ".early_late_space_chips", 0.5);
std::string default_dump_filename = "./track_ch";
dump_filename = configuration->property(role + ".dump_filename",
default_dump_filename); //unused!
vector_length = std::round(fs_in / (GPS_L1_CA_CODE_RATE_HZ / GPS_L1_CA_CODE_LENGTH_CHIPS));
//################# MAKE TRACKING GNURadio object ###################
if (item_type.compare("gr_complex") == 0)
{
item_size_ = sizeof(gr_complex);
tracking_ = gps_l1_ca_dll_pll_make_tracking_gpu_cc(
f_if,
fs_in,
vector_length,
queue_,
dump,
dump_filename,
pll_bw_hz,
dll_bw_hz,
early_late_space_chips);
}
else
{
item_size_ = sizeof(gr_complex);
LOG(WARNING) << item_type << " unknown tracking item type.";
}
channel_ = 0;
channel_internal_queue_ = 0;
DLOG(INFO) << "tracking(" << tracking_->unique_id() << ")";
}
GpsL1CaDllPllTrackingGPU::~GpsL1CaDllPllTrackingGPU()
{}
void GpsL1CaDllPllTrackingGPU::start_tracking()
{
tracking_->start_tracking();
}
/*
* Set tracking channel unique ID
*/
void GpsL1CaDllPllTrackingGPU::set_channel(unsigned int channel)
{
channel_ = channel;
tracking_->set_channel(channel);
}
/*
* Set tracking channel internal queue
*/
void GpsL1CaDllPllTrackingGPU::set_channel_queue(
concurrent_queue<int> *channel_internal_queue)
{
channel_internal_queue_ = channel_internal_queue;
tracking_->set_channel_queue(channel_internal_queue_);
}
void GpsL1CaDllPllTrackingGPU::set_gnss_synchro(Gnss_Synchro* p_gnss_synchro)
{
tracking_->set_gnss_synchro(p_gnss_synchro);
}
void GpsL1CaDllPllTrackingGPU::connect(gr::top_block_sptr top_block)
{
if(top_block) { /* top_block is not null */};
//nothing to connect, now the tracking uses gr_sync_decimator
}
void GpsL1CaDllPllTrackingGPU::disconnect(gr::top_block_sptr top_block)
{
if(top_block) { /* top_block is not null */};
//nothing to disconnect, now the tracking uses gr_sync_decimator
}
gr::basic_block_sptr GpsL1CaDllPllTrackingGPU::get_left_block()
{
return tracking_;
}
gr::basic_block_sptr GpsL1CaDllPllTrackingGPU::get_right_block()
{
return tracking_;
}

View File

@@ -0,0 +1,113 @@
/*!
* \file gps_l1_ca_dll_pll_tracking_gpu.h
* \brief Implementation of an adapter of a DLL+PLL tracking loop block using GPU accelerated functions
* for GPS L1 C/A to a TrackingInterface
* \author Javier Arribas, 2015. jarribas(at)cttc.es
*
* Code DLL + carrier PLL according to the algorithms described in:
* K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
* A Software-Defined GPS and Galileo Receiver. A Single-Frequency
* Approach, Birkha user, 2007
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef GNSS_SDR_GPS_L1_CA_DLL_PLL_TRACKING_GPU_H_
#define GNSS_SDR_GPS_L1_CA_DLL_PLL_TRACKING_GPU_H_
#include <string>
#include <gnuradio/msg_queue.h>
#include "tracking_interface.h"
#include "gps_l1_ca_dll_pll_tracking_gpu_cc.h"
class ConfigurationInterface;
/*!
* \brief This class implements a code DLL + carrier PLL tracking loop using GPU accelerated functions
*/
class GpsL1CaDllPllTrackingGPU : public TrackingInterface
{
public:
GpsL1CaDllPllTrackingGPU(ConfigurationInterface* configuration,
std::string role,
unsigned int in_streams,
unsigned int out_streams,
boost::shared_ptr<gr::msg_queue> queue);
virtual ~GpsL1CaDllPllTrackingGPU();
std::string role()
{
return role_;
}
//! Returns "GPS_L1_CA_DLL_PLL_Tracking"
std::string implementation()
{
return "GPS_L1_CA_DLL_PLL_Tracking_GPU";
}
size_t item_size()
{
return item_size_;
}
void connect(gr::top_block_sptr top_block);
void disconnect(gr::top_block_sptr top_block);
gr::basic_block_sptr get_left_block();
gr::basic_block_sptr get_right_block();
/*!
* \brief Set tracking channel unique ID
*/
void set_channel(unsigned int channel);
/*!
* \brief Set acquisition/tracking common Gnss_Synchro object pointer
* to efficiently exchange synchronization data between acquisition and tracking blocks
*/
void set_gnss_synchro(Gnss_Synchro* p_gnss_synchro);
/*!
* \brief Set tracking channel internal queue
*/
void set_channel_queue(concurrent_queue<int> *channel_internal_queue);
void start_tracking();
private:
gps_l1_ca_dll_pll_tracking_gpu_cc_sptr tracking_;
size_t item_size_;
unsigned int channel_;
std::string role_;
unsigned int in_streams_;
unsigned int out_streams_;
boost::shared_ptr<gr::msg_queue> queue_;
concurrent_queue<int> *channel_internal_queue_;
};
#endif // GNSS_SDR_GPS_L1_CA_DLL_PLL_TRACKING_GPU_H_

View File

@@ -57,7 +57,8 @@ endif(ENABLE_GENERIC_ARCH)
file(GLOB TRACKING_GR_BLOCKS_HEADERS "*.h")
add_library(tracking_gr_blocks ${TRACKING_GR_BLOCKS_SOURCES} ${TRACKING_GR_BLOCKS_HEADERS})
source_group(Headers FILES ${TRACKING_GR_BLOCKS_HEADERS})
target_link_libraries(tracking_gr_blocks tracking_lib ${GNURADIO_RUNTIME_LIBRARIES} gnss_sp_libs ${Boost_LIBRARIES} ${VOLK_GNSSSDR_LIBRARIES} ${ORC_LIBRARIES} )
target_link_libraries(tracking_gr_blocks tracking_lib ${GNURADIO_RUNTIME_LIBRARIES} gnss_sp_libs ${Boost_LIBRARIES} ${VOLK_GNSSSDR_LIBRARIES} ${ORC_LIBRARIES} ${CUDA_LIBRARIES})
if(NOT VOLK_GNSSSDR_FOUND)
add_dependencies(tracking_gr_blocks volk_gnsssdr_module)
endif(NOT VOLK_GNSSSDR_FOUND)

View File

@@ -48,17 +48,11 @@
#include "GPS_L1_CA.h"
#include "control_message_factory.h"
#include <volk/volk.h> //volk_alignement
#include <cuda.h>
// CUDA runtime
#include <cuda_runtime.h>
// includes
#include <cuda_profiler_api.h>
#include <helper_functions.h> // helper for shared functions common to CUDA Samples
#include <helper_cuda.h> // helper functions for CUDA error checking and initialization
/*!
* \todo Include in definition header file
*/
@@ -130,8 +124,9 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
multicorrelator_gpu = new cuda_multicorrelator();
// Get space for the resampled early / prompt / late local replicas
int N_CORRELATORS=3;
multicorrelator_gpu->init_cuda(0, NULL, 2 * d_vector_length , 2 * d_vector_length , N_CORRELATORS);
// Get space for the resampled early / prompt / late local replicas
checkCudaErrors(cudaHostAlloc((void**)&d_local_code_shift_samples, N_CORRELATORS * sizeof(int), cudaHostAllocMapped ));
@@ -323,9 +318,6 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::~Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc()
multicorrelator_gpu->free_cuda();
delete(multicorrelator_gpu);
volk_free(d_Early);
volk_free(d_Prompt);
volk_free(d_Late);
volk_free(d_ca_code);
delete[] d_Prompt_buffer;
@@ -381,18 +373,20 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
// UPDATE NCO COMMAND
float phase_step_rad = static_cast<float>(GPS_TWO_PI) * d_carrier_doppler_hz / static_cast<float>(d_fs_in);
//std::cout<<"d_current_prn_length_samples="<<d_current_prn_length_samples<<std::endl;
// perform carrier wipe-off and compute Early, Prompt and Late correlation
cudaProfilerStart();
multicorrelator_gpu->Carrier_wipeoff_multicorrelator_cuda(
d_corr_outs_gpu,
in,//in_gpu,
in,
d_local_codes_gpu,
d_rem_carr_phase_rad,
phase_step_rad,
d_local_code_shift_samples,
d_current_prn_length_samples,
3);
cudaProfilerStop();
//std::cout<<"d_Prompt="<<*d_Prompt<<"d_Early="<<*d_Early<<"d_Late="<<*d_Late<<std::endl;
// check for samples consistency (this should be done before in the receiver / here only if the source is a file)
if (std::isnan((*d_Prompt).real()) == true or std::isnan((*d_Prompt).imag()) == true ) // or std::isinf(in[i].real())==true or std::isinf(in[i].imag())==true)
{

View File

@@ -21,9 +21,9 @@ if(ENABLE_CUDA)
FIND_PACKAGE(CUDA REQUIRED)
# Append current NVCC flags by something, eg comput capability
# set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --gpu-architecture sm_30 --default-stream-per-thread)
# set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --gpu-architecture sm_30)
list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_30,code=sm_30; -std=c++11;-O3; -use_fast_math")
list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_30,code=sm_30; -std=c++11;-O3; -use_fast_math -default-stream per-thread")
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
CUDA_INCLUDE_DIRECTORIES(
@@ -33,8 +33,6 @@ if(ENABLE_CUDA)
SET(LIB_TYPE STATIC) #set the lib type
CUDA_ADD_LIBRARY(CUDA_CORRELATOR_LIB ${LIB_TYPE} cuda_multicorrelator.h cuda_multicorrelator.cu)
set(OPT_LIBRARIES ${OPT_LIBRARIES} ${CUDA_CORRELATOR_LIB})
endif(ENABLE_CUDA)
@@ -71,4 +69,4 @@ endif(SSE3_AVAILABLE)
file(GLOB TRACKING_LIB_HEADERS "*.h")
add_library(tracking_lib ${TRACKING_LIB_SOURCES} ${TRACKING_LIB_HEADERS})
source_group(Headers FILES ${TRACKING_LIB_HEADERS})
target_link_libraries(tracking_lib ${VOLK_LIBRARIES} ${GNURADIO_RUNTIME_LIBRARIES} ${OPT_LIBRARIES})
target_link_libraries(tracking_lib CUDA_CORRELATOR_LIB ${VOLK_LIBRARIES} ${GNURADIO_RUNTIME_LIBRARIES})

View File

@@ -261,7 +261,7 @@ CUDA_32fc_x2_add_32fc( GPU_Complex *A, GPU_Complex *B, GPU_Complex *C, int
}
bool cuda_multicorrelator::init_cuda(const int argc, const char **argv, int signal_length_samples, int *shifts_samples, int n_correlators)
bool cuda_multicorrelator::init_cuda(const int argc, const char **argv, int signal_length_samples, int local_codes_length_samples, int n_correlators)
{
// use command-line specified CUDA device, otherwise use device with highest Gflops/s
findCudaDevice(argc, (const char **)argv);
@@ -298,7 +298,8 @@ bool cuda_multicorrelator::init_cuda(const int argc, const char **argv, int sign
// new version: only one vector with extra samples to shift the local code for the correlator set
// Required: The last correlator tap in d_shifts_samples has the largest sample shift
checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, size+sizeof(GPU_Complex)*shifts_samples[n_correlators-1]));
size_t size_local_code_bytes = local_codes_length_samples * sizeof(GPU_Complex);
checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, size_local_code_bytes));
checkCudaErrors(cudaMalloc((void **)&d_shifts_samples, size+sizeof(int)*n_correlators));
//scalars
@@ -308,6 +309,8 @@ bool cuda_multicorrelator::init_cuda(const int argc, const char **argv, int sign
threadsPerBlock = 256;
blocksPerGrid =(int)(signal_length_samples+threadsPerBlock-1)/threadsPerBlock;
cudaStreamCreate (&stream1) ;
cudaStreamCreate (&stream2) ;
return true;
}
@@ -323,14 +326,10 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_cuda(
int n_correlators)
{
cudaStream_t stream1;
cudaStream_t stream2;
cudaStreamCreate ( &stream1) ;
cudaStreamCreate ( &stream2) ;
size_t memSize = signal_length_samples * sizeof(std::complex<float>);
// input signal CPU -> GPU copy memory
checkCudaErrors(cudaMemcpyAsync(d_sig_in, sig_in, memSize,
cudaMemcpyHostToDevice, stream1));
@@ -362,7 +361,9 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_cuda(
//printf("CUDA kernel launch with %d blocks of %d threads\n", blocksPerGrid, threadsPerBlock);
//wait for Doppler wipeoff end...
checkCudaErrors(cudaDeviceSynchronize());
checkCudaErrors(cudaStreamSynchronize(stream1));
checkCudaErrors(cudaStreamSynchronize(stream2));
//checkCudaErrors(cudaDeviceSynchronize());
//old
// scalarProdGPUCPXxN<<<blocksPerGrid, threadsPerBlock,0 ,stream2>>>(
@@ -385,16 +386,13 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_cuda(
);
checkCudaErrors(cudaGetLastError());
//wait for correlators end...
checkCudaErrors(cudaDeviceSynchronize());
checkCudaErrors(cudaStreamSynchronize(stream2));
// Copy the device result vector in device memory to the host result vector
// in host memory.
//scalar products (correlators outputs)
checkCudaErrors(cudaMemcpyAsync(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
cudaMemcpyDeviceToHost, 0));
cudaStreamDestroy(stream1) ;
cudaStreamDestroy(stream2) ;
checkCudaErrors(cudaMemcpy(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
cudaMemcpyDeviceToHost));
return true;
}
@@ -406,13 +404,16 @@ bool cuda_multicorrelator::free_cuda()
cudaFree(d_local_codes_in);
cudaFree(d_corr_out);
cudaStreamDestroy(stream1) ;
cudaStreamDestroy(stream2) ;
// Reset the device and exit
// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice. It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
checkCudaErrors(cudaDeviceReset());
//checkCudaErrors(cudaDeviceReset());
return true;
}

View File

@@ -46,6 +46,10 @@
#include <complex>
#include <cuda.h>
// CUDA runtime
#include <cuda_runtime.h>
// GPU new internal data types for complex numbers
struct GPU_Complex {
@@ -109,7 +113,7 @@ struct GPU_Complex_Short {
class cuda_multicorrelator
{
public:
bool init_cuda(const int argc, const char **argv, int signal_length_samples, int *shifts_samples, int n_correlators);
bool init_cuda(const int argc, const char **argv, int signal_length_samples, int local_codes_length_samples, int n_correlators);
bool free_cuda();
bool Carrier_wipeoff_multicorrelator_cuda(
@@ -132,6 +136,9 @@ private:
int threadsPerBlock;
int blocksPerGrid;
cudaStream_t stream1;
cudaStream_t stream2;
};