mirror of https://github.com/gnss-sdr/gnss-sdr
Some CUDA cleaning and documentation
This commit is contained in:
parent
429e4e8776
commit
ef136e5c74
|
@ -47,6 +47,7 @@ option(ENABLE_GPROF "Enable the use of the GNU profiler tool 'gprof'" OFF)
|
|||
|
||||
# Acceleration
|
||||
option(ENABLE_OPENCL "Enable building of processing blocks implemented with OpenCL (experimental)" OFF)
|
||||
option(ENABLE_CUDA "Enable building of processing blocks implemented with CUDA (experimental, requires CUDA SDK)" OFF)
|
||||
|
||||
# Building and packaging options
|
||||
option(ENABLE_GENERIC_ARCH "Builds a portable binary" OFF)
|
||||
|
@ -883,6 +884,24 @@ else(ENABLE_OPENCL)
|
|||
endif(ENABLE_OPENCL)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# CUDA (OPTIONAL)
|
||||
###############################################################################
|
||||
if($ENV{CUDA_GPU_ACCEL})
|
||||
message(STATUS "CUDA_GPU_ACCEL environment variable found." )
|
||||
set(ENABLE_CUDA ON)
|
||||
endif($ENV{CUDA_GPU_ACCEL})
|
||||
|
||||
if(ENABLE_CUDA)
|
||||
FIND_PACKAGE(CUDA REQUIRED)
|
||||
message(STATUS "NVIDIA CUDA GPU Acceleration will be enabled." )
|
||||
message(STATUS "You can disable it with 'cmake -DENABLE_CUDA=OFF ../'" )
|
||||
else(ENABLE_CUDA)
|
||||
message(STATUS "NVIDIA CUDA GPU Acceleration will be not enabled." )
|
||||
message(STATUS "Enable it with 'cmake -DENABLE_CUDA=ON ../' to add support for GPU-based acceleration using CUDA." )
|
||||
endif(ENABLE_CUDA)
|
||||
|
||||
|
||||
|
||||
|
||||
################################################################################
|
||||
|
@ -955,20 +974,6 @@ else(ENABLE_OSMOSDR)
|
|||
message(STATUS "Enable it with 'cmake -DENABLE_OSMOSDR=ON ../' to add support for OsmoSDR and other front-ends (HackRF, bladeRF, Realtek's RTL2832U-based USB dongles, etc.)" )
|
||||
endif(ENABLE_OSMOSDR)
|
||||
|
||||
if($ENV{CUDA_GPU_ACCEL})
|
||||
message(STATUS "CUDA_GPU_ACCEL environment variable found." )
|
||||
set(ENABLE_CUDA ON)
|
||||
endif($ENV{CUDA_GPU_ACCEL})
|
||||
|
||||
if(ENABLE_CUDA)
|
||||
FIND_PACKAGE(CUDA REQUIRED)
|
||||
message(STATUS "NVIDIA CUDA GPU Acceleration will be enabled." )
|
||||
message(STATUS "You can disable it with 'cmake -DENABLE_CUDA=OFF ../'" )
|
||||
else(ENABLE_CUDA)
|
||||
message(STATUS "NVIDIA CUDA GPU Acceleration will is not enabled." )
|
||||
message(STATUS "Enable it with 'cmake -DENABLE_CUDA=ON ../' to add support for GPU-based acceleration using CUDA." )
|
||||
endif(ENABLE_CUDA)
|
||||
|
||||
|
||||
if($ENV{FLEXIBAND_DRIVER})
|
||||
message(STATUS "FLEXIBAND_DRIVER environment variable found." )
|
||||
|
|
14
README.md
14
README.md
|
@ -345,6 +345,19 @@ $ sudo make install
|
|||
~~~~~~
|
||||
|
||||
|
||||
###### Build CUDA support (OPTIONAL):
|
||||
|
||||
In order to build the processing blocks that use CUDA, NVIDIA's parallel programming model for accelerating data-parallel computations on graphics processing units (GPUs), you first need to install the CUDA Toolkit from the [NVIDIA Developers Download page](https://developer.nvidia.com/cuda-downloads "CUDA Downloads"). Then, build GNSS-SDR by doing:
|
||||
|
||||
~~~~~~
|
||||
$ cmake -DENABLE_CUDA=ON ../
|
||||
$ make
|
||||
$ sudo make install
|
||||
~~~~~~
|
||||
|
||||
Of course, you will also need a GPU that [supports CUDA](https://developer.nvidia.com/cuda-gpus "CUDA GPUs").
|
||||
|
||||
|
||||
###### Build a portable binary
|
||||
|
||||
In order to build an executable that does not depend on the specific SIMD instruction set present in the processor of the compiling machine, so that other users can run it on machines without those particular instruction sets, use:
|
||||
|
@ -841,7 +854,6 @@ Tracking_1C.dump=false ; Enable internal binary data file logging [true] or [fal
|
|||
Tracking_1C.dump_filename=./tracking_ch_ ; Log path and filename. Notice that the tracking channel will add "x.dat" where x is the channel number.
|
||||
Tracking_1C.pll_bw_hz=50.0 ; PLL loop filter bandwidth [Hz]
|
||||
Tracking_1C.dll_bw_hz=2.0 ; DLL loop filter bandwidth [Hz]
|
||||
Tracking_1C.fll_bw_hz=10.0 ; FLL loop filter bandwidth [Hz]
|
||||
Tracking_1C.order=3 ; PLL/DLL loop filter order [2] or [3]
|
||||
Tracking_1C.early_late_space_chips=0.5 ; correlator early-late space [chips].
|
||||
~~~~~~
|
||||
|
|
|
@ -51,7 +51,6 @@ class ConfigurationInterface;
|
|||
class GpsL1CaDllPllTrackingGPU : public TrackingInterface
|
||||
{
|
||||
public:
|
||||
|
||||
GpsL1CaDllPllTrackingGPU(ConfigurationInterface* configuration,
|
||||
std::string role,
|
||||
unsigned int in_streams,
|
||||
|
@ -65,7 +64,7 @@ public:
|
|||
return role_;
|
||||
}
|
||||
|
||||
//! Returns "GPS_L1_CA_DLL_PLL_Tracking"
|
||||
//! Returns "GPS_L1_CA_DLL_PLL_Tracking_GPU"
|
||||
std::string implementation()
|
||||
{
|
||||
return "GPS_L1_CA_DLL_PLL_Tracking_GPU";
|
||||
|
|
|
@ -45,35 +45,31 @@
|
|||
#endif
|
||||
|
||||
#include <complex>
|
||||
|
||||
#include <cuda.h>
|
||||
// CUDA runtime
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
// GPU new internal data types for complex numbers
|
||||
|
||||
struct GPU_Complex {
|
||||
struct GPU_Complex
|
||||
{
|
||||
float r;
|
||||
float i;
|
||||
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex() {};
|
||||
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex( float a, float b ) : r(a), i(b) {}
|
||||
CUDA_CALLABLE_MEMBER_DEVICE float magnitude2( void ) {
|
||||
return r * r + i * i;
|
||||
}
|
||||
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator*(const GPU_Complex& a) {
|
||||
CUDA_CALLABLE_MEMBER_DEVICE float magnitude2( void ) { return r * r + i * i; }
|
||||
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator*(const GPU_Complex& a)
|
||||
{
|
||||
#ifdef __CUDACC__
|
||||
return GPU_Complex(__fmul_rn(r, a.r) - __fmul_rn(i, a.i), __fmul_rn(i, a.r) + __fmul_rn(r, a.i));
|
||||
#else
|
||||
return GPU_Complex(r*a.r - i*a.i, i*a.r + r*a.i);
|
||||
#endif
|
||||
}
|
||||
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator+(const GPU_Complex& a) {
|
||||
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator+(const GPU_Complex& a)
|
||||
{
|
||||
return GPU_Complex(r + a.r, i + a.i);
|
||||
}
|
||||
CUDA_CALLABLE_MEMBER_DEVICE void operator+=(const GPU_Complex& a) {
|
||||
r+=a.r;
|
||||
i+=a.i;
|
||||
}
|
||||
CUDA_CALLABLE_MEMBER_DEVICE void operator+=(const GPU_Complex& a) { r += a.r; i += a.i; }
|
||||
CUDA_CALLABLE_MEMBER_DEVICE void multiply_acc(const GPU_Complex& a, const GPU_Complex& b)
|
||||
{
|
||||
//c=a*b+c
|
||||
|
@ -93,17 +89,21 @@ struct GPU_Complex {
|
|||
}
|
||||
};
|
||||
|
||||
struct GPU_Complex_Short {
|
||||
struct GPU_Complex_Short
|
||||
{
|
||||
float r;
|
||||
float i;
|
||||
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short( short int a, short int b ) : r(a), i(b) {}
|
||||
CUDA_CALLABLE_MEMBER_DEVICE float magnitude2( void ) {
|
||||
CUDA_CALLABLE_MEMBER_DEVICE float magnitude2( void )
|
||||
{
|
||||
return r * r + i * i;
|
||||
}
|
||||
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator*(const GPU_Complex_Short& a) {
|
||||
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator*(const GPU_Complex_Short& a)
|
||||
{
|
||||
return GPU_Complex_Short(r*a.r - i*a.i, i*a.r + r*a.i);
|
||||
}
|
||||
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator+(const GPU_Complex_Short& a) {
|
||||
CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator+(const GPU_Complex_Short& a)
|
||||
{
|
||||
return GPU_Complex_Short(r+a.r, i+a.i);
|
||||
}
|
||||
};
|
||||
|
@ -146,6 +146,7 @@ public:
|
|||
float rem_code_phase_chips,
|
||||
int signal_length_samples,
|
||||
int n_correlators);
|
||||
|
||||
private:
|
||||
// Allocate the device input vectors
|
||||
GPU_Complex *d_sig_in;
|
||||
|
@ -164,7 +165,6 @@ private:
|
|||
cudaStream_t stream2;
|
||||
int num_gpu_devices;
|
||||
int selected_device;
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue