Mirror of https://github.com/gnss-sdr/gnss-sdr
Synced 2025-12-11 02:58:05 +00:00
Some CUDA cleaning and documentation
@@ -45,67 +45,67 @@
#endif

#include <complex>

#include <cuda.h>
// CUDA runtime
#include <cuda_runtime.h>

// GPU new internal data types for complex numbers
-struct GPU_Complex {
-    float r;
-    float i;
-    CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex() {};
-    CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex( float a, float b ) : r(a), i(b) {}
-    CUDA_CALLABLE_MEMBER_DEVICE float magnitude2( void ) {
-        return r * r + i * i;
-    }
-    CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator*(const GPU_Complex& a) {
-#ifdef __CUDACC__
-        return GPU_Complex(__fmul_rn(r,a.r) - __fmul_rn(i,a.i), __fmul_rn(i,a.r) + __fmul_rn(r,a.i));
-#else
-        return GPU_Complex(r*a.r - i*a.i, i*a.r + r*a.i);
-#endif
-    }
-    CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator+(const GPU_Complex& a) {
-        return GPU_Complex(r+a.r, i+a.i);
-    }
-    CUDA_CALLABLE_MEMBER_DEVICE void operator+=(const GPU_Complex& a) {
-        r+=a.r;
-        i+=a.i;
-    }
-    CUDA_CALLABLE_MEMBER_DEVICE void multiply_acc(const GPU_Complex& a, const GPU_Complex& b)
-    {
-        //c=a*b+c
-        //real part
-        //c.r=(a.r*b.r - a.i*b.i)+c.r
-#ifdef __CUDACC__
-        r=__fmaf_rn(a.r,b.r,r);
-        r=__fmaf_rn(-a.i,b.i,r);
-        //imag part
-        i=__fmaf_rn(a.i,b.r,i);
-        i=__fmaf_rn(a.r,b.i,i);
-#else
-        r=(a.r*b.r - a.i*b.i)+r;
-        i=(a.i*b.r - a.r*b.i)+i;
-#endif
+struct GPU_Complex
+{
+    float r;
+    float i;
+    CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex() {};
+    CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex( float a, float b ) : r(a), i(b) {}
+    CUDA_CALLABLE_MEMBER_DEVICE float magnitude2( void ) { return r * r + i * i; }
+    CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator*(const GPU_Complex& a)
+    {
+#ifdef __CUDACC__
+        return GPU_Complex(__fmul_rn(r, a.r) - __fmul_rn(i, a.i), __fmul_rn(i, a.r) + __fmul_rn(r, a.i));
+#else
+        return GPU_Complex(r*a.r - i*a.i, i*a.r + r*a.i);
+#endif
+    }
+    CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex operator+(const GPU_Complex& a)
+    {
+        return GPU_Complex(r + a.r, i + a.i);
+    }
+    CUDA_CALLABLE_MEMBER_DEVICE void operator+=(const GPU_Complex& a) { r += a.r; i += a.i; }
+    CUDA_CALLABLE_MEMBER_DEVICE void multiply_acc(const GPU_Complex& a, const GPU_Complex& b)
+    {
+        //c=a*b+c
+        //real part
+        //c.r=(a.r*b.r - a.i*b.i)+c.r
+#ifdef __CUDACC__
+        r = __fmaf_rn(a.r, b.r, r);
+        r = __fmaf_rn(-a.i, b.i, r);
+        //imag part
+        i = __fmaf_rn(a.i, b.r, i);
+        i = __fmaf_rn(a.r, b.i, i);
+#else
+        r = (a.r*b.r - a.i*b.i) + r;
+        i = (a.i*b.r - a.r*b.i) + i;
+#endif
+    }
+};
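Note that the two branches of multiply_acc() above do not compute the same imaginary part: the __fmaf_rn path accumulates a.i*b.r + a.r*b.i (the usual complex product), while the #else fallback computes a.i*b.r - a.r*b.i. The following host-only sketch makes the difference visible by comparing the fallback arithmetic against std::complex<float>; HostComplex is a hypothetical stand-in that mirrors only the #else branch of the struct shown above, and the numeric values are arbitrary.

// Host-only sketch (no CUDA toolkit needed). HostComplex reproduces the
// #else-branch arithmetic of GPU_Complex::multiply_acc() so it can be
// checked against the standard library; it is not part of gnss-sdr.
#include <complex>
#include <cstdio>

struct HostComplex
{
    float r;
    float i;
    HostComplex(float a, float b) : r(a), i(b) {}
    // Same arithmetic as the #else branch above
    void multiply_acc(const HostComplex& a, const HostComplex& b)
    {
        r = (a.r * b.r - a.i * b.i) + r;
        i = (a.i * b.r - a.r * b.i) + i;   // note the minus in front of a.r*b.i
    }
};

int main()
{
    std::complex<float> c_ref(0.5f, -0.25f), a(1.0f, 2.0f), b(3.0f, -4.0f);
    HostComplex c(0.5f, -0.25f);

    c_ref += a * b;   // reference: c = a*b + c
    c.multiply_acc(HostComplex(1.0f, 2.0f), HostComplex(3.0f, -4.0f));

    std::printf("reference : (%g, %g)\n", c_ref.real(), c_ref.imag());
    std::printf("else path : (%g, %g)\n", c.r, c.i);
    // The real parts agree; the imaginary parts differ because the __fmaf_rn
    // branch adds a.r*b.i while the #else branch subtracts it.
    return 0;
}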
-struct GPU_Complex_Short {
-    float r;
-    float i;
-    CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short( short int a, short int b ) : r(a), i(b) {}
-    CUDA_CALLABLE_MEMBER_DEVICE float magnitude2( void ) {
-        return r * r + i * i;
-    }
-    CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator*(const GPU_Complex_Short& a) {
-        return GPU_Complex_Short(r*a.r - i*a.i, i*a.r + r*a.i);
-    }
-    CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator+(const GPU_Complex_Short& a) {
-        return GPU_Complex_Short(r+a.r, i+a.i);
-    }
+struct GPU_Complex_Short
+{
+    float r;
+    float i;
+    CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short( short int a, short int b ) : r(a), i(b) {}
+    CUDA_CALLABLE_MEMBER_DEVICE float magnitude2( void )
+    {
+        return r * r + i * i;
+    }
+    CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator*(const GPU_Complex_Short& a)
+    {
+        return GPU_Complex_Short(r*a.r - i*a.i, i*a.r + r*a.i);
+    }
+    CUDA_CALLABLE_MEMBER_DEVICE GPU_Complex_Short operator+(const GPU_Complex_Short& a)
+    {
+        return GPU_Complex_Short(r+a.r, i+a.i);
+    }
+};
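Both structs rely on the CUDA_CALLABLE_MEMBER_DEVICE macro, whose definition is outside this hunk (the #endif at the top of the diff most likely closes the block that defines it). For reference only, the common pattern for such a macro is sketched below; the exact definition used in this header may differ.

// Sketch of the usual definition of a macro like CUDA_CALLABLE_MEMBER_DEVICE.
// The real definition lives above the hunk shown in this diff; this is only
// the conventional pattern for marking members callable from device code.
#ifdef __CUDACC__
    // nvcc is compiling: qualify these members as device functions
    #define CUDA_CALLABLE_MEMBER_DEVICE __device__
#else
    // plain host compiler: the qualifier disappears so the header still parses
    #define CUDA_CALLABLE_MEMBER_DEVICE
#endif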
/*!
 * \brief Class that implements carrier wipe-off and correlators using NVIDIA CUDA GPU accelerators.
@@ -113,58 +113,58 @@ struct GPU_Complex_Short {
class cuda_multicorrelator
{
public:
    cuda_multicorrelator();
    bool init_cuda(const int argc, const char **argv, int signal_length_samples, int local_codes_length_samples, int n_correlators);
    bool init_cuda_integrated_resampler(
            const int argc, const char **argv,
            int signal_length_samples,
            int code_length_chips,
            int n_correlators
            );
    bool set_local_code_and_taps(
            int code_length_chips,
            const std::complex<float>* local_codes_in,
            float *shifts_chips,
            int n_correlators
            );
    bool free_cuda();
    bool Carrier_wipeoff_multicorrelator_cuda(
            std::complex<float>* corr_out,
            const std::complex<float>* sig_in,
            const std::complex<float>* local_codes_in,
            float rem_carrier_phase_in_rad,
            float phase_step_rad,
            const int *shifts_samples,
            int signal_length_samples,
            int n_correlators);
    bool Carrier_wipeoff_multicorrelator_resampler_cuda(
            std::complex<float>* corr_out,
            const std::complex<float>* sig_in,
            float rem_carrier_phase_in_rad,
            float phase_step_rad,
            float code_phase_step_chips,
            float rem_code_phase_chips,
            int signal_length_samples,
            int n_correlators);

private:
    // Allocate the device input vectors
    GPU_Complex *d_sig_in;
    GPU_Complex *d_nco_in;
    GPU_Complex *d_sig_doppler_wiped;
    GPU_Complex *d_local_codes_in;
    GPU_Complex *d_corr_out;
    int *d_shifts_samples;
    float *d_shifts_chips;
    float d_code_length_chips;

    int threadsPerBlock;
    int blocksPerGrid;

    cudaStream_t stream1;
    cudaStream_t stream2;
    int num_gpu_devices;
    int selected_device;
};
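The public interface above suggests a life cycle of init_cuda() or init_cuda_integrated_resampler(), followed by set_local_code_and_taps(), one Carrier_wipeoff_multicorrelator_* call per integration period, and a final free_cuda(). The sketch below is inferred only from those declarations, not from the gnss-sdr tracking blocks: the include path, the correlator count, the tap spacing and all phase parameters are placeholder assumptions, and the host buffers are std::complex<float>, matching the declared pointer types (the device side presumably reinterprets them with the two-float GPU_Complex layout).

// Hedged usage sketch, inferred from the declarations above only. The header
// name, buffer sizes, correlator count and phase values are placeholders; the
// real tracking code may order or parameterize these calls differently.
#include <complex>
#include <vector>
#include "cuda_multicorrelator.h"   // assumed header name for the class above

int main(int argc, char* argv[])
{
    const int n_correlators = 3;             // e.g. Early/Prompt/Late taps (assumed)
    const int code_length_chips = 1023;      // GPS L1 C/A code length (placeholder choice)
    const int signal_length_samples = 4000;  // samples per integration period (placeholder)

    std::vector<std::complex<float>> local_code(code_length_chips);   // would hold the PRN replica
    std::vector<float> shifts_chips = {-0.5f, 0.0f, 0.5f};            // correlator tap offsets (assumed)
    std::vector<std::complex<float>> sig_in(signal_length_samples);   // baseband input samples
    std::vector<std::complex<float>> corr_out(n_correlators);         // one complex output per tap

    cuda_multicorrelator correlator;

    // argc/argv appear to be forwarded so CUDA device-selection helpers can read
    // command-line flags; device buffers are presumably allocated here.
    correlator.init_cuda_integrated_resampler(argc, const_cast<const char**>(argv),
                                              signal_length_samples,
                                              code_length_chips,
                                              n_correlators);
    correlator.set_local_code_and_taps(code_length_chips,
                                       local_code.data(),
                                       shifts_chips.data(),
                                       n_correlators);

    // One integration period: wipe off the carrier, resample the local code at
    // the requested tap offsets and accumulate one complex value per correlator.
    correlator.Carrier_wipeoff_multicorrelator_resampler_cuda(
        corr_out.data(),
        sig_in.data(),
        /* rem_carrier_phase_in_rad */ 0.0f,
        /* phase_step_rad          */ 0.001f,
        /* code_phase_step_chips   */ 0.5f,
        /* rem_code_phase_chips    */ 0.0f,
        signal_length_samples,
        n_correlators);

    correlator.free_cuda();   // release device buffers
    return 0;
}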