mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2024-12-14 20:20:35 +00:00
Fix typo
This commit is contained in:
parent
c8d27eb97c
commit
10d73da839
@ -1,11 +1,11 @@
|
|||||||
/*!
|
/*!
|
||||||
* \file cpu_multicorrelator.cc
|
* \file cpu_multicorrelator.cc
|
||||||
* \brief High optimized CPU vector multiTAP correlator class
|
* \brief Highly optimized CPU vector multiTAP correlator class
|
||||||
* \authors <ul>
|
* \authors <ul>
|
||||||
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* Class that implements a high optimized vector multiTAP correlator class for CPUs
|
* Class that implements a highly optimized vector multiTAP correlator class for CPUs
|
||||||
*
|
*
|
||||||
* -------------------------------------------------------------------------
|
* -------------------------------------------------------------------------
|
||||||
*
|
*
|
||||||
|
@ -65,4 +65,4 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
#endif /* CPU_MULTICORRELATOR_H_ */
|
#endif /* GNSS_SDR_CPU_MULTICORRELATOR_H_ */
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
/*!
|
/*!
|
||||||
* \file cpu_multicorrelator_16sc.cc
|
* \file cpu_multicorrelator_16sc.cc
|
||||||
* \brief High optimized CPU vector multiTAP correlator class
|
* \brief Highly optimized CPU vector multiTAP correlator class
|
||||||
* \authors <ul>
|
* \authors <ul>
|
||||||
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* Class that implements a high optimized vector multiTAP correlator class for CPUs
|
* Class that implements a highly optimized vector multiTAP correlator class for CPUs
|
||||||
*
|
*
|
||||||
* -------------------------------------------------------------------------
|
* -------------------------------------------------------------------------
|
||||||
*
|
*
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
/*!
|
/*!
|
||||||
* \file cpu_multicorrelator_16sc.h
|
* \file cpu_multicorrelator_16sc.h
|
||||||
* \brief High optimized CPU vector multiTAP correlator class for lv_16sc_t (short int complex)
|
* \brief Highly optimized CPU vector multiTAP correlator class for lv_16sc_t (short int complex)
|
||||||
* \authors <ul>
|
* \authors <ul>
|
||||||
* <li> Javier Arribas, 2016. jarribas(at)cttc.es
|
* <li> Javier Arribas, 2016. jarribas(at)cttc.es
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* Class that implements a high optimized vector multiTAP correlator class for CPUs
|
* Class that implements a highly optimized vector multiTAP correlator class for CPUs
|
||||||
*
|
*
|
||||||
* -------------------------------------------------------------------------
|
* -------------------------------------------------------------------------
|
||||||
*
|
*
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
* <li> Cillian O'Driscoll, 2017. cillian.odriscoll(at)gmail.com
|
* <li> Cillian O'Driscoll, 2017. cillian.odriscoll(at)gmail.com
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* Class that implements a high optimized vector multiTAP correlator class for CPUs
|
* Class that implements a highly optimized vector multiTAP correlator class for CPUs
|
||||||
*
|
*
|
||||||
* -------------------------------------------------------------------------
|
* -------------------------------------------------------------------------
|
||||||
*
|
*
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
* <li> Cillian O'Driscoll, 2017, cillian.odriscoll(at)gmail.com
|
* <li> Cillian O'Driscoll, 2017, cillian.odriscoll(at)gmail.com
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* Class that implements a high optimized vector multiTAP correlator class for CPUs
|
* Class that implements a highly optimized vector multiTAP correlator class for CPUs
|
||||||
*
|
*
|
||||||
* -------------------------------------------------------------------------
|
* -------------------------------------------------------------------------
|
||||||
*
|
*
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
/*!
|
/*!
|
||||||
* \file cuda_multicorrelator.cu
|
* \file cuda_multicorrelator.cu
|
||||||
* \brief High optimized CUDA GPU vector multiTAP correlator class
|
* \brief Highly optimized CUDA GPU vector multiTAP correlator class
|
||||||
* \authors <ul>
|
* \authors <ul>
|
||||||
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* Class that implements a high optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
|
* Class that implements a highly optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
|
||||||
*
|
*
|
||||||
* -------------------------------------------------------------------------
|
* -------------------------------------------------------------------------
|
||||||
*
|
*
|
||||||
@ -33,9 +33,8 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "cuda_multicorrelator.h"
|
#include "cuda_multicorrelator.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <stdio.h>
|
||||||
// For the CUDA runtime routines (prefixed with "cuda_")
|
// For the CUDA runtime routines (prefixed with "cuda_")
|
||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
|
|
||||||
@ -53,8 +52,7 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
|
|||||||
int vectorN,
|
int vectorN,
|
||||||
int elementN,
|
int elementN,
|
||||||
float rem_carrier_phase_in_rad,
|
float rem_carrier_phase_in_rad,
|
||||||
float phase_step_rad
|
float phase_step_rad)
|
||||||
)
|
|
||||||
{
|
{
|
||||||
//Accumulators cache
|
//Accumulators cache
|
||||||
__shared__ GPU_Complex accumResult[ACCUM_N];
|
__shared__ GPU_Complex accumResult[ACCUM_N];
|
||||||
@ -90,7 +88,8 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
|
|||||||
for (int iAccum = threadIdx.x; iAccum < ACCUM_N; iAccum += blockDim.x)
|
for (int iAccum = threadIdx.x; iAccum < ACCUM_N; iAccum += blockDim.x)
|
||||||
{
|
{
|
||||||
GPU_Complex sum = GPU_Complex(0, 0);
|
GPU_Complex sum = GPU_Complex(0, 0);
|
||||||
float local_code_chip_index=0.0;;
|
float local_code_chip_index = 0.0;
|
||||||
|
;
|
||||||
//float code_phase;
|
//float code_phase;
|
||||||
for (int pos = iAccum; pos < elementN; pos += ACCUM_N)
|
for (int pos = iAccum; pos < elementN; pos += ACCUM_N)
|
||||||
{
|
{
|
||||||
@ -109,7 +108,6 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
|
|||||||
//printf("vec= %i, pos %i, chip_idx=%i chip_shift=%f \r\n",vec, pos,__float2int_rd(local_code_chip_index),local_code_chip_index);
|
//printf("vec= %i, pos %i, chip_idx=%i chip_shift=%f \r\n",vec, pos,__float2int_rd(local_code_chip_index),local_code_chip_index);
|
||||||
// 2.correlate
|
// 2.correlate
|
||||||
sum.multiply_acc(d_sig_wiped[pos], d_local_code_in[__float2int_rd(local_code_chip_index)]);
|
sum.multiply_acc(d_sig_wiped[pos], d_local_code_in[__float2int_rd(local_code_chip_index)]);
|
||||||
|
|
||||||
}
|
}
|
||||||
accumResult[iAccum] = sum;
|
accumResult[iAccum] = sum;
|
||||||
}
|
}
|
||||||
@ -135,23 +133,26 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool cuda_multicorrelator::init_cuda_integrated_resampler(
|
bool cuda_multicorrelator::init_cuda_integrated_resampler(
|
||||||
int signal_length_samples,
|
int signal_length_samples,
|
||||||
int code_length_chips,
|
int code_length_chips,
|
||||||
int n_correlators
|
int n_correlators)
|
||||||
)
|
|
||||||
{
|
{
|
||||||
// use command-line specified CUDA device, otherwise use device with highest Gflops/s
|
// use command-line specified CUDA device, otherwise use device with highest Gflops/s
|
||||||
// findCudaDevice(argc, (const char **)argv);
|
// findCudaDevice(argc, (const char **)argv);
|
||||||
cudaDeviceProp prop;
|
cudaDeviceProp prop;
|
||||||
int num_devices, device;
|
int num_devices, device;
|
||||||
cudaGetDeviceCount(&num_devices);
|
cudaGetDeviceCount(&num_devices);
|
||||||
if (num_devices > 1) {
|
if (num_devices > 1)
|
||||||
|
{
|
||||||
int max_multiprocessors = 0, max_device = 0;
|
int max_multiprocessors = 0, max_device = 0;
|
||||||
for (device = 0; device < num_devices; device++) {
|
for (device = 0; device < num_devices; device++)
|
||||||
|
{
|
||||||
cudaDeviceProp properties;
|
cudaDeviceProp properties;
|
||||||
cudaGetDeviceProperties(&properties, device);
|
cudaGetDeviceProperties(&properties, device);
|
||||||
if (max_multiprocessors < properties.multiProcessorCount) {
|
if (max_multiprocessors < properties.multiProcessorCount)
|
||||||
|
{
|
||||||
max_multiprocessors = properties.multiProcessorCount;
|
max_multiprocessors = properties.multiProcessorCount;
|
||||||
max_device = device;
|
max_device = device;
|
||||||
}
|
}
|
||||||
@ -165,7 +166,8 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler(
|
|||||||
|
|
||||||
cudaGetDeviceProperties(&prop, max_device);
|
cudaGetDeviceProperties(&prop, max_device);
|
||||||
//debug code
|
//debug code
|
||||||
if (prop.canMapHostMemory != 1) {
|
if (prop.canMapHostMemory != 1)
|
||||||
|
{
|
||||||
printf("Device can not map memory.\n");
|
printf("Device can not map memory.\n");
|
||||||
}
|
}
|
||||||
printf("L2 Cache size= %u \n", prop.l2CacheSize);
|
printf("L2 Cache size= %u \n", prop.l2CacheSize);
|
||||||
@ -174,11 +176,14 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler(
|
|||||||
printf("sharedMemPerBlock= %lu \n", prop.sharedMemPerBlock);
|
printf("sharedMemPerBlock= %lu \n", prop.sharedMemPerBlock);
|
||||||
printf("deviceOverlap= %i \n", prop.deviceOverlap);
|
printf("deviceOverlap= %i \n", prop.deviceOverlap);
|
||||||
printf("multiProcessorCount= %i \n", prop.multiProcessorCount);
|
printf("multiProcessorCount= %i \n", prop.multiProcessorCount);
|
||||||
}else{
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
cudaGetDevice(&selected_gps_device);
|
cudaGetDevice(&selected_gps_device);
|
||||||
cudaGetDeviceProperties(&prop, selected_gps_device);
|
cudaGetDeviceProperties(&prop, selected_gps_device);
|
||||||
//debug code
|
//debug code
|
||||||
if (prop.canMapHostMemory != 1) {
|
if (prop.canMapHostMemory != 1)
|
||||||
|
{
|
||||||
printf("Device can not map memory.\n");
|
printf("Device can not map memory.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -235,14 +240,13 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler(
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool cuda_multicorrelator::set_local_code_and_taps(
|
bool cuda_multicorrelator::set_local_code_and_taps(
|
||||||
int code_length_chips,
|
int code_length_chips,
|
||||||
const std::complex<float> *local_codes_in,
|
const std::complex<float> *local_codes_in,
|
||||||
float *shifts_chips,
|
float *shifts_chips,
|
||||||
int n_correlators
|
int n_correlators)
|
||||||
)
|
|
||||||
{
|
{
|
||||||
|
|
||||||
cudaSetDevice(selected_gps_device);
|
cudaSetDevice(selected_gps_device);
|
||||||
//********* ZERO COPY VERSION ************
|
//********* ZERO COPY VERSION ************
|
||||||
// // Get device pointer from host memory. No allocation or memcpy
|
// // Get device pointer from host memory. No allocation or memcpy
|
||||||
@ -272,12 +276,11 @@ bool cuda_multicorrelator::set_local_code_and_taps(
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool cuda_multicorrelator::set_input_output_vectors(
|
bool cuda_multicorrelator::set_input_output_vectors(
|
||||||
std::complex<float> *corr_out,
|
std::complex<float> *corr_out,
|
||||||
std::complex<float>* sig_in
|
std::complex<float> *sig_in)
|
||||||
)
|
|
||||||
{
|
{
|
||||||
|
|
||||||
cudaSetDevice(selected_gps_device);
|
cudaSetDevice(selected_gps_device);
|
||||||
// Save CPU pointers
|
// Save CPU pointers
|
||||||
d_sig_in_cpu = sig_in;
|
d_sig_in_cpu = sig_in;
|
||||||
@ -293,10 +296,12 @@ bool cuda_multicorrelator::set_input_output_vectors(
|
|||||||
printf("cuda cudaHostGetDevicePointer error \r\n");
|
printf("cuda cudaHostGetDevicePointer error \r\n");
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
|
#define gpuErrchk(ans) \
|
||||||
|
{ \
|
||||||
|
gpuAssert((ans), __FILE__, __LINE__); \
|
||||||
|
}
|
||||||
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
|
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
|
||||||
{
|
{
|
||||||
if (code != cudaSuccess)
|
if (code != cudaSuccess)
|
||||||
@ -306,6 +311,7 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
|
bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
|
||||||
float rem_carrier_phase_in_rad,
|
float rem_carrier_phase_in_rad,
|
||||||
float phase_step_rad,
|
float phase_step_rad,
|
||||||
@ -314,7 +320,6 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
|
|||||||
int signal_length_samples,
|
int signal_length_samples,
|
||||||
int n_correlators)
|
int n_correlators)
|
||||||
{
|
{
|
||||||
|
|
||||||
cudaSetDevice(selected_gps_device);
|
cudaSetDevice(selected_gps_device);
|
||||||
// cudaMemCpy version
|
// cudaMemCpy version
|
||||||
//size_t memSize = signal_length_samples * sizeof(std::complex<float>);
|
//size_t memSize = signal_length_samples * sizeof(std::complex<float>);
|
||||||
@ -337,8 +342,7 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
|
|||||||
n_correlators,
|
n_correlators,
|
||||||
signal_length_samples,
|
signal_length_samples,
|
||||||
rem_carrier_phase_in_rad,
|
rem_carrier_phase_in_rad,
|
||||||
phase_step_rad
|
phase_step_rad);
|
||||||
);
|
|
||||||
|
|
||||||
gpuErrchk(cudaPeekAtLastError());
|
gpuErrchk(cudaPeekAtLastError());
|
||||||
gpuErrchk(cudaStreamSynchronize(stream1));
|
gpuErrchk(cudaStreamSynchronize(stream1));
|
||||||
@ -352,6 +356,7 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
cuda_multicorrelator::cuda_multicorrelator()
|
cuda_multicorrelator::cuda_multicorrelator()
|
||||||
{
|
{
|
||||||
d_sig_in = NULL;
|
d_sig_in = NULL;
|
||||||
@ -366,6 +371,7 @@ cuda_multicorrelator::cuda_multicorrelator()
|
|||||||
d_code_length_chips = 0;
|
d_code_length_chips = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool cuda_multicorrelator::free_cuda()
|
bool cuda_multicorrelator::free_cuda()
|
||||||
{
|
{
|
||||||
// Free device global memory
|
// Free device global memory
|
||||||
@ -385,4 +391,3 @@ bool cuda_multicorrelator::free_cuda()
|
|||||||
cudaDeviceReset();
|
cudaDeviceReset();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
/*!
|
/*!
|
||||||
* \file cuda_multicorrelator.h
|
* \file cuda_multicorrelator.h
|
||||||
* \brief High optimized CUDA GPU vector multiTAP correlator class
|
* \brief Highly optimized CUDA GPU vector multiTAP correlator class
|
||||||
* \authors <ul>
|
* \authors <ul>
|
||||||
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
* <li> Javier Arribas, 2015. jarribas(at)cttc.es
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* Class that implements a high optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
|
* Class that implements a highly optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
|
||||||
*
|
*
|
||||||
* -------------------------------------------------------------------------
|
* -------------------------------------------------------------------------
|
||||||
*
|
*
|
||||||
@ -92,6 +92,7 @@ struct GPU_Complex
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct GPU_Complex_Short
|
struct GPU_Complex_Short
|
||||||
{
|
{
|
||||||
float r;
|
float r;
|
||||||
@ -149,7 +150,6 @@ private:
|
|||||||
GPU_Complex* d_local_codes_in;
|
GPU_Complex* d_local_codes_in;
|
||||||
GPU_Complex* d_corr_out;
|
GPU_Complex* d_corr_out;
|
||||||
|
|
||||||
//
|
|
||||||
std::complex<float>* d_sig_in_cpu;
|
std::complex<float>* d_sig_in_cpu;
|
||||||
std::complex<float>* d_corr_out_cpu;
|
std::complex<float>* d_corr_out_cpu;
|
||||||
|
|
||||||
|
@ -36,6 +36,7 @@
|
|||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
// All the outputs are in RADIANS
|
// All the outputs are in RADIANS
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* FLL four quadrant arctan discriminator:
|
* FLL four quadrant arctan discriminator:
|
||||||
* \f{equation}
|
* \f{equation}
|
||||||
@ -45,7 +46,6 @@
|
|||||||
* \f$I_{PS1},Q_{PS1}\f$ are the inphase and quadrature prompt correlator outputs respectively at sample time \f$t_1\f$, and
|
* \f$I_{PS1},Q_{PS1}\f$ are the inphase and quadrature prompt correlator outputs respectively at sample time \f$t_1\f$, and
|
||||||
* \f$I_{PS2},Q_{PS2}\f$ are the inphase and quadrature prompt correlator outputs respectively at sample time \f$t_2\f$. The output is in [radians/second].
|
* \f$I_{PS2},Q_{PS2}\f$ are the inphase and quadrature prompt correlator outputs respectively at sample time \f$t_2\f$. The output is in [radians/second].
|
||||||
*/
|
*/
|
||||||
|
|
||||||
double fll_four_quadrant_atan(gr_complex prompt_s1, gr_complex prompt_s2, double t1, double t2)
|
double fll_four_quadrant_atan(gr_complex prompt_s1, gr_complex prompt_s2, double t1, double t2)
|
||||||
{
|
{
|
||||||
double cross, dot;
|
double cross, dot;
|
||||||
@ -105,6 +105,7 @@ double dll_nc_e_minus_l_normalized(gr_complex early_s1, gr_complex late_s1)
|
|||||||
return 0.5 * (P_early - P_late) / (P_early + P_late);
|
return 0.5 * (P_early - P_late) / (P_early + P_late);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* DLL Noncoherent Very Early Minus Late Power (VEMLP) normalized discriminator, using the outputs
|
* DLL Noncoherent Very Early Minus Late Power (VEMLP) normalized discriminator, using the outputs
|
||||||
* of four correlators, Very Early (VE), Early (E), Late (L) and Very Late (VL):
|
* of four correlators, Very Early (VE), Early (E), Late (L) and Very Late (VL):
|
||||||
|
Loading…
Reference in New Issue
Block a user