mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2024-12-15 12:40:35 +00:00
Merge branch 'next' of https://github.com/gnss-sdr/gnss-sdr into next
This commit is contained in: commit a0ed4c1500
@@ -512,7 +512,6 @@ int hybrid_observables_gs::general_work(int noutput_items __attribute__((unused)
     {
         T_rx_clock_step_samples = std::round(static_cast<double>(in[d_nchannels_in - 1][0].fs) * 1e-3); // 1 ms
         LOG(INFO) << "Observables clock step samples set to " << T_rx_clock_step_samples;
-        usleep(1000000);
     }

 // Consume one item from the clock channel (last of the input channels)
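For reference, the clock step kept in the context lines above is one millisecond expressed in samples of the front-end stream. A minimal sketch of that conversion (hypothetical helper, not part of the diff):

#include <cmath>

// Number of samples spanned by a 1 ms receiver clock tick at a sampling
// frequency of fs_hz samples per second (illustration only).
inline double clock_step_samples(double fs_hz)
{
    return std::round(fs_hz * 1e-3);  // e.g. fs_hz = 4e6  ->  4000 samples per tick
}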
@@ -1,11 +1,11 @@
 /*!
  * \file cpu_multicorrelator.cc
- * \brief High optimized CPU vector multiTAP correlator class
+ * \brief Highly optimized CPU vector multiTAP correlator class
  * \authors <ul>
  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es
  *          </ul>
  *
- * Class that implements a high optimized vector multiTAP correlator class for CPUs
+ * Class that implements a highly optimized vector multiTAP correlator class for CPUs
  *
  * -------------------------------------------------------------------------
  *
@@ -65,4 +65,4 @@ private:
 };


-#endif /* CPU_MULTICORRELATOR_H_ */
+#endif /* GNSS_SDR_CPU_MULTICORRELATOR_H_ */
@@ -1,11 +1,11 @@
 /*!
  * \file cpu_multicorrelator_16sc.cc
- * \brief High optimized CPU vector multiTAP correlator class
+ * \brief Highly optimized CPU vector multiTAP correlator class
  * \authors <ul>
  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es
  *          </ul>
  *
- * Class that implements a high optimized vector multiTAP correlator class for CPUs
+ * Class that implements a highly optimized vector multiTAP correlator class for CPUs
  *
  * -------------------------------------------------------------------------
  *
@@ -1,11 +1,11 @@
 /*!
  * \file cpu_multicorrelator_16sc.h
- * \brief High optimized CPU vector multiTAP correlator class for lv_16sc_t (short int complex)
+ * \brief Highly optimized CPU vector multiTAP correlator class for lv_16sc_t (short int complex)
  * \authors <ul>
  *          <li> Javier Arribas, 2016. jarribas(at)cttc.es
  *          </ul>
  *
- * Class that implements a high optimized vector multiTAP correlator class for CPUs
+ * Class that implements a highly optimized vector multiTAP correlator class for CPUs
  *
  * -------------------------------------------------------------------------
  *
@@ -6,7 +6,7 @@
  *          <li> Cillian O'Driscoll, 2017. cillian.odriscoll(at)gmail.com
  *          </ul>
  *
- * Class that implements a high optimized vector multiTAP correlator class for CPUs
+ * Class that implements a highly optimized vector multiTAP correlator class for CPUs
  *
  * -------------------------------------------------------------------------
  *
@@ -125,7 +125,7 @@ void Cpu_Multicorrelator_Real_Codes::update_local_code(int correlator_length_sam
         }
 }

-// Overload Carrier_wipeoff_multicorrelator_resampler to ensure back compatibility
+
 bool Cpu_Multicorrelator_Real_Codes::Carrier_wipeoff_multicorrelator_resampler(
     float rem_carrier_phase_in_rad,
     float phase_step_rad,
@@ -150,7 +150,8 @@ bool Cpu_Multicorrelator_Real_Codes::Carrier_wipeoff_multicorrelator_resampler(
         }
     return true;
 }
-// Overload Carrier_wipeoff_multicorrelator_resampler to ensure back compatibility
+
+
 bool Cpu_Multicorrelator_Real_Codes::Carrier_wipeoff_multicorrelator_resampler(
     float rem_carrier_phase_in_rad,
     float phase_step_rad,
@@ -6,7 +6,7 @@
  *          <li> Cillian O'Driscoll, 2017, cillian.odriscoll(at)gmail.com
  *          </ul>
  *
- * Class that implements a high optimized vector multiTAP correlator class for CPUs
+ * Class that implements a highly optimized vector multiTAP correlator class for CPUs
  *
  * -------------------------------------------------------------------------
  *
@@ -52,7 +52,6 @@ public:
     bool set_local_code_and_taps(int code_length_chips, const float *local_code_in, float *shifts_chips);
     bool set_input_output_vectors(std::complex<float> *corr_out, const std::complex<float> *sig_in);
     void update_local_code(int correlator_length_samples, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips = 0.0);
-    // Overload Carrier_wipeoff_multicorrelator_resampler to ensure back compatibility
     bool Carrier_wipeoff_multicorrelator_resampler(float rem_carrier_phase_in_rad, float phase_step_rad, float phase_rate_step_rad, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, int signal_length_samples);
     bool Carrier_wipeoff_multicorrelator_resampler(float rem_carrier_phase_in_rad, float phase_step_rad, float rem_code_phase_chips, float code_phase_step_chips, float code_phase_rate_step_chips, int signal_length_samples);
     bool free();
@@ -70,4 +69,4 @@ private:
 };


-#endif /* CPU_MULTICORRELATOR_REAL_CODES_H_ */
+#endif /* GNSS_SDR_CPU_MULTICORRELATOR_REAL_CODES_H_ */
@@ -1,11 +1,11 @@
 /*!
  * \file cuda_multicorrelator.cu
- * \brief High optimized CUDA GPU vector multiTAP correlator class
+ * \brief Highly optimized CUDA GPU vector multiTAP correlator class
  * \authors <ul>
  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es
  *          </ul>
  *
- * Class that implements a high optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
+ * Class that implements a highly optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
  *
  * -------------------------------------------------------------------------
  *
@@ -33,9 +33,8 @@
  */

 #include "cuda_multicorrelator.h"

-#include <stdio.h>
 #include <iostream>
+#include <stdio.h>
 // For the CUDA runtime routines (prefixed with "cuda_")
 #include <cuda_runtime.h>

@@ -53,22 +52,21 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
     int vectorN,
     int elementN,
     float rem_carrier_phase_in_rad,
-    float phase_step_rad
-)
+    float phase_step_rad)
 {
     //Accumulators cache
     __shared__ GPU_Complex accumResult[ACCUM_N];

     // CUDA version of floating point NCO and vector dot product integrated
     float sin;
     float cos;
     for (int i = blockIdx.x * blockDim.x + threadIdx.x;
          i < elementN;
          i += blockDim.x * gridDim.x)
         {
-            __sincosf(rem_carrier_phase_in_rad + i*phase_step_rad, &sin, &cos);
-            d_sig_wiped[i] = d_sig_in[i] * GPU_Complex(cos,-sin);
+            __sincosf(rem_carrier_phase_in_rad + i * phase_step_rad, &sin, &cos);
+            d_sig_wiped[i] = d_sig_in[i] * GPU_Complex(cos, -sin);
         }

     __syncthreads();
     ////////////////////////////////////////////////////////////////////////////
@@ -77,273 +75,279 @@ __global__ void Doppler_wippe_scalarProdGPUCPXxN_shifts_chips(
    // from total number of thread blocks
    ////////////////////////////////////////////////////////////////////////////
    for (int vec = blockIdx.x; vec < vectorN; vec += gridDim.x)
        {
            //int vectorBase = IMUL(elementN, vec);
            //int vectorEnd = elementN;

            ////////////////////////////////////////////////////////////////////////
            // Each accumulator cycles through vectors with
            // stride equal to number of total number of accumulators ACCUM_N
            // At this stage ACCUM_N is only preferred be a multiple of warp size
            // to meet memory coalescing alignment constraints.
            ////////////////////////////////////////////////////////////////////////
            for (int iAccum = threadIdx.x; iAccum < ACCUM_N; iAccum += blockDim.x)
                {
                    GPU_Complex sum = GPU_Complex(0, 0);
                    float local_code_chip_index = 0.0;
                    ;
                    //float code_phase;
                    for (int pos = iAccum; pos < elementN; pos += ACCUM_N)
                        {
                            //original sample code
                            //sum = sum + d_sig_in[pos-vectorBase] * d_nco_in[pos-vectorBase] * d_local_codes_in[pos];
                            //sum = sum + d_sig_in[pos-vectorBase] * d_local_codes_in[pos];
                            //sum.multiply_acc(d_sig_in[pos],d_local_codes_in[pos+d_shifts_samples[vec]]);

                            //custom code for multitap correlator
                            // 1.resample local code for the current shift

                            local_code_chip_index = fmodf(code_phase_step_chips * __int2float_rd(pos) + d_shifts_chips[vec] - rem_code_phase_chips, code_length_chips);

                            //Take into account that in multitap correlators, the shifts can be negative!
                            if (local_code_chip_index < 0.0) local_code_chip_index += (code_length_chips - 1);
                            //printf("vec= %i, pos %i, chip_idx=%i chip_shift=%f \r\n",vec, pos,__float2int_rd(local_code_chip_index),local_code_chip_index);
                            // 2.correlate
                            sum.multiply_acc(d_sig_wiped[pos], d_local_code_in[__float2int_rd(local_code_chip_index)]);
                        }
                    accumResult[iAccum] = sum;
                }

            ////////////////////////////////////////////////////////////////////////
            // Perform tree-like reduction of accumulators' results.
            // ACCUM_N has to be power of two at this stage
            ////////////////////////////////////////////////////////////////////////
            for (int stride = ACCUM_N / 2; stride > 0; stride >>= 1)
                {
                    __syncthreads();

                    for (int iAccum = threadIdx.x; iAccum < stride; iAccum += blockDim.x)
                        {
                            accumResult[iAccum] += accumResult[stride + iAccum];
                        }
                }

            if (threadIdx.x == 0)
                {
                    d_corr_out[vec] = accumResult[0];
                }
        }
}
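The kernel above combines three steps per correlator tap: carrier wipe-off with an on-the-fly NCO, local-code resampling at the tap's chip shift, and accumulation of the products. A plain CPU sketch of the same computation (hypothetical function and names, not part of this diff):

#include <cmath>
#include <complex>
#include <cstddef>
#include <vector>

// One complex accumulator per correlator tap; shifts_chips holds the (possibly
// negative, fractional) chip offset of each tap.
std::vector<std::complex<float>> multitap_correlate_sketch(
    const std::vector<std::complex<float>> &sig_in,
    const std::vector<std::complex<float>> &local_code,
    const std::vector<float> &shifts_chips,
    float rem_carrier_phase_rad, float phase_step_rad,
    float rem_code_phase_chips, float code_phase_step_chips)
{
    const float code_length_chips = static_cast<float>(local_code.size());
    std::vector<std::complex<float>> corr_out(shifts_chips.size(), std::complex<float>(0.0F, 0.0F));
    for (std::size_t vec = 0; vec < shifts_chips.size(); vec++)
        {
            for (std::size_t i = 0; i < sig_in.size(); i++)
                {
                    // Carrier wipe-off: multiply by exp(-j * phase)
                    const float phase = rem_carrier_phase_rad + static_cast<float>(i) * phase_step_rad;
                    const std::complex<float> wiped = sig_in[i] * std::complex<float>(std::cos(phase), -std::sin(phase));
                    // Local code resampling for this tap (wrap negative shifts back into range)
                    float chip_index = std::fmod(code_phase_step_chips * static_cast<float>(i) + shifts_chips[vec] - rem_code_phase_chips, code_length_chips);
                    if (chip_index < 0.0F) chip_index += code_length_chips;
                    // Correlate and accumulate (modulo guards against rounding at the wrap point)
                    corr_out[vec] += wiped * local_code[static_cast<std::size_t>(chip_index) % local_code.size()];
                }
        }
    return corr_out;
}

The GPU version parallelises the inner accumulation across ACCUM_N partial sums per tap and combines them with the tree-like reduction shown above.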
bool cuda_multicorrelator::init_cuda_integrated_resampler(
    int signal_length_samples,
    int code_length_chips,
    int n_correlators)
{
    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
    // findCudaDevice(argc, (const char **)argv);
    cudaDeviceProp prop;
    int num_devices, device;
    cudaGetDeviceCount(&num_devices);
    if (num_devices > 1)
        {
            int max_multiprocessors = 0, max_device = 0;
            for (device = 0; device < num_devices; device++)
                {
                    cudaDeviceProp properties;
                    cudaGetDeviceProperties(&properties, device);
                    if (max_multiprocessors < properties.multiProcessorCount)
                        {
                            max_multiprocessors = properties.multiProcessorCount;
                            max_device = device;
                        }
                    printf("Found GPU device # %i\n", device);
                }
            //cudaSetDevice(max_device);

            //set random device!
            selected_gps_device = rand() % num_devices;  //generates a random number between 0 and num_devices to split the threads between GPUs
            cudaSetDevice(selected_gps_device);

            cudaGetDeviceProperties(&prop, max_device);
            //debug code
            if (prop.canMapHostMemory != 1)
                {
                    printf("Device can not map memory.\n");
                }
            printf("L2 Cache size= %u \n", prop.l2CacheSize);
            printf("maxThreadsPerBlock= %u \n", prop.maxThreadsPerBlock);
            printf("maxGridSize= %i \n", prop.maxGridSize[0]);
            printf("sharedMemPerBlock= %lu \n", prop.sharedMemPerBlock);
            printf("deviceOverlap= %i \n", prop.deviceOverlap);
            printf("multiProcessorCount= %i \n", prop.multiProcessorCount);
        }
    else
        {
            cudaGetDevice(&selected_gps_device);
            cudaGetDeviceProperties(&prop, selected_gps_device);
            //debug code
            if (prop.canMapHostMemory != 1)
                {
                    printf("Device can not map memory.\n");
                }

            printf("L2 Cache size= %u \n", prop.l2CacheSize);
            printf("maxThreadsPerBlock= %u \n", prop.maxThreadsPerBlock);
            printf("maxGridSize= %i \n", prop.maxGridSize[0]);
            printf("sharedMemPerBlock= %lu \n", prop.sharedMemPerBlock);
            printf("deviceOverlap= %i \n", prop.deviceOverlap);
            printf("multiProcessorCount= %i \n", prop.multiProcessorCount);
        }

    // (cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));

    // ALLOCATE GPU MEMORY FOR INPUT/OUTPUT and INTERNAL vectors
    size_t size = signal_length_samples * sizeof(GPU_Complex);

    //********* ZERO COPY VERSION ************
    // Set flag to enable zero copy access
    // Optimal in shared memory devices (like Jetson K1)
    //cudaSetDeviceFlags(cudaDeviceMapHost);

    //******** CudaMalloc version ***********

    // input signal GPU memory (can be mapped to CPU memory in shared memory devices!)
    // cudaMalloc((void **)&d_sig_in, size);
    // cudaMemset(d_sig_in,0,size);

    // Doppler-free signal (internal GPU memory)
    cudaMalloc((void **)&d_sig_doppler_wiped, size);
    cudaMemset(d_sig_doppler_wiped, 0, size);

    // Local code GPU memory (can be mapped to CPU memory in shared memory devices!)
    cudaMalloc((void **)&d_local_codes_in, sizeof(std::complex<float>) * code_length_chips);
    cudaMemset(d_local_codes_in, 0, sizeof(std::complex<float>) * code_length_chips);

    d_code_length_chips = code_length_chips;

    // Vector with the chip shifts for each correlator tap
    //GPU memory (can be mapped to CPU memory in shared memory devices!)
    cudaMalloc((void **)&d_shifts_chips, sizeof(float) * n_correlators);
    cudaMemset(d_shifts_chips, 0, sizeof(float) * n_correlators);

    //scalars
    //cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators);
    //cudaMemset(d_corr_out,0,sizeof(std::complex<float>)*n_correlators);

    // Launch the Vector Add CUDA Kernel
    // TODO: write a smart load balance using device info!
    threadsPerBlock = 64;
    blocksPerGrid = 128;  //(int)(signal_length_samples+threadsPerBlock-1)/threadsPerBlock;

    cudaStreamCreate(&stream1);
    //cudaStreamCreate (&stream2) ;
    return true;
}
bool cuda_multicorrelator::set_local_code_and_taps(
    int code_length_chips,
    const std::complex<float> *local_codes_in,
    float *shifts_chips,
    int n_correlators)
{
    cudaSetDevice(selected_gps_device);
    //********* ZERO COPY VERSION ************
    // // Get device pointer from host memory. No allocation or memcpy
    // cudaError_t code;
    // // local code CPU -> GPU copy memory
    // code=cudaHostGetDevicePointer((void **)&d_local_codes_in, (void *) local_codes_in, 0);
    // if (code!=cudaSuccess)
    //     {
    //         printf("cuda cudaHostGetDevicePointer error in set_local_code_and_taps \r\n");
    //     }
    // // Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!)
    // code=cudaHostGetDevicePointer((void **)&d_shifts_chips, (void *) shifts_chips, 0);
    // if (code!=cudaSuccess)
    //     {
    //         printf("cuda cudaHostGetDevicePointer error in set_local_code_and_taps \r\n");
    //     }

    //******** CudaMalloc version ***********
    //local code CPU -> GPU copy memory
    cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex) * code_length_chips, cudaMemcpyHostToDevice, stream1);
    d_code_length_chips = code_length_chips;

    //Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!)
    cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float) * n_correlators,
        cudaMemcpyHostToDevice, stream1);

    return true;
}
bool cuda_multicorrelator::set_input_output_vectors(
    std::complex<float> *corr_out,
    std::complex<float> *sig_in)
{
    cudaSetDevice(selected_gps_device);
    // Save CPU pointers
    d_sig_in_cpu = sig_in;
    d_corr_out_cpu = corr_out;

    // Zero Copy version
    // Get device pointer from host memory. No allocation or memcpy
    cudaError_t code;
    code = cudaHostGetDevicePointer((void **)&d_sig_in, (void *)sig_in, 0);
    code = cudaHostGetDevicePointer((void **)&d_corr_out, (void *)corr_out, 0);
    if (code != cudaSuccess)
        {
            printf("cuda cudaHostGetDevicePointer error \r\n");
        }
    return true;
}
#define gpuErrchk(ans)                        \
    {                                         \
        gpuAssert((ans), __FILE__, __LINE__); \
    }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
    if (code != cudaSuccess)
        {
            fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
            if (abort) exit(code);
        }
}
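Usage note (illustrative, not part of the diff): any CUDA runtime call that returns cudaError_t can be wrapped with the macro above so a failure is reported with file and line before exiting, for example:

// d_sig_doppler_wiped, size and stream1 are the members/locals used earlier in this file.
gpuErrchk(cudaMalloc((void **)&d_sig_doppler_wiped, size));
gpuErrchk(cudaMemsetAsync(d_sig_doppler_wiped, 0, size, stream1));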
bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
    float rem_carrier_phase_in_rad,
    float phase_step_rad,
    float code_phase_step_chips,
    float rem_code_phase_chips,
    int signal_length_samples,
    int n_correlators)
{
    cudaSetDevice(selected_gps_device);
    // cudaMemCpy version
    //size_t memSize = signal_length_samples * sizeof(std::complex<float>);
    // input signal CPU -> GPU copy memory
    //cudaMemcpyAsync(d_sig_in, d_sig_in_cpu, memSize,
    //    cudaMemcpyHostToDevice, stream2);
    //***** NOTICE: NCO is computed on-the-fly, not need to copy NCO into GPU! ****

    //launch the multitap correlator with integrated local code resampler!

    Doppler_wippe_scalarProdGPUCPXxN_shifts_chips<<<blocksPerGrid, threadsPerBlock, 0, stream1>>>(
        d_corr_out,
        d_sig_in,
        d_sig_doppler_wiped,
        d_local_codes_in,
        d_shifts_chips,
        d_code_length_chips,
        code_phase_step_chips,
        rem_code_phase_chips,
        n_correlators,
        signal_length_samples,
        rem_carrier_phase_in_rad,
        phase_step_rad);

    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaStreamSynchronize(stream1));

    // cudaMemCpy version
    // Copy the device result vector in device memory to the host result vector
    // in host memory.
    //scalar products (correlators outputs)
@@ -352,37 +356,38 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
    return true;
}


cuda_multicorrelator::cuda_multicorrelator()
{
    d_sig_in = NULL;
    d_nco_in = NULL;
    d_sig_doppler_wiped = NULL;
    d_local_codes_in = NULL;
    d_shifts_samples = NULL;
    d_shifts_chips = NULL;
    d_corr_out = NULL;
    threadsPerBlock = 0;
    blocksPerGrid = 0;
    d_code_length_chips = 0;
}


bool cuda_multicorrelator::free_cuda()
{
    // Free device global memory
    if (d_sig_in != NULL) cudaFree(d_sig_in);
    if (d_nco_in != NULL) cudaFree(d_nco_in);
    if (d_sig_doppler_wiped != NULL) cudaFree(d_sig_doppler_wiped);
    if (d_local_codes_in != NULL) cudaFree(d_local_codes_in);
    if (d_corr_out != NULL) cudaFree(d_corr_out);
    if (d_shifts_samples != NULL) cudaFree(d_shifts_samples);
    if (d_shifts_chips != NULL) cudaFree(d_shifts_chips);
    // Reset the device and exit
    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice. It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();
    return true;
}
@@ -1,11 +1,11 @@
 /*!
  * \file cuda_multicorrelator.h
- * \brief High optimized CUDA GPU vector multiTAP correlator class
+ * \brief Highly optimized CUDA GPU vector multiTAP correlator class
  * \authors <ul>
  *          <li> Javier Arribas, 2015. jarribas(at)cttc.es
  *          </ul>
  *
- * Class that implements a high optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
+ * Class that implements a highly optimized vector multiTAP correlator class for NVIDIA CUDA GPUs
  *
  * -------------------------------------------------------------------------
  *
@@ -92,6 +92,7 @@ struct GPU_Complex
    }
};

+
struct GPU_Complex_Short
{
    float r;
@@ -149,7 +150,6 @@ private:
    GPU_Complex* d_local_codes_in;
    GPU_Complex* d_corr_out;

-    //
    std::complex<float>* d_sig_in_cpu;
    std::complex<float>* d_corr_out_cpu;

@@ -32,7 +32,26 @@
  */

 #include "tracking_FLL_PLL_filter.h"
-#include <iostream>


+Tracking_FLL_PLL_filter::Tracking_FLL_PLL_filter()
+{
+    d_order = 0;
+    d_pll_w = 0.0;
+    d_pll_w0p3 = 0.0;
+    d_pll_w0f2 = 0.0;
+    d_pll_x = 0.0;
+    d_pll_a2 = 0.0;
+    d_pll_w0f = 0.0;
+    d_pll_a3 = 0.0;
+    d_pll_w0p2 = 0.0;
+    d_pll_b3 = 0.0;
+    d_pll_w0p = 0.0;
+}
+
+
+Tracking_FLL_PLL_filter::~Tracking_FLL_PLL_filter() = default;
+
+
 void Tracking_FLL_PLL_filter::set_params(float fll_bw_hz, float pll_bw_hz, int order)
 {
@@ -89,7 +108,7 @@ float Tracking_FLL_PLL_filter::get_carrier_error(float FLL_discriminator, float
    if (d_order == 3)
        {
            /*
             * 3rd order PLL with 2nd order FLL assist
             */
            d_pll_w = d_pll_w + correlation_time_s * (d_pll_w0p3 * PLL_discriminator + d_pll_w0f2 * FLL_discriminator);
            d_pll_x = d_pll_x + correlation_time_s * (0.5 * d_pll_w + d_pll_a2 * d_pll_w0f * FLL_discriminator + d_pll_a3 * d_pll_w0p2 * PLL_discriminator);
@@ -104,31 +123,11 @@ float Tracking_FLL_PLL_filter::get_carrier_error(float FLL_discriminator, float
            pll_w_new = d_pll_w + PLL_discriminator * d_pll_w0p2 * correlation_time_s + FLL_discriminator * d_pll_w0f * correlation_time_s;
            carrier_error_hz = 0.5 * (pll_w_new + d_pll_w) + d_pll_a2 * d_pll_w0p * PLL_discriminator;
            d_pll_w = pll_w_new;
-            /*std::cout<<" d_pll_w = "<<carrier_error_hz<<
-            ", pll_w_new = "<<pll_w_new
-            <<", PLL_discriminator=" <<PLL_discriminator
-            <<" FLL_discriminator ="<<FLL_discriminator
-            <<" correlation_time_s = "<<correlation_time_s<<"\r\n";*/
+            /* std::cout << " d_pll_w = " << carrier_error_hz << ", pll_w_new = " << pll_w_new
+               << ", PLL_discriminator=" << PLL_discriminator
+               << " FLL_discriminator =" << FLL_discriminator
+               << " correlation_time_s = " << correlation_time_s << "\r\n"; */
        }

    return carrier_error_hz;
}
-
-
-Tracking_FLL_PLL_filter::Tracking_FLL_PLL_filter()
-{
-    d_order = 0;
-    d_pll_w = 0;
-    d_pll_w0p3 = 0;
-    d_pll_w0f2 = 0;
-    d_pll_x = 0;
-    d_pll_a2 = 0;
-    d_pll_w0f = 0;
-    d_pll_a3 = 0;
-    d_pll_w0p2 = 0;
-    d_pll_b3 = 0;
-    d_pll_w0p = 0;
-}
-
-
-Tracking_FLL_PLL_filter::~Tracking_FLL_PLL_filter() = default;
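The context lines above give the second-order update: the filter state w is advanced by the weighted PLL and FLL discriminators, and the carrier error is the trapezoidal average of the old and new state plus a proportional PLL term. A minimal standalone sketch of that update (hypothetical struct, not the Tracking_FLL_PLL_filter class; the naming assumes w0p2 = w0p squared, as in the code above):

#include <iostream>

struct SecondOrderFllPllSketch
{
    float w = 0.0F;     // integrator state
    float w0p = 0.0F;   // PLL natural frequency [rad/s]
    float w0f = 0.0F;   // FLL natural frequency [rad/s]
    float a2 = 1.414F;  // usual 2nd-order loop coefficient

    float update(float fll_discriminator, float pll_discriminator, float t_s)
    {
        const float w_new = w + pll_discriminator * (w0p * w0p) * t_s + fll_discriminator * w0f * t_s;
        const float carrier_error_hz = 0.5F * (w_new + w) + a2 * w0p * pll_discriminator;
        w = w_new;
        return carrier_error_hz;
    }
};

int main()
{
    SecondOrderFllPllSketch f;
    f.w0p = 25.0F / 0.53F;  // e.g. 25 Hz PLL noise bandwidth (2nd-order relation Bn = 0.53 * w0)
    f.w0f = 4.0F * 2.0F;    // e.g. 2 Hz FLL noise bandwidth (1st-order relation w0 = 4 * Bn)
    std::cout << f.update(0.0F, 0.1F, 1e-3F) << '\n';
    return 0;
}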
@@ -36,6 +36,7 @@
#include <cmath>

// All the outputs are in RADIANS
+
/*
 * FLL four quadrant arctan discriminator:
 * \f{equation}
@@ -45,7 +46,6 @@
 * \f$I_{PS1},Q_{PS1}\f$ are the inphase and quadrature prompt correlator outputs respectively at sample time \f$t_1\f$, and
 * \f$I_{PS2},Q_{PS2}\f$ are the inphase and quadrature prompt correlator outputs respectively at sample time \f$t_2\f$. The output is in [radians/second].
 */
-
double fll_four_quadrant_atan(gr_complex prompt_s1, gr_complex prompt_s2, double t1, double t2)
{
    double cross, dot;
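The doxygen block above describes the FLL four-quadrant arctan discriminator; a sketch consistent with that description (std::complex<float> stands in for gr_complex here, and this is not the file's exact code) is:

#include <cmath>
#include <complex>

// Frequency error in radians/second from two consecutive prompt correlator
// outputs taken at times t1 and t2.
double fll_four_quadrant_atan_sketch(std::complex<float> prompt_s1,
    std::complex<float> prompt_s2, double t1, double t2)
{
    const double cross = prompt_s1.real() * prompt_s2.imag() - prompt_s2.real() * prompt_s1.imag();
    const double dot = prompt_s1.real() * prompt_s2.real() + prompt_s1.imag() * prompt_s2.imag();
    return std::atan2(cross, dot) / (t2 - t1);
}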
@@ -105,6 +105,7 @@ double dll_nc_e_minus_l_normalized(gr_complex early_s1, gr_complex late_s1)
    return 0.5 * (P_early - P_late) / (P_early + P_late);
}

+
/*
 * DLL Noncoherent Very Early Minus Late Power (VEMLP) normalized discriminator, using the outputs
 * of four correlators, Very Early (VE), Early (E), Late (L) and Very Late (VL):
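The return statement in the hunk above is the normalized noncoherent early-minus-late power discriminator; one standard way to form the two powers from the complex correlator outputs (sketch only, with gr_complex replaced by std::complex<float>):

#include <complex>

// Code tracking error from the Early and Late correlator outputs.
double dll_nc_e_minus_l_normalized_sketch(std::complex<float> early_s1, std::complex<float> late_s1)
{
    const double P_early = std::norm(early_s1);  // |E|^2
    const double P_late = std::norm(late_s1);    // |L|^2
    return 0.5 * (P_early - P_late) / (P_early + P_late);
}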
@@ -4,7 +4,7 @@
 * \author Cillian O'Driscoll, 2015. cillian.odriscoll(at)gmail.com
 *
 * Class implementing a generic 1st, 2nd or 3rd order loop filter. Based
- * on the bilinear transform of the standard Weiner filter.
+ * on the bilinear transform of the standard Wiener filter.
 *
 * -------------------------------------------------------------------------
 *
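For context, the bilinear transform named in this file maps the continuous-time loop filter F(s) to a digital filter by the substitution s = (2 / T) * (1 - z^-1) / (1 + z^-1), where T is the loop update interval; update_coefficients() derives the resulting difference-equation coefficients from the configured noise bandwidth and loop order.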
@@ -36,6 +36,8 @@
#include <glog/logging.h>
#include <cmath>

+const int MAX_LOOP_ORDER = 3;
+const int MAX_LOOP_HISTORY_LENGTH = 4;

Tracking_loop_filter::Tracking_loop_filter(float update_interval,
    float noise_bandwidth,
@@ -74,7 +76,7 @@ float Tracking_loop_filter::apply(float current_input)
    // Now apply the filter coefficients:
    float result = 0.0;

-    // Hanlde the old outputs first:
+    // Handle the old outputs first:
    for (unsigned int ii = 0; ii < d_output_coefficients.size(); ++ii)
        {
            result += d_output_coefficients[ii] * d_outputs[(d_current_index + ii) % MAX_LOOP_HISTORY_LENGTH];
@@ -95,16 +97,13 @@ float Tracking_loop_filter::apply(float current_input)

    d_inputs[d_current_index] = current_input;

-
    for (unsigned int ii = 0; ii < d_input_coefficients.size(); ++ii)
        {
            result += d_input_coefficients[ii] * d_inputs[(d_current_index + ii) % MAX_LOOP_HISTORY_LENGTH];
        }

-
    d_outputs[d_current_index] = result;

-
    return result;
}

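The two apply() hunks above implement a short IIR difference equation over circular histories of past inputs and outputs. A minimal standalone sketch of that pattern (hypothetical class; coefficient ordering and index handling are assumptions, not the exact class in the diff):

#include <array>
#include <cstddef>
#include <utility>
#include <vector>

// y[n] = sum_i b[i] * x[n - i]  +  sum_j a[j] * y[n - 1 - j], kept in circular
// buffers of length kHistory (MAX_LOOP_HISTORY_LENGTH in the code above).
class LoopFilterSketch
{
public:
    LoopFilterSketch(std::vector<float> input_coefficients, std::vector<float> output_coefficients)
        : b_(std::move(input_coefficients)), a_(std::move(output_coefficients)) {}

    float apply(float current_input)
    {
        float result = 0.0F;
        // Feedback part: weighted sum of the previous outputs
        for (std::size_t ii = 0; ii < a_.size(); ++ii)
            {
                result += a_[ii] * outputs_[(index_ + ii) % kHistory];
            }
        // Step the circular index backwards and store the new input
        index_ = (index_ + kHistory - 1) % kHistory;
        inputs_[index_] = current_input;
        // Feed-forward part: weighted sum of the current and previous inputs
        for (std::size_t ii = 0; ii < b_.size(); ++ii)
            {
                result += b_[ii] * inputs_[(index_ + ii) % kHistory];
            }
        outputs_[index_] = result;
        return result;
    }

private:
    static constexpr std::size_t kHistory = 4;
    std::vector<float> b_;  // input (feed-forward) coefficients
    std::vector<float> a_;  // output (feedback) coefficients
    std::array<float, kHistory> inputs_{};
    std::array<float, kHistory> outputs_{};
    std::size_t index_ = 0;
};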
@@ -179,7 +178,6 @@ void Tracking_loop_filter::update_coefficients(void)
                    d_output_coefficients[0] = 1.0;
                }
            break;
-
        case 3:
            wn = d_noise_bandwidth / 0.7845; // From Kaplan
            float a3 = 1.1;
@@ -208,7 +206,6 @@ void Tracking_loop_filter::update_coefficients(void)
            d_input_coefficients[1] = g1 * T * T / 2.0 - 2.0 * g3;
            d_input_coefficients[2] = g3 + T / 2.0 * (-g2 + T / 2.0 * g1);

-
            d_output_coefficients.resize(2);
            d_output_coefficients[0] = 2.0;
            d_output_coefficients[1] = -1.0;
@@ -260,10 +257,9 @@ void Tracking_loop_filter::set_order(int loop_order)
{
    if (loop_order < 1 or loop_order > MAX_LOOP_ORDER)
        {
-            LOG(ERROR) << "Ignoring attempt to set loop order to " << loop_order
+            LOG(WARNING) << "Ignoring attempt to set loop order to " << loop_order
                         << ". Maximum allowed order is: " << MAX_LOOP_ORDER
                         << ". Not changing current value of " << d_loop_order;
-
            return;
        }

@@ -4,7 +4,7 @@
 * \author Cillian O'Driscoll, 2015. cillian.odriscoll(at)gmail.com
 *
 * Class implementing a generic 1st, 2nd or 3rd order loop filter. Based
- * on the bilinear transform of the standard Weiner filter.
+ * on the bilinear transform of the standard Wiener filter.
 *
 * -------------------------------------------------------------------------
 *
@@ -33,8 +33,6 @@

#ifndef GNSS_SDR_TRACKING_LOOP_FILTER_H_
#define GNSS_SDR_TRACKING_LOOP_FILTER_H_
-#define MAX_LOOP_ORDER 3
-#define MAX_LOOP_HISTORY_LENGTH 4

#include <vector>

@@ -74,7 +72,6 @@ private:
    // Compute the filter coefficients:
    void update_coefficients(void);

-
public:
    float get_noise_bandwidth(void) const;
    float get_update_interval(void) const;
@@ -830,7 +830,7 @@ int32_t Beidou_Dnav_Navigation_Message::d2_subframe_decoder(std::string const& s
    d_eccentricity_msb = static_cast<double>(read_navigation_unsigned(subframe_bits, D2_E_MSB));
    d_eccentricity_msb_bits = (read_navigation_unsigned(subframe_bits, D2_E_MSB));
    // Adjust for lsb in next page (shift number of lsb to the left)
-    d_eccentricity_msb = static_cast<uint64_t>((static_cast<int>(d_eccentricity_msb) << 22));
+    d_eccentricity_msb = static_cast<uint64_t>((static_cast<uint64_t>(d_eccentricity_msb) << 22));
    d_eccentricity_msb_bits = d_eccentricity_msb_bits << 22;

    // Set system flags for message reception
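The one-line change above matters because the eccentricity MSB word is shifted 22 bits to the left to make room for the LSB word of the next page; doing that shift on a 32-bit signed int overflows for large field values, while a uint64_t does not. A small self-contained illustration (hypothetical raw value):

#include <cstdint>
#include <iostream>

int main()
{
    const uint32_t e_msb_raw = 0x3FF;  // hypothetical 10-bit eccentricity MSB field
    // Shifting a 32-bit signed int left by 22 overflows (undefined behaviour) as soon
    // as the field needs more than 9 bits; a uint64_t keeps every bit.
    const uint64_t shifted = static_cast<uint64_t>(e_msb_raw) << 22;
    std::cout << shifted << '\n';  // prints 4290772992
    return 0;
}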