mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2025-01-16 20:23:02 +00:00
Adding CPU and GPU multicorrelators unit tests
This commit is contained in:
parent
919bbe4e7a
commit
c8d7a607f8
@ -198,7 +198,7 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler(
|
||||
//********* ZERO COPY VERSION ************
|
||||
// Set flag to enable zero copy access
|
||||
// Optimal in shared memory devices (like Jetson K1)
|
||||
cudaSetDeviceFlags(cudaDeviceMapHost);
|
||||
//cudaSetDeviceFlags(cudaDeviceMapHost);
|
||||
|
||||
//******** CudaMalloc version ***********
|
||||
|
||||
|
@ -33,6 +33,7 @@ if(GTEST_INCLUDE_DIRS)
|
||||
set(GTEST_DIR_LOCAL true)
|
||||
endif(GTEST_INCLUDE_DIRS)
|
||||
|
||||
|
||||
if(NOT ${GTEST_DIR_LOCAL})
|
||||
# if GTEST_DIR is not defined, we download and build it
|
||||
set(gtest_RELEASE 1.7.0)
|
||||
@ -86,6 +87,11 @@ endif(NOT ${GTEST_DIR_LOCAL})
|
||||
set(GNSS_SDR_TEST_OPTIONAL_LIBS "")
|
||||
set(GNSS_SDR_TEST_OPTIONAL_HEADERS "")
|
||||
|
||||
if(ENABLE_CUDA)
|
||||
set(GNSS_SDR_TEST_OPTIONAL_HEADERS ${GNSS_SDR_TEST_OPTIONAL_HEADERS} ${CUDA_INCLUDE_DIRS})
|
||||
set(GNSS_SDR_TEST_OPTIONAL_LIBS ${GNSS_SDR_TEST_OPTIONAL_LIBS} ${CUDA_LIBRARIES})
|
||||
endif(ENABLE_CUDA)
|
||||
|
||||
if(ENABLE_GPERFTOOLS)
|
||||
if(GPERFTOOLS_FOUND)
|
||||
set(GNSS_SDR_TEST_OPTIONAL_LIBS "${GNSS_SDR_TEST_OPTIONAL_LIBS};${GPERFTOOLS_LIBRARIES}")
|
||||
@ -152,6 +158,10 @@ if(OPENCL_FOUND)
|
||||
add_definitions(-DOPENCL_BLOCKS_TEST=1)
|
||||
endif(OPENCL_FOUND)
|
||||
|
||||
if (ENABLE_CUDA)
|
||||
add_definitions(-DCUDA_BLOCKS_TEST=1)
|
||||
endif(ENABLE_CUDA)
|
||||
|
||||
add_definitions(-DTEST_PATH="${CMAKE_SOURCE_DIR}/src/tests/")
|
||||
|
||||
|
||||
|
167
src/tests/arithmetic/cpu_multicorrelator_test.cc
Normal file
167
src/tests/arithmetic/cpu_multicorrelator_test.cc
Normal file
@ -0,0 +1,167 @@
|
||||
/*!
|
||||
* \file cpu_multicorrelator_test.cc
|
||||
* \brief This file implements timing tests for the CPU multicorrelator.
|
||||
* \author Carles Fernandez-Prades, 2016. cfernandez(at)cttc.es
|
||||
*
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2016 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include <ctime>
|
||||
#include <complex>
|
||||
#include <thread>
|
||||
#include <volk/volk.h>
|
||||
#include "cpu_multicorrelator.h"
|
||||
#include "gps_sdr_signal_processing.h"
|
||||
#include "GPS_L1_CA.h"
|
||||
|
||||
|
||||
// Command-line flag (default 1000): number of benchmark rounds the CPU multicorrelator timing test runs and averages over.
DEFINE_int32(cpu_multicorrelator_iterations_test, 1000, "Number of averaged iterations in CPU multicorrelator test timing test");
|
||||
// Command-line flag (default 12): largest number of correlators the CPU multicorrelator timing test runs concurrently.
DEFINE_int32(cpu_multicorrelator_max_threads_test, 12, "Number of maximum concurrent correlators in CPU multicorrelator test timing test");
|
||||
|
||||
/*!
 * \brief Runs a single carrier wipe-off / multicorrelation pass on \p correlator.
 *
 * Plain free function so that it can be handed to std::thread as the thread
 * entry point. All arguments except \p d_n_correlator_taps are forwarded
 * unchanged to cpu_multicorrelator::Carrier_wipeoff_multicorrelator_resampler().
 *
 * \param correlator               correlator instance to run
 * \param d_rem_carrier_phase_rad  forwarded as first argument
 * \param d_carrier_phase_step_rad forwarded as second argument
 * \param d_code_phase_step_chips  forwarded as third argument
 * \param d_rem_code_phase_chips   forwarded as fourth argument
 * \param correlation_size         forwarded as fifth argument
 * \param d_n_correlator_taps      not used by the CPU call; kept so the signature
 *                                 mirrors the GPU counterpart
 */
void run_correlator_cpu(cpu_multicorrelator* correlator,
    float d_rem_carrier_phase_rad,
    float d_carrier_phase_step_rad,
    float d_code_phase_step_chips,
    float d_rem_code_phase_chips,
    int correlation_size,
    int d_n_correlator_taps)
{
    // The CPU implementation takes no tap count here; discard it explicitly.
    static_cast<void>(d_n_correlator_taps);

    correlator->Carrier_wipeoff_multicorrelator_resampler(
        d_rem_carrier_phase_rad,
        d_carrier_phase_step_rad,
        d_code_phase_step_chips,
        d_rem_code_phase_chips,
        correlation_size);
}
|
||||
|
||||
/*
 * Timing benchmark for cpu_multicorrelator.
 *
 * For each correlation length in {2048, 4096, 8192} and for every number of
 * concurrent correlators from 1 up to FLAGS_cpu_multicorrelator_max_threads_test,
 * the test launches one std::thread per correlator,
 * FLAGS_cpu_multicorrelator_iterations_test times, and prints the average
 * wall-clock time of one round. The only assertion on the benchmark itself is
 * that it does not throw.
 */
TEST(CPU_multicorrelator_test, MeasureExecutionTime)
{
    const int max_threads = FLAGS_cpu_multicorrelator_max_threads_test;
    // A negative flag value cannot size the correlator pool.
    ASSERT_GE(max_threads, 0);

    std::vector<std::thread> thread_pool;
    // std::vector instead of a variable-length array, which is not standard C++.
    std::vector<cpu_multicorrelator*> correlator_pool;
    thread_pool.reserve(max_threads);
    correlator_pool.reserve(max_threads);

    const unsigned int correlation_sizes[3] = {2048, 4096, 8192};
    double execution_times[3];

    const int d_n_correlator_taps = 3;  // Early, Prompt, and Late
    // Max correlation size, used to allocate all the necessary memory once.
    const int d_vector_length = correlation_sizes[2];

    //--- Allocate host memory ---------------------------------

    // Space for a vector with the C/A code replica sampled 1x/chip
    gr_complex* d_ca_code = static_cast<gr_complex*>(volk_malloc(static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS) * sizeof(gr_complex), volk_get_alignment()));
    ASSERT_TRUE(d_ca_code != nullptr);

    // Input signal buffer
    gr_complex* in_cpu = static_cast<gr_complex*>(volk_malloc(2 * d_vector_length * sizeof(gr_complex), volk_get_alignment()));
    ASSERT_TRUE(in_cpu != nullptr);

    // Correlator outputs (one scalar per tap)
    gr_complex* d_correlator_outs = static_cast<gr_complex*>(volk_malloc(d_n_correlator_taps * sizeof(gr_complex), volk_get_alignment()));
    ASSERT_TRUE(d_correlator_outs != nullptr);
    for (int n = 0; n < d_n_correlator_taps; n++)
        {
            d_correlator_outs[n] = gr_complex(0, 0);
        }

    // Tap delay values [chips]
    float* d_local_code_shift_chips = static_cast<float*>(volk_malloc(d_n_correlator_taps * sizeof(float), volk_get_alignment()));
    ASSERT_TRUE(d_local_code_shift_chips != nullptr);
    float d_early_late_spc_chips = 0.5;
    d_local_code_shift_chips[0] = -d_early_late_spc_chips;
    d_local_code_shift_chips[1] = 0.0;
    d_local_code_shift_chips[2] = d_early_late_spc_chips;

    //--- Perform initializations ------------------------------

    // Generate local reference (1 sample per chip)
    gps_l1_ca_code_gen_complex(d_ca_code, 1, 0);

    // Generate input signal: real and imaginary parts are rand() scaled to [0, 1]
    for (int n = 0; n < 2 * d_vector_length; n++)
        {
            in_cpu[n] = std::complex<float>(static_cast<float>(rand()) / static_cast<float>(RAND_MAX), static_cast<float>(rand()) / static_cast<float>(RAND_MAX));
        }

    // NOTE(review): every correlator is handed the same d_correlator_outs
    // buffer, so concurrently running correlators presumably write to the same
    // memory. The results are never inspected here, but confirm this is
    // acceptable for cpu_multicorrelator.
    for (int n = 0; n < max_threads; n++)
        {
            cpu_multicorrelator* correlator = new cpu_multicorrelator();
            correlator->init(d_vector_length, d_n_correlator_taps);
            correlator->set_input_output_vectors(d_correlator_outs, in_cpu);
            correlator->set_local_code_and_taps(static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS), d_ca_code, d_local_code_shift_chips);
            correlator_pool.push_back(correlator);
        }

    float d_rem_carrier_phase_rad = 0.0;
    float d_carrier_phase_step_rad = 0.1;
    float d_code_phase_step_chips = 0.3;
    float d_rem_code_phase_chips = 0.4;

    // The benchmark lives in a lambda so that EXPECT_NO_THROW receives a single
    // expression instead of a multi-statement macro argument.
    auto run_benchmark = [&]() {
        struct timeval tv;
        for (int correlation_sizes_idx = 0; correlation_sizes_idx < 3; correlation_sizes_idx++)
            {
                for (int current_max_threads = 1; current_max_threads < (max_threads + 1); current_max_threads++)
                    {
                        std::cout << "Running " << current_max_threads << " concurrent correlators" << std::endl;
                        gettimeofday(&tv, NULL);
                        // Widen before multiplying so the microsecond count cannot
                        // overflow where tv_sec is a 32-bit type.
                        long long int begin = static_cast<long long int>(tv.tv_sec) * 1000000LL + tv.tv_usec;
                        for (int k = 0; k < FLAGS_cpu_multicorrelator_iterations_test; k++)
                            {
                                // Create the concurrent correlator threads
                                for (int current_thread = 0; current_thread < current_max_threads; current_thread++)
                                    {
                                        thread_pool.push_back(std::thread(run_correlator_cpu,
                                            correlator_pool[current_thread],
                                            d_rem_carrier_phase_rad,
                                            d_carrier_phase_step_rad,
                                            d_code_phase_step_chips,
                                            d_rem_code_phase_chips,
                                            correlation_sizes[correlation_sizes_idx],
                                            d_n_correlator_taps));
                                    }
                                // Wait for the threads to finish their work and destroy the thread objects
                                for (auto& t : thread_pool)
                                    {
                                        t.join();
                                    }
                                thread_pool.clear();
                            }
                        gettimeofday(&tv, NULL);
                        long long int end = static_cast<long long int>(tv.tv_sec) * 1000000LL + tv.tv_usec;
                        // Average time of one round, in seconds. The slot is
                        // overwritten for every thread count of the same length.
                        execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_cpu_multicorrelator_iterations_test));
                        std::cout << "CPU Multicorrelator execution time for length=" << correlation_sizes[correlation_sizes_idx] << " : " << execution_times[correlation_sizes_idx] << " [s]" << std::endl;
                    }
            }
    };

    EXPECT_NO_THROW(run_benchmark());

    //--- Release resources ------------------------------------
    volk_free(d_local_code_shift_chips);
    volk_free(d_correlator_outs);
    volk_free(d_ca_code);
    volk_free(in_cpu);

    for (int n = 0; n < max_threads; n++)
        {
            correlator_pool[n]->free();
            delete correlator_pool[n];
        }
}
|
166
src/tests/arithmetic/gpu_multicorrelator_test.cc
Normal file
166
src/tests/arithmetic/gpu_multicorrelator_test.cc
Normal file
@ -0,0 +1,166 @@
|
||||
/*!
|
||||
* \file gpu_multicorrelator_test.cc
|
||||
* \brief This file implements timing tests for the GPU multicorrelator.
|
||||
* \author Carles Fernandez-Prades, 2016. cfernandez(at)cttc.es
|
||||
*
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2016 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include <ctime>
|
||||
#include <complex>
|
||||
#include <thread>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include "cuda_multicorrelator.h"
|
||||
#include "gps_sdr_signal_processing.h"
|
||||
#include "GPS_L1_CA.h"
|
||||
|
||||
|
||||
// Command-line flag (default 1000): number of benchmark rounds the GPU multicorrelator timing test runs and averages over.
DEFINE_int32(gpu_multicorrelator_iterations_test, 1000, "Number of averaged iterations in GPU multicorrelator test timing test");
|
||||
// Command-line flag (default 12): largest number of correlators the GPU multicorrelator timing test runs concurrently.
DEFINE_int32(gpu_multicorrelator_max_threads_test, 12, "Number of maximum concurrent correlators in GPU multicorrelator test timing test");
|
||||
|
||||
/*!
 * \brief Runs a single carrier wipe-off / multicorrelation pass on \p correlator.
 *
 * Plain free function so that it can be handed to std::thread as the thread
 * entry point. Every argument is forwarded unchanged, in order, to
 * cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda().
 *
 * \param correlator               correlator instance to run
 * \param d_rem_carrier_phase_rad  forwarded as first argument
 * \param d_carrier_phase_step_rad forwarded as second argument
 * \param d_code_phase_step_chips  forwarded as third argument
 * \param d_rem_code_phase_chips   forwarded as fourth argument
 * \param correlation_size         forwarded as fifth argument
 * \param d_n_correlator_taps      forwarded as sixth argument
 */
void run_correlator_gpu(cuda_multicorrelator* correlator,
    float d_rem_carrier_phase_rad,
    float d_carrier_phase_step_rad,
    float d_code_phase_step_chips,
    float d_rem_code_phase_chips,
    int correlation_size,
    int d_n_correlator_taps)
{
    correlator->Carrier_wipeoff_multicorrelator_resampler_cuda(
        d_rem_carrier_phase_rad,
        d_carrier_phase_step_rad,
        d_code_phase_step_chips,
        d_rem_code_phase_chips,
        correlation_size,
        d_n_correlator_taps);
}
|
||||
|
||||
/*
 * Timing benchmark for cuda_multicorrelator.
 *
 * For each correlation length in {2048, 4096, 8192} and for every number of
 * concurrent correlators from 1 up to FLAGS_gpu_multicorrelator_max_threads_test,
 * the test launches one std::thread per correlator,
 * FLAGS_gpu_multicorrelator_iterations_test times, and prints the average
 * wall-clock time of one round. Host buffers are allocated with cudaHostAlloc
 * using the mapped + write-combined flags. The only assertion on the benchmark
 * itself is that it does not throw.
 */
TEST(GPU_multicorrelator_test, MeasureExecutionTime)
{
    const int max_threads = FLAGS_gpu_multicorrelator_max_threads_test;
    // A negative flag value cannot size the correlator pool.
    ASSERT_GE(max_threads, 0);

    std::vector<std::thread> thread_pool;
    // std::vector instead of a variable-length array, which is not standard C++.
    std::vector<cuda_multicorrelator*> correlator_pool;
    thread_pool.reserve(max_threads);
    correlator_pool.reserve(max_threads);

    const unsigned int correlation_sizes[3] = {2048, 4096, 8192};
    double execution_times[3];

    // Null-initialized so a failed allocation can never leave a wild pointer.
    gr_complex* d_ca_code = nullptr;
    gr_complex* in_gpu = nullptr;
    gr_complex* d_correlator_outs = nullptr;
    float* d_local_code_shift_chips = nullptr;

    const int d_n_correlator_taps = 3;  // Early, Prompt, and Late
    // Max correlation size, used to allocate all the necessary memory once.
    const int d_vector_length = correlation_sizes[2];

    // Set GPU flags: enable mapping of pinned host memory.
    // A failure here is reported but not fatal; the allocations below are
    // asserted and will stop the test if the device is really unusable.
    const cudaError_t flags_status = cudaSetDeviceFlags(cudaDeviceMapHost);
    if (flags_status != cudaSuccess)
        {
            std::cout << "cudaSetDeviceFlags failed: " << cudaGetErrorString(flags_status) << std::endl;
            cudaGetLastError();  // reset the error state before continuing
        }

    //--- Allocate host memory ---------------------------------
    // Pinned memory mode - use the CUDA allocator to get OS-pinned memory.
    // NOTE: if one of these assertions fires, the buffers allocated before it
    // are not released; the test has already failed at that point.

    // Space for a vector with the C/A code replica sampled 1x/chip
    ASSERT_EQ(cudaSuccess, cudaHostAlloc(reinterpret_cast<void**>(&d_ca_code), (static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS) * sizeof(gr_complex)), cudaHostAllocMapped | cudaHostAllocWriteCombined));
    // Space for the tap delay values [chips]
    ASSERT_EQ(cudaSuccess, cudaHostAlloc(reinterpret_cast<void**>(&d_local_code_shift_chips), d_n_correlator_taps * sizeof(float), cudaHostAllocMapped | cudaHostAllocWriteCombined));
    // Input signal buffer
    ASSERT_EQ(cudaSuccess, cudaHostAlloc(reinterpret_cast<void**>(&in_gpu), 2 * d_vector_length * sizeof(gr_complex), cudaHostAllocMapped | cudaHostAllocWriteCombined));
    // Correlator outputs (one scalar per tap)
    ASSERT_EQ(cudaSuccess, cudaHostAlloc(reinterpret_cast<void**>(&d_correlator_outs), sizeof(gr_complex) * d_n_correlator_taps, cudaHostAllocMapped | cudaHostAllocWriteCombined));

    //--- Perform initializations ------------------------------

    // Generate local reference (1 sample per chip)
    gps_l1_ca_code_gen_complex(d_ca_code, 1, 0);

    // Generate input signal: real and imaginary parts are rand() scaled to [0, 1]
    for (int n = 0; n < 2 * d_vector_length; n++)
        {
            in_gpu[n] = std::complex<float>(static_cast<float>(rand()) / static_cast<float>(RAND_MAX), static_cast<float>(rand()) / static_cast<float>(RAND_MAX));
        }

    // Set tap delay values [chips]
    float d_early_late_spc_chips = 0.5;
    d_local_code_shift_chips[0] = -d_early_late_spc_chips;
    d_local_code_shift_chips[1] = 0.0;
    d_local_code_shift_chips[2] = d_early_late_spc_chips;

    // NOTE(review): d_ca_code and d_local_code_shift_chips are filled above but
    // never handed to the correlators in this test, whereas the CPU test passes
    // them via set_local_code_and_taps(). Confirm whether a call is missing.
    // NOTE(review): every correlator is handed the same d_correlator_outs
    // buffer; confirm that concurrent writes are acceptable here.
    for (int n = 0; n < max_threads; n++)
        {
            cuda_multicorrelator* correlator = new cuda_multicorrelator();
            correlator->init_cuda_integrated_resampler(d_vector_length, GPS_L1_CA_CODE_LENGTH_CHIPS, d_n_correlator_taps);
            correlator->set_input_output_vectors(d_correlator_outs, in_gpu);
            correlator_pool.push_back(correlator);
        }

    float d_rem_carrier_phase_rad = 0.0;
    float d_carrier_phase_step_rad = 0.1;
    float d_code_phase_step_chips = 0.3;
    float d_rem_code_phase_chips = 0.4;

    // The benchmark lives in a lambda so that EXPECT_NO_THROW receives a single
    // expression instead of a multi-statement macro argument.
    auto run_benchmark = [&]() {
        struct timeval tv;
        for (int correlation_sizes_idx = 0; correlation_sizes_idx < 3; correlation_sizes_idx++)
            {
                for (int current_max_threads = 1; current_max_threads < (max_threads + 1); current_max_threads++)
                    {
                        std::cout << "Running " << current_max_threads << " concurrent correlators" << std::endl;
                        gettimeofday(&tv, NULL);
                        // Widen before multiplying so the microsecond count cannot
                        // overflow where tv_sec is a 32-bit type.
                        long long int begin = static_cast<long long int>(tv.tv_sec) * 1000000LL + tv.tv_usec;
                        for (int k = 0; k < FLAGS_gpu_multicorrelator_iterations_test; k++)
                            {
                                // Create the concurrent correlator threads
                                for (int current_thread = 0; current_thread < current_max_threads; current_thread++)
                                    {
                                        thread_pool.push_back(std::thread(run_correlator_gpu,
                                            correlator_pool[current_thread],
                                            d_rem_carrier_phase_rad,
                                            d_carrier_phase_step_rad,
                                            d_code_phase_step_chips,
                                            d_rem_code_phase_chips,
                                            correlation_sizes[correlation_sizes_idx],
                                            d_n_correlator_taps));
                                    }
                                // Wait for the threads to finish their work and destroy the thread objects
                                for (auto& t : thread_pool)
                                    {
                                        t.join();
                                    }
                                thread_pool.clear();
                            }
                        gettimeofday(&tv, NULL);
                        long long int end = static_cast<long long int>(tv.tv_sec) * 1000000LL + tv.tv_usec;
                        // Average time of one round, in seconds. The slot is
                        // overwritten for every thread count of the same length.
                        execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_gpu_multicorrelator_iterations_test));
                        std::cout << "GPU Multicorrelator execution time for length=" << correlation_sizes[correlation_sizes_idx] << " : " << execution_times[correlation_sizes_idx] << " [s]" << std::endl;
                    }
            }
    };

    EXPECT_NO_THROW(run_benchmark());

    //--- Release resources ------------------------------------
    // Order kept as in the original test: pinned host buffers are released
    // before the correlators' own CUDA teardown runs.
    cudaFreeHost(in_gpu);
    cudaFreeHost(d_correlator_outs);
    cudaFreeHost(d_local_code_shift_chips);
    cudaFreeHost(d_ca_code);

    for (int n = 0; n < max_threads; n++)
        {
            correlator_pool[n]->free_cuda();
            delete correlator_pool[n];
        }
}
|
@ -92,9 +92,15 @@ DECLARE_string(log_dir);
|
||||
#include "gnss_block/gps_l2_m_pcps_acquisition_test.cc"
|
||||
#include "gnss_block/gps_l1_ca_pcps_acquisition_gsoc2013_test.cc"
|
||||
//#include "gnss_block/gps_l1_ca_pcps_multithread_acquisition_gsoc2013_test.cc"
|
||||
#include "arithmetic/cpu_multicorrelator_test.cc"
|
||||
#if OPENCL_BLOCKS_TEST
|
||||
#include "gnss_block/gps_l1_ca_pcps_opencl_acquisition_gsoc2013_test.cc"
|
||||
#endif
|
||||
|
||||
#if CUDA_BLOCKS_TEST
|
||||
#include "arithmetic/gpu_multicorrelator_test.cc"
|
||||
#endif
|
||||
|
||||
#include "gnss_block/gps_l1_ca_pcps_quicksync_acquisition_gsoc2014_test.cc"
|
||||
#include "gnss_block/gps_l1_ca_pcps_tong_acquisition_gsoc2013_test.cc"
|
||||
#include "gnss_block/galileo_e1_pcps_ambiguous_acquisition_test.cc"
|
||||
|
Loading…
Reference in New Issue
Block a user