1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2024-12-15 20:50:33 +00:00

GPU / CPU unit test performance improvements

This commit is contained in:
Javier Arribas 2016-06-17 16:29:07 +02:00
parent 5ff34ec91e
commit 2c327b0de4
2 changed files with 50 additions and 51 deletions

View File

@ -46,8 +46,9 @@ void run_correlator_cpu(cpu_multicorrelator* correlator,
float d_carrier_phase_step_rad, float d_carrier_phase_step_rad,
float d_code_phase_step_chips, float d_code_phase_step_chips,
float d_rem_code_phase_chips, float d_rem_code_phase_chips,
int correlation_size, int correlation_size)
int d_n_correlator_taps) {
for(int k = 0; k < FLAGS_cpu_multicorrelator_iterations_test; k++)
{ {
correlator->Carrier_wipeoff_multicorrelator_resampler(d_rem_carrier_phase_rad, correlator->Carrier_wipeoff_multicorrelator_resampler(d_rem_carrier_phase_rad,
d_carrier_phase_step_rad, d_carrier_phase_step_rad,
@ -55,6 +56,7 @@ void run_correlator_cpu(cpu_multicorrelator* correlator,
d_rem_code_phase_chips, d_rem_code_phase_chips,
correlation_size); correlation_size);
} }
}
TEST(CPU_multicorrelator_test, MeasureExecutionTime) TEST(CPU_multicorrelator_test, MeasureExecutionTime)
{ {
@ -124,8 +126,6 @@ TEST(CPU_multicorrelator_test, MeasureExecutionTime)
std::cout<<"Running "<<current_max_threads<<" concurrent correlators"<<std::endl; std::cout<<"Running "<<current_max_threads<<" concurrent correlators"<<std::endl;
gettimeofday(&tv, NULL); gettimeofday(&tv, NULL);
long long int begin = tv.tv_sec * 1000000 + tv.tv_usec; long long int begin = tv.tv_sec * 1000000 + tv.tv_usec;
for(int k = 0; k < FLAGS_cpu_multicorrelator_iterations_test; k++)
{
//create the concurrent correlator threads //create the concurrent correlator threads
for (int current_thread=0;current_thread<current_max_threads;current_thread++) for (int current_thread=0;current_thread<current_max_threads;current_thread++)
{ {
@ -135,15 +135,13 @@ TEST(CPU_multicorrelator_test, MeasureExecutionTime)
d_carrier_phase_step_rad, d_carrier_phase_step_rad,
d_code_phase_step_chips, d_code_phase_step_chips,
d_rem_code_phase_chips, d_rem_code_phase_chips,
correlation_sizes[correlation_sizes_idx], correlation_sizes[correlation_sizes_idx]));
d_n_correlator_taps));
} }
//wait for the threads to finish their work and destroy the thread objects //wait for the threads to finish their work and destroy the thread objects
for(auto &t : thread_pool){ for(auto &t : thread_pool){
t.join(); t.join();
} }
thread_pool.clear(); thread_pool.clear();
}
gettimeofday(&tv, NULL); gettimeofday(&tv, NULL);
long long int end = tv.tv_sec * 1000000 + tv.tv_usec; long long int end = tv.tv_sec * 1000000 + tv.tv_usec;
execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_cpu_multicorrelator_iterations_test)); execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_cpu_multicorrelator_iterations_test));
@ -162,6 +160,5 @@ TEST(CPU_multicorrelator_test, MeasureExecutionTime)
for (int n=0;n<max_threads;n++) for (int n=0;n<max_threads;n++)
{ {
correlator_pool[n]->free(); correlator_pool[n]->free();
delete(correlator_pool[n]);
} }
} }

View File

@ -34,6 +34,7 @@
#include <thread> #include <thread>
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <cuda_profiler_api.h>
#include "cuda_multicorrelator.h" #include "cuda_multicorrelator.h"
#include "gps_sdr_signal_processing.h" #include "gps_sdr_signal_processing.h"
#include "GPS_L1_CA.h" #include "GPS_L1_CA.h"
@ -49,6 +50,8 @@ void run_correlator_gpu(cuda_multicorrelator* correlator,
float d_rem_code_phase_chips, float d_rem_code_phase_chips,
int correlation_size, int correlation_size,
int d_n_correlator_taps) int d_n_correlator_taps)
{
for(int k = 0; k < FLAGS_cpu_multicorrelator_iterations_test; k++)
{ {
correlator->Carrier_wipeoff_multicorrelator_resampler_cuda(d_rem_carrier_phase_rad, correlator->Carrier_wipeoff_multicorrelator_resampler_cuda(d_rem_carrier_phase_rad,
d_carrier_phase_step_rad, d_carrier_phase_step_rad,
@ -57,6 +60,7 @@ void run_correlator_gpu(cuda_multicorrelator* correlator,
correlation_size, correlation_size,
d_n_correlator_taps); d_n_correlator_taps);
} }
}
TEST(GPU_multicorrelator_test, MeasureExecutionTime) TEST(GPU_multicorrelator_test, MeasureExecutionTime)
{ {
@ -121,11 +125,10 @@ TEST(GPU_multicorrelator_test, MeasureExecutionTime)
std::cout<<"Running "<<current_max_threads<<" concurrent correlators"<<std::endl; std::cout<<"Running "<<current_max_threads<<" concurrent correlators"<<std::endl;
gettimeofday(&tv, NULL); gettimeofday(&tv, NULL);
long long int begin = tv.tv_sec * 1000000 + tv.tv_usec; long long int begin = tv.tv_sec * 1000000 + tv.tv_usec;
for(int k = 0; k < FLAGS_gpu_multicorrelator_iterations_test; k++)
{
//create the concurrent correlator threads //create the concurrent correlator threads
for (int current_thread=0;current_thread<current_max_threads;current_thread++) for (int current_thread=0;current_thread<current_max_threads;current_thread++)
{ {
//cudaProfilerStart();
thread_pool.push_back(std::thread(run_correlator_gpu, thread_pool.push_back(std::thread(run_correlator_gpu,
correlator_pool[current_thread], correlator_pool[current_thread],
d_rem_carrier_phase_rad, d_rem_carrier_phase_rad,
@ -134,13 +137,13 @@ TEST(GPU_multicorrelator_test, MeasureExecutionTime)
d_rem_code_phase_chips, d_rem_code_phase_chips,
correlation_sizes[correlation_sizes_idx], correlation_sizes[correlation_sizes_idx],
d_n_correlator_taps)); d_n_correlator_taps));
//cudaProfilerStop();
} }
//wait for the threads to finish their work and destroy the thread objects //wait for the threads to finish their work and destroy the thread objects
for(auto &t : thread_pool){ for(auto &t : thread_pool){
t.join(); t.join();
} }
thread_pool.clear(); thread_pool.clear();
}
gettimeofday(&tv, NULL); gettimeofday(&tv, NULL);
long long int end = tv.tv_sec * 1000000 + tv.tv_usec; long long int end = tv.tv_sec * 1000000 + tv.tv_usec;
execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_gpu_multicorrelator_iterations_test)); execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_gpu_multicorrelator_iterations_test));
@ -158,7 +161,6 @@ TEST(GPU_multicorrelator_test, MeasureExecutionTime)
for (int n=0;n<max_threads;n++) for (int n=0;n<max_threads;n++)
{ {
correlator_pool[n]->free_cuda(); correlator_pool[n]->free_cuda();
delete(correlator_pool[n]);
} }