1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2024-12-15 12:40:35 +00:00

GPU / CPU unit test performance improvements

This commit is contained in:
Javier Arribas 2016-06-17 16:29:07 +02:00
parent 5ff34ec91e
commit 2c327b0de4
2 changed files with 50 additions and 51 deletions

View File

@ -46,14 +46,16 @@ void run_correlator_cpu(cpu_multicorrelator* correlator,
float d_carrier_phase_step_rad, float d_carrier_phase_step_rad,
float d_code_phase_step_chips, float d_code_phase_step_chips,
float d_rem_code_phase_chips, float d_rem_code_phase_chips,
int correlation_size, int correlation_size)
int d_n_correlator_taps)
{ {
correlator->Carrier_wipeoff_multicorrelator_resampler(d_rem_carrier_phase_rad, for(int k = 0; k < FLAGS_cpu_multicorrelator_iterations_test; k++)
d_carrier_phase_step_rad, {
d_code_phase_step_chips, correlator->Carrier_wipeoff_multicorrelator_resampler(d_rem_carrier_phase_rad,
d_rem_code_phase_chips, d_carrier_phase_step_rad,
correlation_size); d_code_phase_step_chips,
d_rem_code_phase_chips,
correlation_size);
}
} }
TEST(CPU_multicorrelator_test, MeasureExecutionTime) TEST(CPU_multicorrelator_test, MeasureExecutionTime)
@ -124,26 +126,22 @@ TEST(CPU_multicorrelator_test, MeasureExecutionTime)
std::cout<<"Running "<<current_max_threads<<" concurrent correlators"<<std::endl; std::cout<<"Running "<<current_max_threads<<" concurrent correlators"<<std::endl;
gettimeofday(&tv, NULL); gettimeofday(&tv, NULL);
long long int begin = tv.tv_sec * 1000000 + tv.tv_usec; long long int begin = tv.tv_sec * 1000000 + tv.tv_usec;
for(int k = 0; k < FLAGS_cpu_multicorrelator_iterations_test; k++) //create the concurrent correlator threads
for (int current_thread=0;current_thread<current_max_threads;current_thread++)
{ {
//create the concurrent correlator threads thread_pool.push_back(std::thread(run_correlator_cpu,
for (int current_thread=0;current_thread<current_max_threads;current_thread++) correlator_pool[current_thread],
{ d_rem_carrier_phase_rad,
thread_pool.push_back(std::thread(run_correlator_cpu, d_carrier_phase_step_rad,
correlator_pool[current_thread], d_code_phase_step_chips,
d_rem_carrier_phase_rad, d_rem_code_phase_chips,
d_carrier_phase_step_rad, correlation_sizes[correlation_sizes_idx]));
d_code_phase_step_chips,
d_rem_code_phase_chips,
correlation_sizes[correlation_sizes_idx],
d_n_correlator_taps));
}
//wait the threads to finish they work and destroy the thread objects
for(auto &t : thread_pool){
t.join();
}
thread_pool.clear();
} }
//wait the threads to finish they work and destroy the thread objects
for(auto &t : thread_pool){
t.join();
}
thread_pool.clear();
gettimeofday(&tv, NULL); gettimeofday(&tv, NULL);
long long int end = tv.tv_sec * 1000000 + tv.tv_usec; long long int end = tv.tv_sec * 1000000 + tv.tv_usec;
execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_cpu_multicorrelator_iterations_test)); execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_cpu_multicorrelator_iterations_test));
@ -162,6 +160,5 @@ TEST(CPU_multicorrelator_test, MeasureExecutionTime)
for (int n=0;n<max_threads;n++) for (int n=0;n<max_threads;n++)
{ {
correlator_pool[n]->free(); correlator_pool[n]->free();
delete(correlator_pool[n]);
} }
} }

View File

@ -34,6 +34,7 @@
#include <thread> #include <thread>
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <cuda_profiler_api.h>
#include "cuda_multicorrelator.h" #include "cuda_multicorrelator.h"
#include "gps_sdr_signal_processing.h" #include "gps_sdr_signal_processing.h"
#include "GPS_L1_CA.h" #include "GPS_L1_CA.h"
@ -50,12 +51,15 @@ void run_correlator_gpu(cuda_multicorrelator* correlator,
int correlation_size, int correlation_size,
int d_n_correlator_taps) int d_n_correlator_taps)
{ {
correlator->Carrier_wipeoff_multicorrelator_resampler_cuda(d_rem_carrier_phase_rad, for(int k = 0; k < FLAGS_cpu_multicorrelator_iterations_test; k++)
d_carrier_phase_step_rad, {
d_code_phase_step_chips, correlator->Carrier_wipeoff_multicorrelator_resampler_cuda(d_rem_carrier_phase_rad,
d_rem_code_phase_chips, d_carrier_phase_step_rad,
correlation_size, d_code_phase_step_chips,
d_n_correlator_taps); d_rem_code_phase_chips,
correlation_size,
d_n_correlator_taps);
}
} }
TEST(GPU_multicorrelator_test, MeasureExecutionTime) TEST(GPU_multicorrelator_test, MeasureExecutionTime)
@ -121,26 +125,25 @@ TEST(GPU_multicorrelator_test, MeasureExecutionTime)
std::cout<<"Running "<<current_max_threads<<" concurrent correlators"<<std::endl; std::cout<<"Running "<<current_max_threads<<" concurrent correlators"<<std::endl;
gettimeofday(&tv, NULL); gettimeofday(&tv, NULL);
long long int begin = tv.tv_sec * 1000000 + tv.tv_usec; long long int begin = tv.tv_sec * 1000000 + tv.tv_usec;
for(int k = 0; k < FLAGS_gpu_multicorrelator_iterations_test; k++) //create the concurrent correlator threads
for (int current_thread=0;current_thread<current_max_threads;current_thread++)
{ {
//create the concurrent correlator threads //cudaProfilerStart();
for (int current_thread=0;current_thread<current_max_threads;current_thread++) thread_pool.push_back(std::thread(run_correlator_gpu,
{ correlator_pool[current_thread],
thread_pool.push_back(std::thread(run_correlator_gpu, d_rem_carrier_phase_rad,
correlator_pool[current_thread], d_carrier_phase_step_rad,
d_rem_carrier_phase_rad, d_code_phase_step_chips,
d_carrier_phase_step_rad, d_rem_code_phase_chips,
d_code_phase_step_chips, correlation_sizes[correlation_sizes_idx],
d_rem_code_phase_chips, d_n_correlator_taps));
correlation_sizes[correlation_sizes_idx], //cudaProfilerStop();
d_n_correlator_taps));
}
//wait the threads to finish they work and destroy the thread objects
for(auto &t : thread_pool){
t.join();
}
thread_pool.clear();
} }
//wait the threads to finish they work and destroy the thread objects
for(auto &t : thread_pool){
t.join();
}
thread_pool.clear();
gettimeofday(&tv, NULL); gettimeofday(&tv, NULL);
long long int end = tv.tv_sec * 1000000 + tv.tv_usec; long long int end = tv.tv_sec * 1000000 + tv.tv_usec;
execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_gpu_multicorrelator_iterations_test)); execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_gpu_multicorrelator_iterations_test));
@ -158,7 +161,6 @@ TEST(GPU_multicorrelator_test, MeasureExecutionTime)
for (int n=0;n<max_threads;n++) for (int n=0;n<max_threads;n++)
{ {
correlator_pool[n]->free_cuda(); correlator_pool[n]->free_cuda();
delete(correlator_pool[n]);
} }