mirror of
				https://github.com/gnss-sdr/gnss-sdr
				synced 2025-10-31 15:23:04 +00:00 
			
		
		
		
	GPU / CPU unit test performance improvements
This commit is contained in:
		| @@ -46,14 +46,16 @@ void run_correlator_cpu(cpu_multicorrelator* correlator, | |||||||
|                     float d_carrier_phase_step_rad, |                     float d_carrier_phase_step_rad, | ||||||
|                     float d_code_phase_step_chips, |                     float d_code_phase_step_chips, | ||||||
|                     float d_rem_code_phase_chips, |                     float d_rem_code_phase_chips, | ||||||
|                     int correlation_size, |                     int correlation_size) | ||||||
|                     int d_n_correlator_taps) |  | ||||||
| { | { | ||||||
|  |     for(int k = 0; k < FLAGS_cpu_multicorrelator_iterations_test; k++) | ||||||
|  |     { | ||||||
|         correlator->Carrier_wipeoff_multicorrelator_resampler(d_rem_carrier_phase_rad, |         correlator->Carrier_wipeoff_multicorrelator_resampler(d_rem_carrier_phase_rad, | ||||||
|                                                                    d_carrier_phase_step_rad, |                                                                    d_carrier_phase_step_rad, | ||||||
|                                                                    d_code_phase_step_chips, |                                                                    d_code_phase_step_chips, | ||||||
|                                                                    d_rem_code_phase_chips, |                                                                    d_rem_code_phase_chips, | ||||||
|                                                                    correlation_size); |                                                                    correlation_size); | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| TEST(CPU_multicorrelator_test, MeasureExecutionTime) | TEST(CPU_multicorrelator_test, MeasureExecutionTime) | ||||||
| @@ -124,8 +126,6 @@ TEST(CPU_multicorrelator_test, MeasureExecutionTime) | |||||||
|                 std::cout<<"Running "<<current_max_threads<<" concurrent correlators"<<std::endl; |                 std::cout<<"Running "<<current_max_threads<<" concurrent correlators"<<std::endl; | ||||||
|                 gettimeofday(&tv, NULL); |                 gettimeofday(&tv, NULL); | ||||||
|                 long long int begin = tv.tv_sec * 1000000 + tv.tv_usec; |                 long long int begin = tv.tv_sec * 1000000 + tv.tv_usec; | ||||||
|                 for(int k = 0; k < FLAGS_cpu_multicorrelator_iterations_test; k++) |  | ||||||
|                 { |  | ||||||
|                 //create the concurrent correlator threads |                 //create the concurrent correlator threads | ||||||
|                 for (int current_thread=0;current_thread<current_max_threads;current_thread++) |                 for (int current_thread=0;current_thread<current_max_threads;current_thread++) | ||||||
|                 { |                 { | ||||||
| @@ -135,15 +135,13 @@ TEST(CPU_multicorrelator_test, MeasureExecutionTime) | |||||||
|                             d_carrier_phase_step_rad, |                             d_carrier_phase_step_rad, | ||||||
|                             d_code_phase_step_chips, |                             d_code_phase_step_chips, | ||||||
|                             d_rem_code_phase_chips, |                             d_rem_code_phase_chips, | ||||||
|                                 correlation_sizes[correlation_sizes_idx], |                             correlation_sizes[correlation_sizes_idx])); | ||||||
|                                 d_n_correlator_taps)); |  | ||||||
|                 } |                 } | ||||||
|                 //wait the threads to finish they work and destroy the thread objects |                 //wait the threads to finish they work and destroy the thread objects | ||||||
|                 for(auto &t : thread_pool){ |                 for(auto &t : thread_pool){ | ||||||
|                     t.join(); |                     t.join(); | ||||||
|                 } |                 } | ||||||
|                 thread_pool.clear(); |                 thread_pool.clear(); | ||||||
|                 } |  | ||||||
|                 gettimeofday(&tv, NULL); |                 gettimeofday(&tv, NULL); | ||||||
|                 long long int end = tv.tv_sec * 1000000 + tv.tv_usec; |                 long long int end = tv.tv_sec * 1000000 + tv.tv_usec; | ||||||
|                 execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_cpu_multicorrelator_iterations_test)); |                 execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_cpu_multicorrelator_iterations_test)); | ||||||
| @@ -162,6 +160,5 @@ TEST(CPU_multicorrelator_test, MeasureExecutionTime) | |||||||
|     for (int n=0;n<max_threads;n++) |     for (int n=0;n<max_threads;n++) | ||||||
|     { |     { | ||||||
|         correlator_pool[n]->free(); |         correlator_pool[n]->free(); | ||||||
|         delete(correlator_pool[n]); |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -34,6 +34,7 @@ | |||||||
| #include <thread> | #include <thread> | ||||||
| #include <cuda.h> | #include <cuda.h> | ||||||
| #include <cuda_runtime.h> | #include <cuda_runtime.h> | ||||||
|  | #include <cuda_profiler_api.h> | ||||||
| #include "cuda_multicorrelator.h" | #include "cuda_multicorrelator.h" | ||||||
| #include "gps_sdr_signal_processing.h" | #include "gps_sdr_signal_processing.h" | ||||||
| #include "GPS_L1_CA.h" | #include "GPS_L1_CA.h" | ||||||
| @@ -50,12 +51,15 @@ void run_correlator_gpu(cuda_multicorrelator* correlator, | |||||||
|                     int correlation_size, |                     int correlation_size, | ||||||
|                     int d_n_correlator_taps) |                     int d_n_correlator_taps) | ||||||
| { | { | ||||||
|  |     for(int k = 0; k < FLAGS_cpu_multicorrelator_iterations_test; k++) | ||||||
|  |     { | ||||||
|         correlator->Carrier_wipeoff_multicorrelator_resampler_cuda(d_rem_carrier_phase_rad, |         correlator->Carrier_wipeoff_multicorrelator_resampler_cuda(d_rem_carrier_phase_rad, | ||||||
|                                                                             d_carrier_phase_step_rad, |                                                                             d_carrier_phase_step_rad, | ||||||
|                                                                             d_code_phase_step_chips, |                                                                             d_code_phase_step_chips, | ||||||
|                                                                             d_rem_code_phase_chips, |                                                                             d_rem_code_phase_chips, | ||||||
|                                                                             correlation_size, |                                                                             correlation_size, | ||||||
|                                                                             d_n_correlator_taps); |                                                                             d_n_correlator_taps); | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| TEST(GPU_multicorrelator_test, MeasureExecutionTime) | TEST(GPU_multicorrelator_test, MeasureExecutionTime) | ||||||
| @@ -121,11 +125,10 @@ TEST(GPU_multicorrelator_test, MeasureExecutionTime) | |||||||
|                     std::cout<<"Running "<<current_max_threads<<" concurrent correlators"<<std::endl; |                     std::cout<<"Running "<<current_max_threads<<" concurrent correlators"<<std::endl; | ||||||
|                     gettimeofday(&tv, NULL); |                     gettimeofday(&tv, NULL); | ||||||
|                     long long int begin = tv.tv_sec * 1000000 + tv.tv_usec; |                     long long int begin = tv.tv_sec * 1000000 + tv.tv_usec; | ||||||
|                     for(int k = 0; k < FLAGS_gpu_multicorrelator_iterations_test; k++) |  | ||||||
|                     { |  | ||||||
|                     //create the concurrent correlator threads |                     //create the concurrent correlator threads | ||||||
|                     for (int current_thread=0;current_thread<current_max_threads;current_thread++) |                     for (int current_thread=0;current_thread<current_max_threads;current_thread++) | ||||||
|                     { |                     { | ||||||
|  |                         //cudaProfilerStart(); | ||||||
|                         thread_pool.push_back(std::thread(run_correlator_gpu, |                         thread_pool.push_back(std::thread(run_correlator_gpu, | ||||||
|                                 correlator_pool[current_thread], |                                 correlator_pool[current_thread], | ||||||
|                                 d_rem_carrier_phase_rad, |                                 d_rem_carrier_phase_rad, | ||||||
| @@ -134,13 +137,13 @@ TEST(GPU_multicorrelator_test, MeasureExecutionTime) | |||||||
|                                 d_rem_code_phase_chips, |                                 d_rem_code_phase_chips, | ||||||
|                                 correlation_sizes[correlation_sizes_idx], |                                 correlation_sizes[correlation_sizes_idx], | ||||||
|                                 d_n_correlator_taps)); |                                 d_n_correlator_taps)); | ||||||
|  |                         //cudaProfilerStop(); | ||||||
|                     } |                     } | ||||||
|                     //wait the threads to finish they work and destroy the thread objects |                     //wait the threads to finish they work and destroy the thread objects | ||||||
|                     for(auto &t : thread_pool){ |                     for(auto &t : thread_pool){ | ||||||
|                          t.join(); |                          t.join(); | ||||||
|                      } |                      } | ||||||
|                     thread_pool.clear(); |                     thread_pool.clear(); | ||||||
|                     } |  | ||||||
|                     gettimeofday(&tv, NULL); |                     gettimeofday(&tv, NULL); | ||||||
|                     long long int end = tv.tv_sec * 1000000 + tv.tv_usec; |                     long long int end = tv.tv_sec * 1000000 + tv.tv_usec; | ||||||
|                     execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_gpu_multicorrelator_iterations_test)); |                     execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_gpu_multicorrelator_iterations_test)); | ||||||
| @@ -158,7 +161,6 @@ TEST(GPU_multicorrelator_test, MeasureExecutionTime) | |||||||
|     for (int n=0;n<max_threads;n++) |     for (int n=0;n<max_threads;n++) | ||||||
|     { |     { | ||||||
|         correlator_pool[n]->free_cuda(); |         correlator_pool[n]->free_cuda(); | ||||||
|         delete(correlator_pool[n]); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Javier Arribas
					Javier Arribas