GPU / CPU unit test performance improvements

2025-10-28 05:57:39 +00:00 · 2016-06-17 16:29:07 +02:00
parent 5ff34ec91e
commit 2c327b0de4
2 changed files with 50 additions and 51 deletions
--- a/src/tests/arithmetic/cpu_multicorrelator_test.cc
+++ b/src/tests/arithmetic/cpu_multicorrelator_test.cc
@@ -46,14 +46,16 @@ void run_correlator_cpu(cpu_multicorrelator* correlator,
                    float d_carrier_phase_step_rad,
                    float d_code_phase_step_chips,
                    float d_rem_code_phase_chips,
-                    int correlation_size,
+                    int correlation_size)
                    int d_n_correlator_taps)
 {
-    correlator->Carrier_wipeoff_multicorrelator_resampler(d_rem_carrier_phase_rad,
+    for(int k = 0; k < FLAGS_cpu_multicorrelator_iterations_test; k++)
-                                                               d_carrier_phase_step_rad,
+    {
-                                                               d_code_phase_step_chips,
+        correlator->Carrier_wipeoff_multicorrelator_resampler(d_rem_carrier_phase_rad,
-                                                               d_rem_code_phase_chips,
+                                                                   d_carrier_phase_step_rad,
-                                                               correlation_size);
+                                                                   d_code_phase_step_chips,
                                                                   d_rem_code_phase_chips,
                                                                   correlation_size);
    }
 }
 TEST(CPU_multicorrelator_test, MeasureExecutionTime)
@@ -124,26 +126,22 @@ TEST(CPU_multicorrelator_test, MeasureExecutionTime)
                std::cout<<"Running "<<current_max_threads<<" concurrent correlators"<<std::endl;
                gettimeofday(&tv, NULL);
                long long int begin = tv.tv_sec * 1000000 + tv.tv_usec;
-                for(int k = 0; k < FLAGS_cpu_multicorrelator_iterations_test; k++)
+                //create the concurrent correlator threads
                for (int current_thread=0;current_thread<current_max_threads;current_thread++)
                {
-                    //create the concurrent correlator threads
+                    thread_pool.push_back(std::thread(run_correlator_cpu,
-                    for (int current_thread=0;current_thread<current_max_threads;current_thread++)
+                            correlator_pool[current_thread],
-                    {
+                            d_rem_carrier_phase_rad,
-                        thread_pool.push_back(std::thread(run_correlator_cpu,
+                            d_carrier_phase_step_rad,
-                                correlator_pool[current_thread],
+                            d_code_phase_step_chips,
-                                d_rem_carrier_phase_rad,
+                            d_rem_code_phase_chips,
-                                d_carrier_phase_step_rad,
+                            correlation_sizes[correlation_sizes_idx]));
                                d_code_phase_step_chips,
                                d_rem_code_phase_chips,
                                correlation_sizes[correlation_sizes_idx],
                                d_n_correlator_taps));
                    }
                    //wait the threads to finish they work and destroy the thread objects
                    for(auto &t : thread_pool){
                    t.join();
                    }
                    thread_pool.clear();
                }
                //wait the threads to finish they work and destroy the thread objects
                for(auto &t : thread_pool){
                    t.join();
                }
                thread_pool.clear();
                gettimeofday(&tv, NULL);
                long long int end = tv.tv_sec * 1000000 + tv.tv_usec;
                execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_cpu_multicorrelator_iterations_test));
@@ -162,6 +160,5 @@ TEST(CPU_multicorrelator_test, MeasureExecutionTime)
    for (int n=0;n<max_threads;n++)
    {
        correlator_pool[n]->free();
        delete(correlator_pool[n]);
    }
 }
--- a/src/tests/arithmetic/gpu_multicorrelator_test.cc
+++ b/src/tests/arithmetic/gpu_multicorrelator_test.cc
@@ -34,6 +34,7 @@
 #include <thread>
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <cuda_profiler_api.h>
 #include "cuda_multicorrelator.h"
 #include "gps_sdr_signal_processing.h"
 #include "GPS_L1_CA.h"
@@ -50,12 +51,15 @@ void run_correlator_gpu(cuda_multicorrelator* correlator,
                    int correlation_size,
                    int d_n_correlator_taps)
 {
-    correlator->Carrier_wipeoff_multicorrelator_resampler_cuda(d_rem_carrier_phase_rad,
+    for(int k = 0; k < FLAGS_cpu_multicorrelator_iterations_test; k++)
-                                                                        d_carrier_phase_step_rad,
+    {
-                                                                        d_code_phase_step_chips,
+        correlator->Carrier_wipeoff_multicorrelator_resampler_cuda(d_rem_carrier_phase_rad,
-                                                                        d_rem_code_phase_chips,
+                                                                            d_carrier_phase_step_rad,
-                                                                        correlation_size,
+                                                                            d_code_phase_step_chips,
-                                                                        d_n_correlator_taps);
+                                                                            d_rem_code_phase_chips,
                                                                            correlation_size,
                                                                            d_n_correlator_taps);
    }
 }
 TEST(GPU_multicorrelator_test, MeasureExecutionTime)
@@ -121,26 +125,25 @@ TEST(GPU_multicorrelator_test, MeasureExecutionTime)
                    std::cout<<"Running "<<current_max_threads<<" concurrent correlators"<<std::endl;
                    gettimeofday(&tv, NULL);
                    long long int begin = tv.tv_sec * 1000000 + tv.tv_usec;
-                    for(int k = 0; k < FLAGS_gpu_multicorrelator_iterations_test; k++)
+                    //create the concurrent correlator threads
                    for (int current_thread=0;current_thread<current_max_threads;current_thread++)
                    {
-                        //create the concurrent correlator threads
+                        //cudaProfilerStart();
-                        for (int current_thread=0;current_thread<current_max_threads;current_thread++)
+                        thread_pool.push_back(std::thread(run_correlator_gpu,
-                        {
+                                correlator_pool[current_thread],
-                            thread_pool.push_back(std::thread(run_correlator_gpu,
+                                d_rem_carrier_phase_rad,
-                                    correlator_pool[current_thread],
+                                d_carrier_phase_step_rad,
-                                    d_rem_carrier_phase_rad,
+                                d_code_phase_step_chips,
-                                    d_carrier_phase_step_rad,
+                                d_rem_code_phase_chips,
-                                    d_code_phase_step_chips,
+                                correlation_sizes[correlation_sizes_idx],
-                                    d_rem_code_phase_chips,
+                                d_n_correlator_taps));
-                                    correlation_sizes[correlation_sizes_idx],
+                        //cudaProfilerStop();
                                    d_n_correlator_taps));
                        }
                        //wait the threads to finish they work and destroy the thread objects
                        for(auto &t : thread_pool){
                             t.join();
                         }
                        thread_pool.clear();
                    }
                    //wait the threads to finish they work and destroy the thread objects
                    for(auto &t : thread_pool){
                         t.join();
                     }
                    thread_pool.clear();
                    gettimeofday(&tv, NULL);
                    long long int end = tv.tv_sec * 1000000 + tv.tv_usec;
                    execution_times[correlation_sizes_idx] = static_cast<double>(end - begin) / (1000000.0 * static_cast<double>(FLAGS_gpu_multicorrelator_iterations_test));
@@ -158,7 +161,6 @@ TEST(GPU_multicorrelator_test, MeasureExecutionTime)
    for (int n=0;n<max_threads;n++)
    {
        correlator_pool[n]->free_cuda();
        delete(correlator_pool[n]);
    }