diff --git a/src/algorithms/tracking/libs/cuda_multicorrelator.cu b/src/algorithms/tracking/libs/cuda_multicorrelator.cu
index 61dc3054e..78c3612f0 100644
--- a/src/algorithms/tracking/libs/cuda_multicorrelator.cu
+++ b/src/algorithms/tracking/libs/cuda_multicorrelator.cu
@@ -198,7 +198,7 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler(
//********* ZERO COPY VERSION ************
// Set flag to enable zero copy access
// Optimal in shared memory devices (like Jetson K1)
- cudaSetDeviceFlags(cudaDeviceMapHost);
+ //cudaSetDeviceFlags(cudaDeviceMapHost);
//******** CudaMalloc version ***********
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index ce8c40e43..08ad88e94 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -33,6 +33,7 @@ if(GTEST_INCLUDE_DIRS)
set(GTEST_DIR_LOCAL true)
endif(GTEST_INCLUDE_DIRS)
+
if(NOT ${GTEST_DIR_LOCAL})
# if GTEST_DIR is not defined, we download and build it
set(gtest_RELEASE 1.7.0)
@@ -86,6 +87,11 @@ endif(NOT ${GTEST_DIR_LOCAL})
set(GNSS_SDR_TEST_OPTIONAL_LIBS "")
set(GNSS_SDR_TEST_OPTIONAL_HEADERS "")
+if(ENABLE_CUDA) # only when the build was configured with ENABLE_CUDA
+ set(GNSS_SDR_TEST_OPTIONAL_HEADERS ${GNSS_SDR_TEST_OPTIONAL_HEADERS} ${CUDA_INCLUDE_DIRS}) # append the CUDA include directories to the optional test headers list
+ set(GNSS_SDR_TEST_OPTIONAL_LIBS ${GNSS_SDR_TEST_OPTIONAL_LIBS} ${CUDA_LIBRARIES}) # append the CUDA libraries to the optional test libraries list
+endif(ENABLE_CUDA)
+
if(ENABLE_GPERFTOOLS)
if(GPERFTOOLS_FOUND)
set(GNSS_SDR_TEST_OPTIONAL_LIBS "${GNSS_SDR_TEST_OPTIONAL_LIBS};${GPERFTOOLS_LIBRARIES}")
@@ -152,6 +158,10 @@ if(OPENCL_FOUND)
add_definitions(-DOPENCL_BLOCKS_TEST=1)
endif(OPENCL_FOUND)
+if (ENABLE_CUDA) # only when the build was configured with ENABLE_CUDA
+ add_definitions(-DCUDA_BLOCKS_TEST=1) # test_main.cc checks CUDA_BLOCKS_TEST before including arithmetic/gpu_multicorrelator_test.cc
+endif(ENABLE_CUDA)
+
add_definitions(-DTEST_PATH="${CMAKE_SOURCE_DIR}/src/tests/")
diff --git a/src/tests/arithmetic/cpu_multicorrelator_test.cc b/src/tests/arithmetic/cpu_multicorrelator_test.cc
new file mode 100644
index 000000000..d84b38f89
--- /dev/null
+++ b/src/tests/arithmetic/cpu_multicorrelator_test.cc
@@ -0,0 +1,167 @@
+/*!
+ * \file cpu_multicorrelator_test.cc
+ * \brief This file implements timing tests for the CPU multicorrelator.
+ * \author Carles Fernandez-Prades, 2016. cfernandez(at)cttc.es
+ *
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2016 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include
+#include
+#include
+#include
+#include "cpu_multicorrelator.h"
+#include "gps_sdr_signal_processing.h"
+#include "GPS_L1_CA.h"
+
+
+DEFINE_int32(cpu_multicorrelator_iterations_test, 1000, "Number of averaged iterations in CPU multicorrelator test timing test");
+DEFINE_int32(cpu_multicorrelator_max_threads_test, 12, "Number of maximum concurrent correlators in CPU multicorrelator test timing test");
+
+void run_correlator_cpu(cpu_multicorrelator* correlator, // Runs one correlation call on the given CPU correlator instance
+ float d_rem_carrier_phase_rad, // forwarded unchanged (units presumably as the name suffix says - confirm in cpu_multicorrelator.h)
+ float d_carrier_phase_step_rad, // forwarded unchanged
+ float d_code_phase_step_chips, // forwarded unchanged
+ float d_rem_code_phase_chips, // forwarded unchanged
+ int correlation_size, // forwarded unchanged as the last argument of the call below
+ int d_n_correlator_taps) // accepted but never read in this body
+{
+ correlator->Carrier_wipeoff_multicorrelator_resampler(d_rem_carrier_phase_rad, // the only statement: forwards five of the seven parameters; nothing is returned
+ d_carrier_phase_step_rad,
+ d_code_phase_step_chips,
+ d_rem_code_phase_chips,
+ correlation_size);
+}
+
+TEST(CPU_multicorrelator_test, MeasureExecutionTime) // Timing test: sets up a pool of cpu_multicorrelator objects and prints an execution time per correlation length
+{
+ struct timeval tv;
+ int max_threads=FLAGS_cpu_multicorrelator_max_threads_test; // pool size comes from the gflags option defined above (default 12)
+ std::vector thread_pool; // NOTE(review): the element type is missing from this line as shown - confirm against the upstream file
+ cpu_multicorrelator* correlator_pool[max_threads]; // array sized by a runtime value (variable-length array, a compiler extension in C++)
+ unsigned int correlation_sizes [3] = { 2048, 4096, 8192}; // the three correlation lengths iterated by the timing loop below
+ double execution_times [3]; // one slot per entry of correlation_sizes
+
+ gr_complex* d_ca_code;
+ gr_complex* in_cpu;
+ gr_complex* d_correlator_outs;
+
+ int d_n_correlator_taps=3;
+ int d_vector_length=correlation_sizes[2]; //max correlation size to allocate all the necessary memory
+ float* d_local_code_shift_chips;
+
+ //allocate host memory
+ // Get space for a vector with the C/A code replica sampled 1x/chip
+ d_ca_code = static_cast(volk_malloc(static_cast(GPS_L1_CA_CODE_LENGTH_CHIPS) * sizeof(gr_complex), volk_get_alignment())); // NOTE(review): cast target types are missing from this line as shown - confirm against upstream
+ in_cpu = static_cast(volk_malloc(2 * d_vector_length * sizeof(gr_complex), volk_get_alignment())); // input buffer holds twice the largest correlation length
+
+ // correlator outputs (scalar)
+ d_n_correlator_taps = 3; // Early, Prompt, and Late (re-assigns the value already given at the declaration above)
+ d_correlator_outs = static_cast(volk_malloc(d_n_correlator_taps*sizeof(gr_complex), volk_get_alignment()));
+ for (int n = 0; n < d_n_correlator_taps; n++)
+ {
+ d_correlator_outs[n] = gr_complex(0,0);
+ }
+ d_local_code_shift_chips = static_cast(volk_malloc(d_n_correlator_taps*sizeof(float), volk_get_alignment()));
+ // Set TAPs delay values [chips]: -spacing, 0, +spacing
+ float d_early_late_spc_chips=0.5;
+ d_local_code_shift_chips[0] = - d_early_late_spc_chips;
+ d_local_code_shift_chips[1] = 0.0;
+ d_local_code_shift_chips[2] = d_early_late_spc_chips;
+
+ //--- Perform initializations ------------------------------
+
+ //local code setup (NOTE(review): this comment originally said "on GPU", but this test drives cpu_multicorrelator)
+ // generate local reference (1 sample per chip)
+ gps_l1_ca_code_gen_complex(d_ca_code, 1, 0);
+ // generate input signal: 2*d_vector_length samples whose real and imaginary parts are rand() divided by RAND_MAX
+ for (int n=0;n<2*d_vector_length;n++)
+ {
+ in_cpu[n]=std::complex(static_cast (rand())/static_cast(RAND_MAX),static_cast (rand())/static_cast(RAND_MAX));
+ }
+
+ for (int n=0;ninit(d_vector_length, d_n_correlator_taps); // NOTE(review): this line looks truncated (loop condition and start of the body are missing) - restore from the upstream file
+ correlator_pool[n]->set_input_output_vectors(d_correlator_outs, in_cpu); // the same output and input buffers are passed for every n
+ correlator_pool[n]->set_local_code_and_taps(static_cast(GPS_L1_CA_CODE_LENGTH_CHIPS), d_ca_code, d_local_code_shift_chips);
+ }
+
+ float d_rem_carrier_phase_rad=0.0;
+ float d_carrier_phase_step_rad=0.1;
+ float d_code_phase_step_chips=0.3;
+ float d_rem_code_phase_chips=0.4;
+
+ EXPECT_NO_THROW(
+ for(int correlation_sizes_idx = 0; correlation_sizes_idx < 3; correlation_sizes_idx++)
+ {
+ for(int current_max_threads=1; current_max_threads<(max_threads+1); current_max_threads++)
+ {
+ std::cout<<"Running "<(end - begin) / (1000000.0 * static_cast(FLAGS_cpu_multicorrelator_iterations_test)); // NOTE(review): this line looks truncated (everything between the "Running " output and the time computation is missing) - restore from the upstream file
+ std::cout << "CPU Multicorrelator execution time for length=" << correlation_sizes[correlation_sizes_idx] << " : " << execution_times[correlation_sizes_idx] << " [s]" << std::endl;
+
+ }
+ }
+ );
+
+
+ volk_free(d_local_code_shift_chips); // release the four buffers obtained from volk_malloc above
+ volk_free(d_correlator_outs);
+ volk_free(d_ca_code);
+ volk_free(in_cpu);
+
+ for (int n=0;nfree(); // NOTE(review): this line looks truncated (loop condition and the object the call is made on are missing) - restore from the upstream file
+ delete(correlator_pool[n]);
+ }
+}
diff --git a/src/tests/arithmetic/gpu_multicorrelator_test.cc b/src/tests/arithmetic/gpu_multicorrelator_test.cc
new file mode 100644
index 000000000..11c4fdef2
--- /dev/null
+++ b/src/tests/arithmetic/gpu_multicorrelator_test.cc
@@ -0,0 +1,166 @@
+/*!
+ * \file gpu_multicorrelator_test.cc
+ * \brief This file implements timing tests for the GPU (CUDA) multicorrelator.
+ * \author Carles Fernandez-Prades, 2016. cfernandez(at)cttc.es
+ *
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2016 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "cuda_multicorrelator.h"
+#include "gps_sdr_signal_processing.h"
+#include "GPS_L1_CA.h"
+
+
+DEFINE_int32(gpu_multicorrelator_iterations_test, 1000, "Number of averaged iterations in GPU multicorrelator test timing test");
+DEFINE_int32(gpu_multicorrelator_max_threads_test, 12, "Number of maximum concurrent correlators in GPU multicorrelator test timing test");
+
+void run_correlator_gpu(cuda_multicorrelator* correlator, // Runs one correlation call on the given CUDA correlator instance
+ float d_rem_carrier_phase_rad, // forwarded unchanged (units presumably as the name suffix says - confirm in cuda_multicorrelator.h)
+ float d_carrier_phase_step_rad, // forwarded unchanged
+ float d_code_phase_step_chips, // forwarded unchanged
+ float d_rem_code_phase_chips, // forwarded unchanged
+ int correlation_size, // forwarded unchanged
+ int d_n_correlator_taps) // forwarded unchanged as the last argument of the call below
+{
+ correlator->Carrier_wipeoff_multicorrelator_resampler_cuda(d_rem_carrier_phase_rad, // the only statement: forwards all six values; any result of the call is discarded
+ d_carrier_phase_step_rad,
+ d_code_phase_step_chips,
+ d_rem_code_phase_chips,
+ correlation_size,
+ d_n_correlator_taps);
+}
+
+TEST(GPU_multicorrelator_test, MeasureExecutionTime) // Timing test: sets up a pool of cuda_multicorrelator objects and prints an execution time per correlation length
+{
+ struct timeval tv;
+ int max_threads=FLAGS_gpu_multicorrelator_max_threads_test; // pool size comes from the gflags option defined above (default 12)
+ std::vector thread_pool; // NOTE(review): the element type is missing from this line as shown - confirm against the upstream file
+ cuda_multicorrelator* correlator_pool[max_threads]; // array sized by a runtime value (variable-length array, a compiler extension in C++)
+ unsigned int correlation_sizes [3] = { 2048, 4096, 8192}; // the three correlation lengths iterated by the timing loop below
+ double execution_times [3]; // one slot per entry of correlation_sizes
+
+ gr_complex* d_ca_code;
+ gr_complex* in_gpu;
+ gr_complex* d_correlator_outs;
+
+ int d_n_correlator_taps=3;
+ int d_vector_length=correlation_sizes[2]; //max correlation size to allocate all the necessary memory
+ float* d_local_code_shift_chips;
+ // Set GPU flags (requests mapped host memory; done here in the test before the cudaHostAlloc calls below)
+ cudaSetDeviceFlags(cudaDeviceMapHost); // NOTE(review): return value is not checked
+ //allocate host memory
+ //pinned memory mode - use special function to get OS-pinned memory
+ d_n_correlator_taps = 3; // Early, Prompt, and Late (re-assigns the value already given at the declaration above)
+ // Get space for a vector with the C/A code replica sampled 1x/chip
+ cudaHostAlloc((void**)&d_ca_code, (static_cast(GPS_L1_CA_CODE_LENGTH_CHIPS)* sizeof(gr_complex)), cudaHostAllocMapped | cudaHostAllocWriteCombined); // NOTE(review): none of the cudaHostAlloc return values in this test are checked
+ // Get space for the per-tap code shift values (one float per correlator tap)
+ cudaHostAlloc((void**)&d_local_code_shift_chips, d_n_correlator_taps * sizeof(float), cudaHostAllocMapped | cudaHostAllocWriteCombined);
+ cudaHostAlloc((void**)&in_gpu, 2 * d_vector_length * sizeof(gr_complex), cudaHostAllocMapped | cudaHostAllocWriteCombined); // input buffer holds twice the largest correlation length
+ // correlator outputs (scalar)
+ cudaHostAlloc((void**)&d_correlator_outs ,sizeof(gr_complex)*d_n_correlator_taps, cudaHostAllocMapped | cudaHostAllocWriteCombined ); // NOTE(review): write-combined host memory is documented as slow for CPU reads - confirm the host never needs to read these outputs
+
+ //--- Perform initializations ------------------------------
+ //local code resampler on GPU
+ // generate local reference (1 sample per chip)
+ gps_l1_ca_code_gen_complex(d_ca_code, 1, 0);
+ // generate input signal: 2*d_vector_length samples whose real and imaginary parts are rand() divided by RAND_MAX
+ for (int n=0;n<2*d_vector_length;n++)
+ {
+ in_gpu[n]=std::complex(static_cast (rand())/static_cast(RAND_MAX),static_cast (rand())/static_cast(RAND_MAX));
+ }
+ // Set TAPs delay values [chips]: -spacing, 0, +spacing
+ float d_early_late_spc_chips=0.5;
+ d_local_code_shift_chips[0] = - d_early_late_spc_chips;
+ d_local_code_shift_chips[1] = 0.0;
+ d_local_code_shift_chips[2] = d_early_late_spc_chips;
+ for (int n=0;ninit_cuda_integrated_resampler(d_vector_length, GPS_L1_CA_CODE_LENGTH_CHIPS, d_n_correlator_taps); // NOTE(review): this line looks truncated (loop condition and start of the body are missing) - restore from the upstream file
+ correlator_pool[n]->set_input_output_vectors(d_correlator_outs, in_gpu); // the same output and input buffers are passed for every n
+ }
+
+ float d_rem_carrier_phase_rad=0.0;
+ float d_carrier_phase_step_rad=0.1;
+ float d_code_phase_step_chips=0.3;
+ float d_rem_code_phase_chips=0.4;
+
+ EXPECT_NO_THROW(
+ for(int correlation_sizes_idx = 0; correlation_sizes_idx < 3; correlation_sizes_idx++)
+ {
+ for(int current_max_threads=1; current_max_threads<(max_threads+1); current_max_threads++)
+ {
+ std::cout<<"Running "<(end - begin) / (1000000.0 * static_cast(FLAGS_gpu_multicorrelator_iterations_test)); // NOTE(review): this line looks truncated (everything between the "Running " output and the time computation is missing) - restore from the upstream file
+ std::cout << "GPU Multicorrelator execution time for length=" << correlation_sizes[correlation_sizes_idx] << " : " << execution_times[correlation_sizes_idx] << " [s]" << std::endl;
+
+ }
+ }
+ );
+
+ cudaFreeHost(in_gpu); // release the four buffers obtained from cudaHostAlloc above
+ cudaFreeHost(d_correlator_outs);
+ cudaFreeHost(d_local_code_shift_chips);
+ cudaFreeHost(d_ca_code);
+
+ for (int n=0;nfree_cuda(); // NOTE(review): this line looks truncated (loop condition and the object the call is made on are missing) - restore from the upstream file
+ delete(correlator_pool[n]);
+ }
+
+
+
+}
diff --git a/src/tests/test_main.cc b/src/tests/test_main.cc
index a3dabce30..5b3228f56 100644
--- a/src/tests/test_main.cc
+++ b/src/tests/test_main.cc
@@ -92,9 +92,15 @@ DECLARE_string(log_dir);
#include "gnss_block/gps_l2_m_pcps_acquisition_test.cc"
#include "gnss_block/gps_l1_ca_pcps_acquisition_gsoc2013_test.cc"
//#include "gnss_block/gps_l1_ca_pcps_multithread_acquisition_gsoc2013_test.cc"
+#include "arithmetic/cpu_multicorrelator_test.cc"
#if OPENCL_BLOCKS_TEST
#include "gnss_block/gps_l1_ca_pcps_opencl_acquisition_gsoc2013_test.cc"
#endif
+
+#if CUDA_BLOCKS_TEST
+ #include "arithmetic/gpu_multicorrelator_test.cc"
+#endif
+
#include "gnss_block/gps_l1_ca_pcps_quicksync_acquisition_gsoc2014_test.cc"
#include "gnss_block/gps_l1_ca_pcps_tong_acquisition_gsoc2013_test.cc"
#include "gnss_block/galileo_e1_pcps_ambiguous_acquisition_test.cc"