diff --git a/src/algorithms/tracking/libs/cuda_multicorrelator.cu b/src/algorithms/tracking/libs/cuda_multicorrelator.cu index 61dc3054e..78c3612f0 100644 --- a/src/algorithms/tracking/libs/cuda_multicorrelator.cu +++ b/src/algorithms/tracking/libs/cuda_multicorrelator.cu @@ -198,7 +198,7 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler( //********* ZERO COPY VERSION ************ // Set flag to enable zero copy access // Optimal in shared memory devices (like Jetson K1) - cudaSetDeviceFlags(cudaDeviceMapHost); + //cudaSetDeviceFlags(cudaDeviceMapHost); //******** CudaMalloc version *********** diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index ce8c40e43..08ad88e94 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -33,6 +33,7 @@ if(GTEST_INCLUDE_DIRS) set(GTEST_DIR_LOCAL true) endif(GTEST_INCLUDE_DIRS) + if(NOT ${GTEST_DIR_LOCAL}) # if GTEST_DIR is not defined, we download and build it set(gtest_RELEASE 1.7.0) @@ -86,6 +87,11 @@ endif(NOT ${GTEST_DIR_LOCAL}) set(GNSS_SDR_TEST_OPTIONAL_LIBS "") set(GNSS_SDR_TEST_OPTIONAL_HEADERS "") +if(ENABLE_CUDA) + set(GNSS_SDR_TEST_OPTIONAL_HEADERS ${GNSS_SDR_TEST_OPTIONAL_HEADERS} ${CUDA_INCLUDE_DIRS}) + set(GNSS_SDR_TEST_OPTIONAL_LIBS ${GNSS_SDR_TEST_OPTIONAL_LIBS} ${CUDA_LIBRARIES}) +endif(ENABLE_CUDA) + if(ENABLE_GPERFTOOLS) if(GPERFTOOLS_FOUND) set(GNSS_SDR_TEST_OPTIONAL_LIBS "${GNSS_SDR_TEST_OPTIONAL_LIBS};${GPERFTOOLS_LIBRARIES}") @@ -152,6 +158,10 @@ if(OPENCL_FOUND) add_definitions(-DOPENCL_BLOCKS_TEST=1) endif(OPENCL_FOUND) +if (ENABLE_CUDA) + add_definitions(-DCUDA_BLOCKS_TEST=1) +endif(ENABLE_CUDA) + add_definitions(-DTEST_PATH="${CMAKE_SOURCE_DIR}/src/tests/") diff --git a/src/tests/arithmetic/cpu_multicorrelator_test.cc b/src/tests/arithmetic/cpu_multicorrelator_test.cc new file mode 100644 index 000000000..d84b38f89 --- /dev/null +++ b/src/tests/arithmetic/cpu_multicorrelator_test.cc @@ -0,0 +1,167 @@ +/*! 
+ * \file cpu_multicorrelator_test.cc + * \brief This file implements timing tests for the CPU multicorrelator. + * \author Carles Fernandez-Prades, 2016. cfernandez(at)cttc.es + * + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2016 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see <https://www.gnu.org/licenses/>. 
+ * + * ------------------------------------------------------------------------- + */ + +#include +#include +#include +#include +#include "cpu_multicorrelator.h" +#include "gps_sdr_signal_processing.h" +#include "GPS_L1_CA.h" + + +DEFINE_int32(cpu_multicorrelator_iterations_test, 1000, "Number of averaged iterations in CPU multicorrelator test timing test"); +DEFINE_int32(cpu_multicorrelator_max_threads_test, 12, "Number of maximum concurrent correlators in CPU multicorrelator test timing test"); + +void run_correlator_cpu(cpu_multicorrelator* correlator, + float d_rem_carrier_phase_rad, + float d_carrier_phase_step_rad, + float d_code_phase_step_chips, + float d_rem_code_phase_chips, + int correlation_size, + int d_n_correlator_taps) +{ + correlator->Carrier_wipeoff_multicorrelator_resampler(d_rem_carrier_phase_rad, + d_carrier_phase_step_rad, + d_code_phase_step_chips, + d_rem_code_phase_chips, + correlation_size); +} + +TEST(CPU_multicorrelator_test, MeasureExecutionTime) +{ + struct timeval tv; + int max_threads=FLAGS_cpu_multicorrelator_max_threads_test; + std::vector thread_pool; + cpu_multicorrelator* correlator_pool[max_threads]; + unsigned int correlation_sizes [3] = { 2048, 4096, 8192}; + double execution_times [3]; + + gr_complex* d_ca_code; + gr_complex* in_cpu; + gr_complex* d_correlator_outs; + + int d_n_correlator_taps=3; + int d_vector_length=correlation_sizes[2]; //max correlation size to allocate all the necessary memory + float* d_local_code_shift_chips; + + //allocate host memory + // Get space for a vector with the C/A code replica sampled 1x/chip + d_ca_code = static_cast(volk_malloc(static_cast(GPS_L1_CA_CODE_LENGTH_CHIPS) * sizeof(gr_complex), volk_get_alignment())); + in_cpu = static_cast(volk_malloc(2 * d_vector_length * sizeof(gr_complex), volk_get_alignment())); + + // correlator outputs (scalar) + d_n_correlator_taps = 3; // Early, Prompt, and Late + d_correlator_outs = static_cast(volk_malloc(d_n_correlator_taps*sizeof(gr_complex), 
volk_get_alignment())); + for (int n = 0; n < d_n_correlator_taps; n++) + { + d_correlator_outs[n] = gr_complex(0,0); + } + d_local_code_shift_chips = static_cast(volk_malloc(d_n_correlator_taps*sizeof(float), volk_get_alignment())); + // Set TAPs delay values [chips] + float d_early_late_spc_chips=0.5; + d_local_code_shift_chips[0] = - d_early_late_spc_chips; + d_local_code_shift_chips[1] = 0.0; + d_local_code_shift_chips[2] = d_early_late_spc_chips; + + //--- Perform initializations ------------------------------ + + //local code resampler on GPU + // generate local reference (1 sample per chip) + gps_l1_ca_code_gen_complex(d_ca_code, 1, 0); + // generate inut signal + for (int n=0;n<2*d_vector_length;n++) + { + in_cpu[n]=std::complex(static_cast (rand())/static_cast(RAND_MAX),static_cast (rand())/static_cast(RAND_MAX)); + } + + for (int n=0;ninit(d_vector_length, d_n_correlator_taps); + correlator_pool[n]->set_input_output_vectors(d_correlator_outs, in_cpu); + correlator_pool[n]->set_local_code_and_taps(static_cast(GPS_L1_CA_CODE_LENGTH_CHIPS), d_ca_code, d_local_code_shift_chips); + } + + float d_rem_carrier_phase_rad=0.0; + float d_carrier_phase_step_rad=0.1; + float d_code_phase_step_chips=0.3; + float d_rem_code_phase_chips=0.4; + + EXPECT_NO_THROW( + for(int correlation_sizes_idx = 0; correlation_sizes_idx < 3; correlation_sizes_idx++) + { + for(int current_max_threads=1; current_max_threads<(max_threads+1); current_max_threads++) + { + std::cout<<"Running "<(end - begin) / (1000000.0 * static_cast(FLAGS_cpu_multicorrelator_iterations_test)); + std::cout << "CPU Multicorrelator execution time for length=" << correlation_sizes[correlation_sizes_idx] << " : " << execution_times[correlation_sizes_idx] << " [s]" << std::endl; + + } + } + ); + + + volk_free(d_local_code_shift_chips); + volk_free(d_correlator_outs); + volk_free(d_ca_code); + volk_free(in_cpu); + + for (int n=0;nfree(); + delete(correlator_pool[n]); + } +} diff --git 
a/src/tests/arithmetic/gpu_multicorrelator_test.cc b/src/tests/arithmetic/gpu_multicorrelator_test.cc new file mode 100644 index 000000000..11c4fdef2 --- /dev/null +++ b/src/tests/arithmetic/gpu_multicorrelator_test.cc @@ -0,0 +1,166 @@ +/*! + * \file gpu_multicorrelator_test.cc + * \brief This file implements timing tests for the GPU multicorrelator. + * \author Carles Fernandez-Prades, 2016. cfernandez(at)cttc.es + * + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2016 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see <https://www.gnu.org/licenses/>. 
+ * + * ------------------------------------------------------------------------- + */ + +#include +#include +#include +#include +#include +#include "cuda_multicorrelator.h" +#include "gps_sdr_signal_processing.h" +#include "GPS_L1_CA.h" + + +DEFINE_int32(gpu_multicorrelator_iterations_test, 1000, "Number of averaged iterations in GPU multicorrelator test timing test"); +DEFINE_int32(gpu_multicorrelator_max_threads_test, 12, "Number of maximum concurrent correlators in GPU multicorrelator test timing test"); + +void run_correlator_gpu(cuda_multicorrelator* correlator, + float d_rem_carrier_phase_rad, + float d_carrier_phase_step_rad, + float d_code_phase_step_chips, + float d_rem_code_phase_chips, + int correlation_size, + int d_n_correlator_taps) +{ + correlator->Carrier_wipeoff_multicorrelator_resampler_cuda(d_rem_carrier_phase_rad, + d_carrier_phase_step_rad, + d_code_phase_step_chips, + d_rem_code_phase_chips, + correlation_size, + d_n_correlator_taps); +} + +TEST(GPU_multicorrelator_test, MeasureExecutionTime) +{ + struct timeval tv; + int max_threads=FLAGS_gpu_multicorrelator_max_threads_test; + std::vector thread_pool; + cuda_multicorrelator* correlator_pool[max_threads]; + unsigned int correlation_sizes [3] = { 2048, 4096, 8192}; + double execution_times [3]; + + gr_complex* d_ca_code; + gr_complex* in_gpu; + gr_complex* d_correlator_outs; + + int d_n_correlator_taps=3; + int d_vector_length=correlation_sizes[2]; //max correlation size to allocate all the necessary memory + float* d_local_code_shift_chips; + // Set GPU flags + cudaSetDeviceFlags(cudaDeviceMapHost); + //allocate host memory + //pinned memory mode - use special function to get OS-pinned memory + d_n_correlator_taps = 3; // Early, Prompt, and Late + // Get space for a vector with the C/A code replica sampled 1x/chip + cudaHostAlloc((void**)&d_ca_code, (static_cast(GPS_L1_CA_CODE_LENGTH_CHIPS)* sizeof(gr_complex)), cudaHostAllocMapped | cudaHostAllocWriteCombined); + // Get space for the 
resampled early / prompt / late local replicas + cudaHostAlloc((void**)&d_local_code_shift_chips, d_n_correlator_taps * sizeof(float), cudaHostAllocMapped | cudaHostAllocWriteCombined); + cudaHostAlloc((void**)&in_gpu, 2 * d_vector_length * sizeof(gr_complex), cudaHostAllocMapped | cudaHostAllocWriteCombined); + // correlator outputs (scalar) + cudaHostAlloc((void**)&d_correlator_outs ,sizeof(gr_complex)*d_n_correlator_taps, cudaHostAllocMapped | cudaHostAllocWriteCombined ); + + //--- Perform initializations ------------------------------ + //local code resampler on GPU + // generate local reference (1 sample per chip) + gps_l1_ca_code_gen_complex(d_ca_code, 1, 0); + // generate inut signal + for (int n=0;n<2*d_vector_length;n++) + { + in_gpu[n]=std::complex(static_cast (rand())/static_cast(RAND_MAX),static_cast (rand())/static_cast(RAND_MAX)); + } + // Set TAPs delay values [chips] + float d_early_late_spc_chips=0.5; + d_local_code_shift_chips[0] = - d_early_late_spc_chips; + d_local_code_shift_chips[1] = 0.0; + d_local_code_shift_chips[2] = d_early_late_spc_chips; + for (int n=0;ninit_cuda_integrated_resampler(d_vector_length, GPS_L1_CA_CODE_LENGTH_CHIPS, d_n_correlator_taps); + correlator_pool[n]->set_input_output_vectors(d_correlator_outs, in_gpu); + } + + float d_rem_carrier_phase_rad=0.0; + float d_carrier_phase_step_rad=0.1; + float d_code_phase_step_chips=0.3; + float d_rem_code_phase_chips=0.4; + + EXPECT_NO_THROW( + for(int correlation_sizes_idx = 0; correlation_sizes_idx < 3; correlation_sizes_idx++) + { + for(int current_max_threads=1; current_max_threads<(max_threads+1); current_max_threads++) + { + std::cout<<"Running "<(end - begin) / (1000000.0 * static_cast(FLAGS_gpu_multicorrelator_iterations_test)); + std::cout << "GPU Multicorrelator execution time for length=" << correlation_sizes[correlation_sizes_idx] << " : " << execution_times[correlation_sizes_idx] << " [s]" << std::endl; + + } + } + ); + + cudaFreeHost(in_gpu); + 
cudaFreeHost(d_correlator_outs); + cudaFreeHost(d_local_code_shift_chips); + cudaFreeHost(d_ca_code); + + for (int n=0;nfree_cuda(); + delete(correlator_pool[n]); + } + + + +} diff --git a/src/tests/test_main.cc b/src/tests/test_main.cc index a3dabce30..5b3228f56 100644 --- a/src/tests/test_main.cc +++ b/src/tests/test_main.cc @@ -92,9 +92,15 @@ DECLARE_string(log_dir); #include "gnss_block/gps_l2_m_pcps_acquisition_test.cc" #include "gnss_block/gps_l1_ca_pcps_acquisition_gsoc2013_test.cc" //#include "gnss_block/gps_l1_ca_pcps_multithread_acquisition_gsoc2013_test.cc" +#include "arithmetic/cpu_multicorrelator_test.cc" #if OPENCL_BLOCKS_TEST #include "gnss_block/gps_l1_ca_pcps_opencl_acquisition_gsoc2013_test.cc" #endif + +#if CUDA_BLOCKS_TEST + #include "arithmetic/gpu_multicorrelator_test.cc" +#endif + #include "gnss_block/gps_l1_ca_pcps_quicksync_acquisition_gsoc2014_test.cc" #include "gnss_block/gps_l1_ca_pcps_tong_acquisition_gsoc2013_test.cc" #include "gnss_block/galileo_e1_pcps_ambiguous_acquisition_test.cc"