/* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors) * * This file is part of GNSS-SDR. * * GNSS-SDR is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * GNSS-SDR is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with GNSS-SDR. If not, see . */ #include "qa_utils.h" #include #include #include #include #include #include #include #include #include #include #include #include #include float uniform() { return 2.0f * ((float) rand() / RAND_MAX - 0.5f); // uniformly (-1, 1) } template void random_floats (t *buf, unsigned n) { for (unsigned i = 0; i < n; i++) buf[i] = uniform (); } void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n) { if(type.is_complex) n *= 2; if(type.is_float) { if(type.size == 8) random_floats((double *)data, n); else random_floats((float *)data, n); } else { float int_max = float(uint64_t(2) << (type.size*8)); if(type.is_signed) int_max /= 2.0; for(unsigned int i = 0; i < n; i++) { float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / static_cast((RAND_MAX/2))) * int_max; //man i really don't know how to do this in a more clever way, you have to cast down at some point switch(type.size) { case 8: if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand; else ((uint64_t *)data)[i] = (uint64_t) scaled_rand; break; case 4: if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand; else ((uint32_t *)data)[i] = (uint32_t) scaled_rand; break; case 2: // 16 bit multiplication saturates very fast // we produce here only 3 bits input range if(type.is_signed) ((int16_t *)data)[i] = (int16_t)((int16_t) scaled_rand % 8); else ((uint16_t *)data)[i] = (uint16_t) (int16_t)((int16_t) scaled_rand % 8); break; case 1: if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand; else ((uint8_t *)data)[i] = (uint8_t) scaled_rand; break; default: throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here } } } } static std::vector get_arch_list(volk_gnsssdr_func_desc_t desc) { std::vector archlist; for(size_t i = 0; i < desc.n_impls; i++) { archlist.push_back(std::string(desc.impl_names[i])); } return archlist; } volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string name) { volk_gnsssdr_type_t type; type.is_float = false; type.is_scalar = false; type.is_complex = false; type.is_signed = false; type.size = 0; type.str = name; if(name.size() < 2) { throw std::string("name too short to be a datatype"); } //is it a scalar? if(name[0] == 's') { type.is_scalar = true; name = name.substr(1, name.size()-1); } //get the data size size_t last_size_pos = name.find_last_of("0123456789"); if(last_size_pos == std::string::npos) { throw std::string("no size spec in type ").append(name); } //will throw if malformed int size = boost::lexical_cast(name.substr(0, last_size_pos+1)); assert(((size % 8) == 0) && (size <= 64) && (size != 0)); type.size = size/8; //in bytes for(size_t i=last_size_pos+1; i < name.size(); i++) { switch (name[i]) { case 'f': type.is_float = true; break; case 'i': type.is_signed = true; break; case 'c': type.is_complex = true; break; case 'u': type.is_signed = false; break; default: throw; } } return type; } static void get_signatures_from_name(std::vector &inputsig, std::vector &outputsig, std::string name) { boost::char_separator sep("_"); boost::tokenizer > tok(name, sep); std::vector toked; tok.assign(name); toked.assign(tok.begin(), tok.end()); assert(toked[0] == "volk"); toked.erase(toked.begin()); toked.erase(toked.begin()); //ok. we're assuming a string in the form //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment) enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT; std::string fn_name; volk_gnsssdr_type_t type; BOOST_FOREACH(std::string token, toked) { try { type = volk_gnsssdr_type_from_string(token); if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name... if(side == SIDE_INPUT) inputsig.push_back(type); else outputsig.push_back(type); } catch (...){ if(token[0] == 'x' && (token.size() > 1) && (token[1] > '0' || token[1] < '9')) { if(side == SIDE_INPUT) assert(inputsig.size() > 0); else assert(outputsig.size() > 0); int multiplier = boost::lexical_cast(token.substr(1, token.size()-1)); //will throw if invalid /////////// for(int i=1; i &buffs, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], vlen, arch.c_str()); } inline void run_cast_test2(volk_gnsssdr_fn_2arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], vlen, arch.c_str()); } inline void run_cast_test3(volk_gnsssdr_fn_3arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str()); } inline void run_cast_test4(volk_gnsssdr_fn_4arg func, std::vector &buffs, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str()); } inline void run_cast_test1_s32f(volk_gnsssdr_fn_1arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); } inline void run_cast_test2_s32f(volk_gnsssdr_fn_2arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); } inline void run_cast_test3_s32f(volk_gnsssdr_fn_3arg_s32f func, std::vector &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); } inline void run_cast_test1_s32fc(volk_gnsssdr_fn_1arg_s32fc func, std::vector &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); } inline void run_cast_test2_s32fc(volk_gnsssdr_fn_2arg_s32fc func, std::vector &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); } inline void run_cast_test3_s32fc(volk_gnsssdr_fn_3arg_s32fc func, std::vector &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); } // *************** ADDED BY GNSS-SDR. START inline void run_cast_test1_s8i(volk_gnsssdr_fn_1arg_s8i func, std::vector &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); } inline void run_cast_test2_s8i(volk_gnsssdr_fn_2arg_s8i func, std::vector &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); } inline void run_cast_test3_s8i(volk_gnsssdr_fn_3arg_s8i func, std::vector &buffs, char scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); } inline void run_cast_test1_s8ic(volk_gnsssdr_fn_1arg_s8ic func, std::vector &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); } inline void run_cast_test2_s8ic(volk_gnsssdr_fn_2arg_s8ic func, std::vector &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); } inline void run_cast_test3_s8ic(volk_gnsssdr_fn_3arg_s8ic func, std::vector &buffs, lv_8sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); } inline void run_cast_test1_s16ic(volk_gnsssdr_fn_1arg_s16ic func, std::vector &buffs, lv_16sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], scalar, vlen, arch.c_str()); } inline void run_cast_test2_s16ic(volk_gnsssdr_fn_2arg_s16ic func, std::vector &buffs, lv_16sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); } inline void run_cast_test3_s16ic(volk_gnsssdr_fn_3arg_s16ic func, std::vector &buffs, lv_16sc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) { while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); } // *************** ADDED BY GNSS-SDR. END template bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) { bool fail = false; int print_max_errs = 10; for(unsigned int i=0; i tol ) { fail=true; if(print_max_errs-- > 0) { std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]); std::cout << " tolerance was: " << tol << std::endl; } } } // the primary test is the percent different greater than given tol else if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/fabs(((t *)in1)[i]) > tol) { fail=true; if(print_max_errs-- > 0) { std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]); std::cout << " tolerance was: " << tol << std::endl; } } } return fail; } template bool ccompare(t *in1, t *in2, unsigned int vlen, float tol) { bool fail = false; int print_max_errs = 10; for(unsigned int i=0; i<2*vlen; i+=2) { t diff[2] = { in1[i] - in2[i], in1[i+1] - in2[i+1] }; t err = std::sqrt(diff[0] * diff[0] + diff[1] * diff[1]); t norm = std::sqrt(in1[i] * in1[i] + in1[i+1] * in1[i+1]); // for very small numbers we'll see round off errors due to limited // precision. So a special test case... if (norm < 1e-30) { if (err > tol) { fail=true; if(print_max_errs-- > 0) { std::cout << "offset " << i/2 << " in1: " << in1[i] << " + " << in1[i+1] << "j in2: " << in2[i] << " + " << in2[i+1] << "j"; std::cout << " tolerance was: " << tol << std::endl; } } } // the primary test is the percent different greater than given tol else if((err / norm) > tol) { fail=true; if(print_max_errs-- > 0) { std::cout << "offset " << i/2 << " in1: " << in1[i] << " + " << in1[i+1] << "j in2: " << in2[i] << " + " << in2[i+1] << "j"; std::cout << " tolerance was: " << tol << std::endl; } } } return fail; } template bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) { bool fail = false; int print_max_errs = 10; for(unsigned int i=0; i tol) { fail=true; if(print_max_errs-- > 0) { std::cout << "offset " << i << " in1: " << static_cast(t(((t *)(in1))[i])) << " in2: " << static_cast(t(((t *)(in2))[i])); std::cout << " tolerance was: " << tol << std::endl; } } } return fail; } class volk_gnsssdr_qa_aligned_mem_pool{ public: void *get_new(size_t size){ size_t alignment = volk_gnsssdr_get_alignment(); void* ptr = volk_gnsssdr_malloc(size, alignment); memset(ptr, 0x00, size); _mems.push_back(ptr); return ptr; } ~volk_gnsssdr_qa_aligned_mem_pool() { for(unsigned int ii = 0; ii < _mems.size(); ++ii) { volk_gnsssdr_free(_mems[ii]); } } private: std::vector _mems; }; bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc, void (*manual_func)(), std::string name, volk_gnsssdr_test_params_t test_params, std::vector *results, std::string puppet_master_name ) { return run_volk_gnsssdr_tests(desc, manual_func, name, test_params.tol(), test_params.scalar(), test_params.vlen(), test_params.iter(), results, puppet_master_name, test_params.benchmark_mode()); } bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc, void (*manual_func)(), std::string name, float tol, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::vector *results, std::string puppet_master_name, bool benchmark_mode) { // Initialize this entry in results vector results->push_back(volk_gnsssdr_test_results_t()); results->back().name = name; results->back().vlen = vlen; results->back().iter = iter; std::cout << "RUN_VOLK_GNSSSDR_TESTS: " << name << "(" << vlen << "," << iter << ")" << std::endl; // vlen_twiddle will increase vlen for malloc and data generation // but kernels will still be called with the user provided vlen. // This is useful for causing errors in kernels that do bad reads const unsigned int vlen_twiddle = 5; vlen = vlen + vlen_twiddle; const float tol_f = tol; const unsigned int tol_i = static_cast(tol); //first let's get a list of available architectures for the test std::vector arch_list = get_arch_list(desc); if((!benchmark_mode) && (arch_list.size() < 2)) { std::cout << "no architectures to test" << std::endl; return false; } //something that can hang onto memory and cleanup when this function exits volk_gnsssdr_qa_aligned_mem_pool mem_pool; //now we have to get a function signature by parsing the name std::vector inputsig, outputsig; try { get_signatures_from_name(inputsig, outputsig, name); } catch (boost::bad_lexical_cast& error) { std::cerr << "Error: unable to get function signature from kernel name" << std::endl; std::cerr << " - " << name << std::endl; return false; } //pull the input scalars into their own vector std::vector inputsc; for(size_t i=0; i inbuffs; BOOST_FOREACH(volk_gnsssdr_type_t sig, inputsig) { if(!sig.is_scalar) //we don't make buffers for scalars inbuffs.push_back(mem_pool.get_new(vlen*sig.size*(sig.is_complex ? 2 : 1))); } for(size_t i=0; i > test_data; for(size_t i=0; i arch_buffs; for(size_t j=0; j both_sigs; both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end()); both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end()); //now run the test vlen = vlen - vlen_twiddle; clock_t start, end; std::vector profile_times; for(size_t i = 0; i < arch_list.size(); i++) { start = clock(); switch(both_sigs.size()) { case 1: if(inputsc.size() == 0) { run_cast_test1((volk_gnsssdr_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { if(inputsc[0].is_complex) { run_cast_test1_s32fc((volk_gnsssdr_fn_1arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } else { run_cast_test1_s32f((volk_gnsssdr_fn_1arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); } } //ADDED BY GNSS-SDR. START else if(inputsc.size() == 1 && !inputsc[0].is_float) { if(inputsc[0].is_complex) { if(inputsc[0].size == 2) { run_cast_test1_s16ic((volk_gnsssdr_fn_1arg_s16ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } else { run_cast_test1_s8ic((volk_gnsssdr_fn_1arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } } else { run_cast_test1_s8i((volk_gnsssdr_fn_1arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); } } //ADDED BY GNSS-SDR. END else throw "unsupported 1 arg function >1 scalars"; break; case 2: if(inputsc.size() == 0) { run_cast_test2((volk_gnsssdr_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { if(inputsc[0].is_complex) { run_cast_test2_s32fc((volk_gnsssdr_fn_2arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } else { run_cast_test2_s32f((volk_gnsssdr_fn_2arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); } } //ADDED BY GNSS-SDR. START else if(inputsc.size() == 1 && !inputsc[0].is_float) { if(inputsc[0].is_complex) { if(inputsc[0].size == 2) { run_cast_test2_s16ic((volk_gnsssdr_fn_2arg_s16ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } else { run_cast_test2_s8ic((volk_gnsssdr_fn_2arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } } else { run_cast_test2_s8i((volk_gnsssdr_fn_2arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); } } //ADDED BY GNSS-SDR. END else throw "unsupported 2 arg function >1 scalars"; break; case 3: if(inputsc.size() == 0) { run_cast_test3((volk_gnsssdr_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); } else if(inputsc.size() == 1 && inputsc[0].is_float) { if(inputsc[0].is_complex) { run_cast_test3_s32fc((volk_gnsssdr_fn_3arg_s32fc)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } else { run_cast_test3_s32f((volk_gnsssdr_fn_3arg_s32f)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); } } //ADDED BY GNSS-SDR. START else if(inputsc.size() == 1 && !inputsc[0].is_float) { if(inputsc[0].is_complex) { { if(inputsc[0].size == 4) { run_cast_test3_s16ic((volk_gnsssdr_fn_3arg_s16ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } else { run_cast_test3_s8ic((volk_gnsssdr_fn_3arg_s8ic)(manual_func), test_data[i], scalar, vlen, iter, arch_list[i]); } } } else { run_cast_test3_s8i((volk_gnsssdr_fn_3arg_s8i)(manual_func), test_data[i], scalar.real(), vlen, iter, arch_list[i]); } } //ADDED BY GNSS-SDR. END else throw "unsupported 3 arg function >1 scalars"; break; default: throw "no function handler for this signature"; break; } end = clock(); double arch_time = 1000.0 * (double)(end-start)/(double)CLOCKS_PER_SEC; std::cout << arch_list[i] << " completed in " << arch_time << "ms" << std::endl; volk_gnsssdr_test_time_t result; result.name = arch_list[i]; result.time = arch_time; result.units = "ms"; result.pass = true; results->back().results[result.name] = result; profile_times.push_back(arch_time); } //and now compare each output to the generic output //first we have to know which output is the generic one, they aren't in order... size_t generic_offset=0; for(size_t i=0; i arch_results; for(size_t i = 0; i < arch_list.size(); i++) { fail = false; if(i != generic_offset) { for(size_t j=0; jback().results[arch_list[i]]; result->pass = !fail; fail_global = true; std::cout << name << ": fail on arch " << arch_list[i] << std::endl; } } } arch_results.push_back(!fail); } double best_time_a = std::numeric_limits::max(); double best_time_u = std::numeric_limits::max(); std::string best_arch_a = "generic"; std::string best_arch_u = "generic"; for(size_t i=0; i < arch_list.size(); i++) { if((profile_times[i] < best_time_u) && arch_results[i] && desc.impl_alignment[i] == 0) { best_time_u = profile_times[i]; best_arch_u = arch_list[i]; } if((profile_times[i] < best_time_a) && arch_results[i]) { best_time_a = profile_times[i]; best_arch_a = arch_list[i]; } } std::cout << "Best aligned arch: " << best_arch_a << std::endl; std::cout << "Best unaligned arch: " << best_arch_u << std::endl; if(puppet_master_name == "NULL") { results->back().config_name = name; } else { results->back().config_name = puppet_master_name; } results->back().best_arch_a = best_arch_a; results->back().best_arch_u = best_arch_u; return fail_global; }