mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2024-12-15 20:50:33 +00:00
Apply automated code formatting to volk-gnsssdr
See http://gnss-sdr.org/coding-style/#use-tools-for-automated-code-formatting
This commit is contained in:
parent
f924005733
commit
891478cf2c
@ -54,8 +54,10 @@ int main(int argc, char **argv)
|
||||
our_options.add(option_t("cc", "", "print the VOLK_GNSSSDR C compiler version", volk_gnsssdr_c_compiler()));  // help text spelling matches the other VOLK_GNSSSDR options
|
||||
our_options.add(option_t("cflags", "", "print the VOLK_GNSSSDR CFLAGS", volk_gnsssdr_compiler_flags()));
|
||||
our_options.add(option_t("all-machines", "", "print VOLK_GNSSSDR machines built", volk_gnsssdr_available_machines()));
|
||||
our_options.add(option_t("avail-machines", "", "print VOLK_GNSSSDR machines on the current "
|
||||
"platform", volk_gnsssdr_list_machines));
|
||||
our_options.add(option_t("avail-machines", "",
|
||||
"print VOLK_GNSSSDR machines on the current "
|
||||
"platform",
|
||||
volk_gnsssdr_list_machines));
|
||||
our_options.add(option_t("machine", "", "print the current VOLK_GNSSSDR machine that will be used",
|
||||
volk_gnsssdr_get_machine()));
|
||||
our_options.add(option_t("alignment", "", "print the memory alignment", print_alignment));
|
||||
|
@ -25,7 +25,6 @@
|
||||
#include <utility> // for pair
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Option type
|
||||
*/
|
||||
@ -70,55 +69,74 @@ option_t::option_t(std::string longform, std::string shortform, std::string msg,
|
||||
* Option List
|
||||
*/
|
||||
|
||||
option_list::option_list(std::string program_name) :
|
||||
program_name(program_name) {
|
||||
{ internal_list = std::vector<option_t>(); }
|
||||
option_list::option_list(std::string program_name) : program_name(program_name)
|
||||
{
|
||||
{
|
||||
internal_list = std::vector<option_t>();
|
||||
}
|
||||
}
|
||||
|
||||
void option_list::add(const option_t &opt) { internal_list.push_back(opt); }
|
||||
|
||||
void option_list::parse(int argc, char **argv) {
|
||||
for (int arg_number = 0; arg_number < argc; ++arg_number) {
|
||||
void option_list::parse(int argc, char **argv)
|
||||
{
|
||||
for (int arg_number = 0; arg_number < argc; ++arg_number)
|
||||
{
|
||||
for (std::vector<option_t>::iterator this_option = internal_list.begin();
|
||||
this_option != internal_list.end();
|
||||
this_option++) {
|
||||
this_option++)
|
||||
{
|
||||
if (this_option->longform == std::string(argv[arg_number]) ||
|
||||
this_option->shortform == std::string(argv[arg_number])) {
|
||||
switch (this_option->option_type) {
|
||||
this_option->shortform == std::string(argv[arg_number]))
|
||||
{
|
||||
switch (this_option->option_type)
|
||||
{
|
||||
case VOID_CALLBACK:
|
||||
this_option->callback();
|
||||
break;
|
||||
case INT_CALLBACK:
|
||||
try {
|
||||
try
|
||||
{
|
||||
int int_val = std::stoi(argv[++arg_number]);
|
||||
((void (*)(int))this_option->callback)(int_val);
|
||||
} catch (std::exception &exc) {
|
||||
}
|
||||
catch (std::exception &exc)
|
||||
{
|
||||
std::cout << "An int option can only receive a number" << std::endl;
|
||||
throw std::exception();
|
||||
};
|
||||
break;
|
||||
case FLOAT_CALLBACK:
|
||||
try {
|
||||
try
|
||||
{
|
||||
float int_val = std::stof(argv[++arg_number]);  // must be float: an int here truncated the parsed value before the float callback (name kept for the call below)
|
||||
((void (*)(float))this_option->callback)(int_val);
|
||||
} catch (std::exception &exc) {
|
||||
}
|
||||
catch (std::exception &exc)
|
||||
{
|
||||
std::cout << "A float option can only receive a number" << std::endl;
|
||||
throw std::exception();
|
||||
};
|
||||
break;
|
||||
case BOOL_CALLBACK:
|
||||
try {
|
||||
try
|
||||
{
|
||||
bool int_val = (bool)std::stoi(argv[++arg_number]);
|
||||
((void (*)(bool))this_option->callback)(int_val);
|
||||
} catch (std::exception &exc) {
|
||||
}
|
||||
catch (std::exception &exc)
|
||||
{
|
||||
std::cout << "A bool option can only receive 0 or 1" << std::endl;
|
||||
throw std::exception();
|
||||
};
|
||||
break;
|
||||
case STRING_CALLBACK:
|
||||
try {
|
||||
try
|
||||
{
|
||||
((void (*)(std::string))this_option->callback)(argv[++arg_number]);
|
||||
} catch (std::exception &exc) {
|
||||
}
|
||||
catch (std::exception &exc)
|
||||
{
|
||||
throw std::exception();
|
||||
};
|
||||
break;
|
||||
@ -132,26 +150,33 @@ void option_list::parse(int argc, char **argv) {
|
||||
}
|
||||
}
|
||||
if (std::string("--help") == std::string(argv[arg_number]) ||
|
||||
std::string("-h") == std::string(argv[arg_number])) {
|
||||
std::string("-h") == std::string(argv[arg_number]))
|
||||
{
|
||||
help();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void option_list::help() {
|
||||
void option_list::help()
|
||||
{
|
||||
std::cout << program_name << std::endl;
|
||||
std::cout << " -h [ --help ] \t\tDisplay this help message" << std::endl;
|
||||
for (std::vector<option_t>::iterator this_option = internal_list.begin();
|
||||
this_option != internal_list.end();
|
||||
this_option++) {
|
||||
this_option++)
|
||||
{
|
||||
std::string help_line(" ");
|
||||
if (this_option->shortform == "-") {
|
||||
if (this_option->shortform == "-")
|
||||
{
|
||||
help_line += this_option->longform + " ";
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
help_line += this_option->shortform + " [ " + this_option->longform + " ]";
|
||||
}
|
||||
|
||||
switch (help_line.size() / 8) {
|
||||
switch (help_line.size() / 8)
|
||||
{
|
||||
case 0:
|
||||
help_line += "\t\t\t\t";
|
||||
break;
|
||||
|
@ -36,7 +36,8 @@ typedef enum
|
||||
STRING,
|
||||
} VOLK_OPTYPE;
|
||||
|
||||
class option_t {
|
||||
class option_t
|
||||
{
|
||||
public:
|
||||
option_t(std::string longform, std::string shortform, std::string msg, void (*callback)());
|
||||
option_t(std::string longform, std::string shortform, std::string msg, void (*callback)(int));
|
||||
@ -51,7 +52,6 @@ public:
|
||||
VOLK_OPTYPE option_type;
|
||||
std::string printval;
|
||||
void (*callback)();
|
||||
|
||||
};
|
||||
|
||||
class option_list
|
||||
@ -64,6 +64,7 @@ public:
|
||||
void parse(int argc, char **argv);
|
||||
|
||||
void help();
|
||||
|
||||
private:
|
||||
std::string program_name;
|
||||
std::vector<option_t> internal_list;
|
||||
|
@ -34,7 +34,6 @@
|
||||
#include <vector> // for vector, vector<>::const_..
|
||||
|
||||
|
||||
|
||||
namespace fs = boost::filesystem;
|
||||
|
||||
volk_gnsssdr_test_params_t test_params(1e-6f, 327.f, 8111, 1987, false, "");
|
||||
@ -75,9 +74,11 @@ int main(int argc, char *argv[])
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (int arg_number = 0; arg_number < argc; ++arg_number) {
|
||||
for (int arg_number = 0; arg_number < argc; ++arg_number)
|
||||
{
|
||||
if (std::string("--help") == std::string(argv[arg_number]) ||
|
||||
std::string("-h") == std::string(argv[arg_number])) {
|
||||
std::string("-h") == std::string(argv[arg_number]))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -85,19 +86,24 @@ int main(int argc, char *argv[])
|
||||
std::ofstream json_file;
|
||||
std::string config_file;
|
||||
|
||||
if ( json_filename != "" ) {
|
||||
if (json_filename != "")
|
||||
{
|
||||
json_file.open(json_filename.c_str());
|
||||
}
|
||||
|
||||
if ( volk_config_path != "" ) {
|
||||
if (volk_config_path != "")
|
||||
{
|
||||
config_file = volk_config_path + "/volk_config";
|
||||
}
|
||||
|
||||
// Run tests
|
||||
std::vector<volk_gnsssdr_test_results_t> results;
|
||||
if(update_mode) {
|
||||
if( config_file != "" ) read_results(&results, config_file);
|
||||
else read_results(&results);
|
||||
if (update_mode)
|
||||
{
|
||||
if (config_file != "")
|
||||
read_results(&results, config_file);
|
||||
else
|
||||
read_results(&results);
|
||||
}
|
||||
|
||||
// Initialize the list of tests
|
||||
@ -105,35 +111,43 @@ int main(int argc, char *argv[])
|
||||
|
||||
// Iterate through list of tests running each one
|
||||
std::string substr_to_match(test_params.kernel_regex());
|
||||
for(unsigned int ii = 0; ii < test_cases.size(); ++ii) {
|
||||
for (unsigned int ii = 0; ii < test_cases.size(); ++ii)
|
||||
{
|
||||
bool regex_match = true;
|
||||
|
||||
volk_gnsssdr_test_case_t test_case = test_cases[ii];
|
||||
// if the kernel name matches regex then do the test
|
||||
std::string test_case_name = test_case.name();
|
||||
if(test_case_name.find(substr_to_match) == std::string::npos) {
|
||||
if (test_case_name.find(substr_to_match) == std::string::npos)
|
||||
{
|
||||
regex_match = false;
|
||||
}
|
||||
|
||||
// if we are in update mode check if we've already got results
|
||||
// if we have any, then no need to test that kernel
|
||||
bool update = true;
|
||||
if(update_mode) {
|
||||
for(unsigned int jj=0; jj < results.size(); ++jj) {
|
||||
if (update_mode)
|
||||
{
|
||||
for (unsigned int jj = 0; jj < results.size(); ++jj)
|
||||
{
|
||||
if (results[jj].name == test_case.name() ||
|
||||
results[jj].name == test_case.puppet_master_name()) {
|
||||
results[jj].name == test_case.puppet_master_name())
|
||||
{
|
||||
update = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( regex_match && update ) {
|
||||
try {
|
||||
if (regex_match && update)
|
||||
{
|
||||
try
|
||||
{
|
||||
run_volk_gnsssdr_tests(test_case.desc(), test_case.kernel_ptr(), test_case.name(),
|
||||
test_case.test_parameters(), &results, test_case.puppet_master_name());
|
||||
}
|
||||
catch (std::string &error) {
|
||||
catch (std::string &error)
|
||||
{
|
||||
std::cerr << "Caught Exception in 'run_volk_gnssdr_tests': " << error << std::endl;
|
||||
}
|
||||
}
|
||||
@ -141,16 +155,21 @@ int main(int argc, char *argv[])
|
||||
|
||||
|
||||
// Output results according to provided options
|
||||
if(json_filename != "") {
|
||||
if (json_filename != "")
|
||||
{
|
||||
write_json(json_file, results);
|
||||
json_file.close();
|
||||
}
|
||||
|
||||
if(!dry_run) {
|
||||
if(config_file != "") write_results(&results, false, config_file);
|
||||
else write_results(&results, false);
|
||||
if (!dry_run)
|
||||
{
|
||||
if (config_file != "")
|
||||
write_results(&results, false, config_file);
|
||||
else
|
||||
write_results(&results, false);
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
std::cout << "Warning: this was a dry-run. Config not generated" << std::endl;
|
||||
}
|
||||
}
|
||||
@ -169,11 +188,13 @@ void read_results(std::vector<volk_gnsssdr_test_results_t> *results, std::string
|
||||
struct stat buffer;
|
||||
bool config_status = (stat(path.c_str(), &buffer) == 0);
|
||||
|
||||
if( config_status ) {
|
||||
if (config_status)
|
||||
{
|
||||
// a config exists and we are reading results from it
|
||||
std::ifstream config(path.c_str());
|
||||
char config_line[256];
|
||||
while(config.getline(config_line, 255)) {
|
||||
while (config.getline(config_line, 255))
|
||||
{
|
||||
// tokenize the input line by kernel_name unaligned aligned
|
||||
// then push back in the results vector with fields filled in
|
||||
|
||||
@ -184,13 +205,15 @@ void read_results(std::vector<volk_gnsssdr_test_results_t> *results, std::string
|
||||
|
||||
found = config_str.find(' ');
|
||||
// Split line by spaces
|
||||
while(found && found < str_size) {
|
||||
while (found && found < str_size)
|
||||
{
|
||||
found = config_str.find(' ');
|
||||
// kernel names MUST be less than 128 chars, which is
|
||||
// a length restricted by volk/volk_prefs.c
|
||||
// on the last token in the parsed string we won't find a space
|
||||
// so make sure we copy at most 128 chars.
|
||||
if(found > 127) {
|
||||
if (found > 127)
|
||||
{
|
||||
found = 127;
|
||||
}
|
||||
str_size = config_str.size();
|
||||
@ -201,7 +224,8 @@ void read_results(std::vector<volk_gnsssdr_test_results_t> *results, std::string
|
||||
config_str.erase(0, found + 1);
|
||||
}
|
||||
|
||||
if(single_kernel_result.size() == 3) {
|
||||
if (single_kernel_result.size() == 3)
|
||||
{
|
||||
volk_gnsssdr_test_results_t kernel_result;
|
||||
kernel_result.name = std::string(single_kernel_result[0]);
|
||||
kernel_result.config_name = std::string(single_kernel_result[0]);
|
||||
@ -211,7 +235,6 @@ void read_results(std::vector<volk_gnsssdr_test_results_t> *results, std::string
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void write_results(const std::vector<volk_gnsssdr_test_results_t> *results, bool update_result)
|
||||
@ -234,17 +257,21 @@ void write_results(const std::vector<volk_gnsssdr_test_results_t> *results, bool
|
||||
}
|
||||
|
||||
std::ofstream config;
|
||||
if(update_result) {
|
||||
if (update_result)
|
||||
{
|
||||
std::cout << "Updating " << path << " ..." << std::endl;
|
||||
config.open(path.c_str(), std::ofstream::app);
|
||||
if (!config.is_open()) { //either we don't have write access or we don't have the dir yet
|
||||
if (!config.is_open())
|
||||
{ //either we don't have write access or we don't have the dir yet
|
||||
std::cout << "Error opening file " << path << std::endl;
|
||||
}
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
std::cout << "Writing " << path << " ..." << std::endl;
|
||||
config.open(path.c_str());
|
||||
if (!config.is_open()) { //either we don't have write access or we don't have the dir yet
|
||||
if (!config.is_open())
|
||||
{ //either we don't have write access or we don't have the dir yet
|
||||
std::cout << "Error opening file " << path << std::endl;
|
||||
}
|
||||
|
||||
@ -255,7 +282,8 @@ void write_results(const std::vector<volk_gnsssdr_test_results_t> *results, bool
|
||||
}
|
||||
|
||||
std::vector<volk_gnsssdr_test_results_t>::const_iterator profile_results;
|
||||
for(profile_results = results->begin(); profile_results != results->end(); ++profile_results) {
|
||||
for (profile_results = results->begin(); profile_results != results->end(); ++profile_results)
|
||||
{
|
||||
config << profile_results->config_name << " "
|
||||
<< profile_results->best_arch_a << " "
|
||||
<< profile_results->best_arch_u << std::endl;
|
||||
@ -270,7 +298,8 @@ void write_json(std::ofstream &json_file, std::vector<volk_gnsssdr_test_results_
|
||||
size_t len = results.size();
|
||||
size_t i = 0;
|
||||
std::vector<volk_gnsssdr_test_results_t>::iterator result;
|
||||
for(result = results.begin(); result != results.end(); ++result) {
|
||||
for (result = results.begin(); result != results.end(); ++result)
|
||||
{
|
||||
json_file << " {" << std::endl;
|
||||
json_file << " \"name\": \"" << result->name << "\"," << std::endl;
|
||||
json_file << " \"vlen\": " << (int)(result->vlen) << "," << std::endl;
|
||||
@ -284,14 +313,16 @@ void write_json(std::ofstream &json_file, std::vector<volk_gnsssdr_test_results_
|
||||
size_t ri = 0;
|
||||
|
||||
std::map<std::string, volk_gnsssdr_test_time_t>::iterator kernel_time_pair;
|
||||
for(kernel_time_pair = result->results.begin(); kernel_time_pair != result->results.end(); ++kernel_time_pair) {
|
||||
for (kernel_time_pair = result->results.begin(); kernel_time_pair != result->results.end(); ++kernel_time_pair)
|
||||
{
|
||||
volk_gnsssdr_test_time_t time = kernel_time_pair->second;
|
||||
json_file << " \"" << time.name << "\": {" << std::endl;
|
||||
json_file << " \"name\": \"" << time.name << "\"," << std::endl;
|
||||
json_file << " \"time\": " << time.time << "," << std::endl;
|
||||
json_file << " \"units\": \"" << time.units << "\"" << std::endl;
|
||||
json_file << " }";
|
||||
if(ri+1 != results_len) {
|
||||
if (ri + 1 != results_len)
|
||||
{
|
||||
json_file << ",";
|
||||
}
|
||||
json_file << std::endl;
|
||||
@ -299,7 +330,8 @@ void write_json(std::ofstream &json_file, std::vector<volk_gnsssdr_test_results_
|
||||
}
|
||||
json_file << " }" << std::endl;
|
||||
json_file << " }";
|
||||
if(i+1 != len) {
|
||||
if (i + 1 != len)
|
||||
{
|
||||
json_file << ",";
|
||||
}
|
||||
json_file << std::endl;
|
||||
@ -308,5 +340,3 @@ void write_json(std::ofstream &json_file, std::vector<volk_gnsssdr_test_results_
|
||||
json_file << " ]" << std::endl;
|
||||
json_file << "}" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
|
@ -40,19 +40,22 @@ _mm256_complexmul_ps(__m256 x, __m256 y)
|
||||
}
|
||||
|
||||
static inline __m256
|
||||
_mm256_conjugate_ps(__m256 x){
|
||||
_mm256_conjugate_ps(__m256 x)
|
||||
{
|
||||
const __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
|
||||
return _mm256_xor_ps(x, conjugator); // conjugate y
|
||||
}
|
||||
|
||||
static inline __m256
|
||||
_mm256_complexconjugatemul_ps(__m256 x, __m256 y){
|
||||
_mm256_complexconjugatemul_ps(__m256 x, __m256 y)
|
||||
{
|
||||
y = _mm256_conjugate_ps(y);
|
||||
return _mm256_complexmul_ps(x, y);
|
||||
}
|
||||
|
||||
static inline __m256
|
||||
_mm256_magnitudesquared_ps(__m256 cplxValue1, __m256 cplxValue2){
|
||||
_mm256_magnitudesquared_ps(__m256 cplxValue1, __m256 cplxValue2)
|
||||
{
|
||||
__m256 complex1, complex2;
|
||||
cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
|
||||
cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
|
||||
@ -61,7 +64,8 @@ _mm256_magnitudesquared_ps(__m256 cplxValue1, __m256 cplxValue2){
|
||||
return _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
|
||||
}
|
||||
|
||||
static inline __m256 _mm256_complexnormalise_ps( __m256 z ){
|
||||
static inline __m256 _mm256_complexnormalise_ps(__m256 z)
|
||||
{
|
||||
__m256 tmp1 = _mm256_mul_ps(z, z);
|
||||
__m256 tmp2 = _mm256_hadd_ps(tmp1, tmp1);
|
||||
tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
|
||||
@ -70,7 +74,8 @@ static inline __m256 _mm256_complexnormalise_ps( __m256 z ){
|
||||
}
|
||||
|
||||
static inline __m256
|
||||
_mm256_magnitude_ps(__m256 cplxValue1, __m256 cplxValue2){
|
||||
_mm256_magnitude_ps(__m256 cplxValue1, __m256 cplxValue2)
|
||||
{
|
||||
return _mm256_sqrt_ps(_mm256_magnitudesquared_ps(cplxValue1, cplxValue2));
|
||||
}
|
||||
|
||||
|
@ -91,7 +91,9 @@
|
||||
// FIXME: due to the usage of complex.h, require gcc for c-linkage
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
#if defined(__cplusplus) && (__GNUC__)
|
||||
# define __VOLK_DECL_BEGIN extern "C" {
|
||||
#define __VOLK_DECL_BEGIN \
|
||||
extern "C" \
|
||||
{
|
||||
#define __VOLK_DECL_END }
|
||||
#else
|
||||
#define __VOLK_DECL_BEGIN
|
||||
@ -121,7 +123,8 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
union bit128{
|
||||
union bit128
|
||||
{
|
||||
uint8_t i8[16];
|
||||
uint16_t i16[8];
|
||||
uint32_t i[4];
|
||||
@ -138,7 +141,8 @@ union bit128{
|
||||
#endif
|
||||
};
|
||||
|
||||
union bit256{
|
||||
union bit256
|
||||
{
|
||||
uint8_t i8[32];
|
||||
uint16_t i16[16];
|
||||
uint32_t i[8];
|
||||
|
@ -55,19 +55,27 @@ typedef std::complex<int64_t> lv_64sc_t;
|
||||
typedef std::complex<float> lv_32fc_t;
|
||||
typedef std::complex<double> lv_64fc_t;
|
||||
|
||||
template <typename T> inline std::complex<T> lv_cmake(const T &r, const T &i){
|
||||
template <typename T>
|
||||
inline std::complex<T> lv_cmake(const T &r, const T &i)
|
||||
{
|
||||
return std::complex<T>(r, i);
|
||||
}
|
||||
|
||||
template <typename T> inline typename T::value_type lv_creal(const T &x){
|
||||
template <typename T>
|
||||
inline typename T::value_type lv_creal(const T &x)
|
||||
{
|
||||
return x.real();
|
||||
}
|
||||
|
||||
template <typename T> inline typename T::value_type lv_cimag(const T &x){
|
||||
template <typename T>
|
||||
inline typename T::value_type lv_cimag(const T &x)
|
||||
{
|
||||
return x.imag();
|
||||
}
|
||||
|
||||
template <typename T> inline T lv_conj(const T &x){
|
||||
template <typename T>
|
||||
inline T lv_conj(const T &x)
|
||||
{
|
||||
return std::conj(x);
|
||||
}
|
||||
|
||||
|
@ -48,14 +48,16 @@ _mm_complexconjugatemul_ps(__m128 x, __m128 y)
|
||||
}
|
||||
|
||||
static inline __m128
|
||||
_mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2){
|
||||
_mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
|
||||
{
|
||||
cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
|
||||
cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
|
||||
return _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
|
||||
}
|
||||
|
||||
static inline __m128
|
||||
_mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2){
|
||||
_mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
|
||||
{
|
||||
return _mm_sqrt_ps(_mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2));
|
||||
}
|
||||
|
||||
|
@ -27,7 +27,8 @@
|
||||
#include <xmmintrin.h>
|
||||
|
||||
static inline __m128
|
||||
_mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2){
|
||||
_mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2)
|
||||
{
|
||||
__m128 iValue, qValue;
|
||||
// Arrange in i1i2i3i4 format
|
||||
iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
|
||||
@ -39,7 +40,8 @@ _mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2){
|
||||
}
|
||||
|
||||
static inline __m128
|
||||
_mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2){
|
||||
_mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2)
|
||||
{
|
||||
return _mm_sqrt_ps(_mm_magnitudesquared_ps(cplxValue1, cplxValue2));
|
||||
}
|
||||
|
||||
|
@ -279,4 +279,3 @@ static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_neon(int16_t* result,
|
||||
#endif
|
||||
|
||||
#endif // INCLUDED_volk_gnsssdr_16i_resamplerpuppet_16i_H
|
||||
|
||||
|
@ -107,7 +107,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_a_sse4_1(int16_t** resul
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -173,7 +174,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_u_sse4_1(int16_t** resul
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -240,7 +242,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_a_sse3(int16_t** result,
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -310,7 +313,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_u_sse3(int16_t** result,
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -379,7 +383,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_a_avx(int16_t** result,
|
||||
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
|
||||
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
int local_code_chip_index[8];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m256 zeros = _mm256_setzero_ps();
|
||||
@ -456,7 +461,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_u_avx(int16_t** result,
|
||||
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
|
||||
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
int local_code_chip_index[8];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m256 zeros = _mm256_setzero_ps();
|
||||
@ -531,7 +537,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_neon(int16_t** result, c
|
||||
const float32x4_t rem_code_phase_chips_reg = vdupq_n_f32(rem_code_phase_chips);
|
||||
const float32x4_t code_phase_step_chips_reg = vdupq_n_f32(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int32_t local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int32_t local_code_chip_index[4];
|
||||
int32_t local_code_chip_index_;
|
||||
|
||||
const int32x4_t zeros = vdupq_n_s32(0);
|
||||
@ -539,7 +546,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_neon(int16_t** result, c
|
||||
const int32x4_t code_length_chips_reg_i = vdupq_n_s32((int32_t)code_length_chips);
|
||||
int32x4_t local_code_chip_index_reg, aux_i, negatives, i;
|
||||
float32x4_t aux, aux2, shifts_chips_reg, fi, c, j, cTrunc, base, indexn, reciprocal;
|
||||
__VOLK_ATTR_ALIGNED(16) const float vec[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
const float vec[4] = {0.0f, 1.0f, 2.0f, 3.0f};
|
||||
uint32x4_t igx;
|
||||
reciprocal = vrecpeq_f32(code_length_chips_reg_f);
|
||||
reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal);
|
||||
@ -605,4 +613,3 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_neon(int16_t** result, c
|
||||
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_16i_xn_resampler_16i_xn_H*/
|
||||
|
||||
|
@ -192,7 +192,8 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc
|
||||
const lv_16sc_t* _in_common = in_common;
|
||||
lv_16sc_t* _out = result;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
|
||||
__m128i* cacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
|
||||
|
||||
@ -206,11 +207,13 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc
|
||||
// phase rotation registers
|
||||
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
|
||||
__m128i pc1, pc2;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||
@ -290,7 +293,6 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc
|
||||
|
||||
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
|
||||
a = cacc[n_vec];
|
||||
_mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector
|
||||
dotProduct = lv_cmake(0, 0);
|
||||
@ -597,7 +599,8 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc
|
||||
const lv_16sc_t* _in_common = in_common;
|
||||
lv_16sc_t* _out = result;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
|
||||
__m128i* cacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
|
||||
|
||||
@ -611,11 +614,13 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc
|
||||
// phase rotation registers
|
||||
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
|
||||
__m128i pc1, pc2;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||
@ -695,7 +700,6 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc
|
||||
|
||||
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
|
||||
a = cacc[n_vec];
|
||||
_mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector
|
||||
dotProduct = lv_cmake(0, 0);
|
||||
@ -755,7 +759,8 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc
|
||||
lv_16sc_t tmp16;
|
||||
lv_32fc_t tmp32;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_16sc_t dotProductVector[8];
|
||||
lv_16sc_t dotProduct = lv_cmake(0, 0);
|
||||
|
||||
__m256i* cacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
|
||||
@ -780,8 +785,10 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc
|
||||
_phase_inc /= hypotf(lv_creal(_phase_inc), lv_cimag(_phase_inc));
|
||||
#endif
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) lv_32fc_t four_phase_inc[4];
|
||||
__VOLK_ATTR_ALIGNED(32) lv_32fc_t four_phase_acc[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t four_phase_inc[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t four_phase_acc[4];
|
||||
for (n = 0; n < 4; ++n)
|
||||
{
|
||||
four_phase_inc[n] = _phase_inc;
|
||||
@ -885,7 +892,6 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc
|
||||
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_AVX2 */
|
||||
|
||||
@ -907,7 +913,8 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2(lv_16sc
|
||||
lv_16sc_t tmp16;
|
||||
lv_32fc_t tmp32;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_16sc_t dotProductVector[8];
|
||||
lv_16sc_t dotProduct = lv_cmake(0, 0);
|
||||
|
||||
__m256i* cacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
|
||||
@ -932,8 +939,10 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2(lv_16sc
|
||||
_phase_inc /= hypotf(lv_creal(_phase_inc), lv_cimag(_phase_inc));
|
||||
#endif
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) lv_32fc_t four_phase_inc[4];
|
||||
__VOLK_ATTR_ALIGNED(32) lv_32fc_t four_phase_acc[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t four_phase_inc[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t four_phase_acc[4];
|
||||
for (n = 0; n < 4; ++n)
|
||||
{
|
||||
four_phase_inc[n] = _phase_inc;
|
||||
@ -1037,7 +1046,6 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2(lv_16sc
|
||||
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_AVX2 */
|
||||
|
||||
@ -1596,5 +1604,3 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2(lv_16sc
|
||||
//#endif [> LV_HAVE_NEON <]
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_16ic_16i_dot_prod_16ic_xn_H*/
|
||||
|
||||
|
||||
|
@ -379,6 +379,3 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_u_avx2(lv_
|
||||
//#endif // NEON
|
||||
|
||||
#endif // INCLUDED_volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_H
|
||||
|
||||
|
||||
|
||||
|
@ -231,4 +231,3 @@ static inline void volk_gnsssdr_16ic_conjugate_16ic_u_avx2(lv_16sc_t* cVector, c
|
||||
//#endif /* LV_HAVE_NEON */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_16ic_conjugate_16ic_H */
|
||||
|
||||
|
@ -97,7 +97,8 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_a_sse2(lv_16sc_t* resul
|
||||
|
||||
lv_16sc_t* _result = result;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
__m128 _rem_code_phase, _code_phase_step_chips;
|
||||
__m128i _code_length_chips, _code_length_chips_minus1;
|
||||
__m128 _code_phase_out, _code_phase_out_with_offset;
|
||||
@ -105,13 +106,15 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_a_sse2(lv_16sc_t* resul
|
||||
|
||||
_rem_code_phase = _mm_load1_ps(&rem_code_phase_chips); //load float to all four float values in m128 register
|
||||
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
|
||||
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int four_times_code_length_chips_minus1[4];
|
||||
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int four_times_code_length_chips[4];
|
||||
four_times_code_length_chips[0] = code_length_chips;
|
||||
four_times_code_length_chips[1] = code_length_chips;
|
||||
four_times_code_length_chips[2] = code_length_chips;
|
||||
@ -124,9 +127,11 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_a_sse2(lv_16sc_t* resul
|
||||
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float init_idx_float[4] = {0.0f, 1.0f, 2.0f, 3.0f};
|
||||
__m128 _4output_index = _mm_load_ps(init_idx_float);
|
||||
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float init_4constant_float[4] = {4.0f, 4.0f, 4.0f, 4.0f};
|
||||
__m128 _4constant_float = _mm_load_ps(init_4constant_float);
|
||||
|
||||
for (number = 0; number < quarterPoints; number++)
|
||||
@ -177,7 +182,8 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_u_sse2(lv_16sc_t* resul
|
||||
|
||||
lv_16sc_t* _result = result;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
__m128 _rem_code_phase, _code_phase_step_chips;
|
||||
__m128i _code_length_chips, _code_length_chips_minus1;
|
||||
__m128 _code_phase_out, _code_phase_out_with_offset;
|
||||
@ -185,13 +191,15 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_u_sse2(lv_16sc_t* resul
|
||||
|
||||
_rem_code_phase = _mm_load1_ps(&rem_code_phase_chips); //load float to all four float values in m128 register
|
||||
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
|
||||
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int four_times_code_length_chips_minus1[4];
|
||||
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int four_times_code_length_chips[4];
|
||||
four_times_code_length_chips[0] = code_length_chips;
|
||||
four_times_code_length_chips[1] = code_length_chips;
|
||||
four_times_code_length_chips[2] = code_length_chips;
|
||||
@ -204,9 +212,11 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_u_sse2(lv_16sc_t* resul
|
||||
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float init_idx_float[4] = {0.0f, 1.0f, 2.0f, 3.0f};
|
||||
__m128 _4output_index = _mm_loadu_ps(init_idx_float);
|
||||
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float init_4constant_float[4] = {4.0f, 4.0f, 4.0f, 4.0f};
|
||||
__m128 _4constant_float = _mm_loadu_ps(init_4constant_float);
|
||||
|
||||
for (number = 0; number < quarterPoints; number++)
|
||||
@ -257,7 +267,8 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_neon(lv_16sc_t* result,
|
||||
|
||||
lv_16sc_t* _result = result;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
float32x4_t _rem_code_phase, _code_phase_step_chips;
|
||||
int32x4_t _code_length_chips, _code_length_chips_minus1;
|
||||
float32x4_t _code_phase_out, _code_phase_out_with_offset;
|
||||
@ -266,13 +277,15 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_neon(lv_16sc_t* result,
|
||||
|
||||
_rem_code_phase = vld1q_dup_f32(&rem_code_phase_chips); //load float to all four float values in m128 register
|
||||
_code_phase_step_chips = vld1q_dup_f32(&code_phase_step_chips); //load float to all four float values in m128 register
|
||||
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int four_times_code_length_chips_minus1[4];
|
||||
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int four_times_code_length_chips[4];
|
||||
four_times_code_length_chips[0] = code_length_chips;
|
||||
four_times_code_length_chips[1] = code_length_chips;
|
||||
four_times_code_length_chips[2] = code_length_chips;
|
||||
@ -285,9 +298,11 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_neon(lv_16sc_t* result,
|
||||
uint32x4_t negative_indexes, overflow_indexes;
|
||||
int32x4_t zero = vmovq_n_s32(0);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float init_idx_float[4] = {0.0f, 1.0f, 2.0f, 3.0f};
|
||||
float32x4_t _4output_index = vld1q_f32(init_idx_float);
|
||||
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float init_4constant_float[4] = {4.0f, 4.0f, 4.0f, 4.0f};
|
||||
float32x4_t _4constant_float = vld1q_f32(init_4constant_float);
|
||||
|
||||
for (number = 0; number < quarterPoints; number++)
|
||||
|
@ -141,11 +141,13 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
|
||||
unsigned int number;
|
||||
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
||||
__m128i c1, c2, result;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||
@ -232,7 +234,6 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
@ -244,11 +245,13 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc
|
||||
unsigned int j;
|
||||
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
||||
__m128i c1, c2, result;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||
@ -385,7 +388,6 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
@ -395,11 +397,13 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
|
||||
unsigned int number;
|
||||
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
||||
__m128i c1, c2, result;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||
@ -498,11 +502,13 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(lv_16sc
|
||||
unsigned int j;
|
||||
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
||||
__m128i c1, c2, result;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||
@ -657,8 +663,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe
|
||||
lv_16sc_t* _out = outVector;
|
||||
|
||||
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase4_real[4] = {lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase4_imag[4] = {lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4)};
|
||||
|
||||
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
|
||||
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
|
||||
@ -667,8 +675,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe
|
||||
lv_32fc_t phase3 = phase2 * phase_inc;
|
||||
lv_32fc_t phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
|
||||
|
||||
float32x4_t _phase_real = vld1q_f32(__phase_real);
|
||||
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
|
||||
@ -745,8 +755,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe
|
||||
phase3 = phase2 * phase_inc;
|
||||
phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t ____phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t ____phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
|
||||
|
||||
_phase_real = vld1q_f32(____phase_real);
|
||||
_phase_imag = vld1q_f32(____phase_imag);
|
||||
@ -791,8 +803,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(lv_16sc_t
|
||||
lv_16sc_t* _out = outVector;
|
||||
|
||||
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase4_real[4] = {lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase4_imag[4] = {lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4)};
|
||||
|
||||
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
|
||||
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
|
||||
@ -801,8 +815,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(lv_16sc_t
|
||||
lv_32fc_t phase3 = phase2 * phase_inc;
|
||||
lv_32fc_t phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
|
||||
|
||||
float32x4_t _phase_real = vld1q_f32(__phase_real);
|
||||
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
|
||||
@ -879,8 +895,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(lv_16sc_t
|
||||
phase3 = phase2 * phase_inc;
|
||||
phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t ____phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t ____phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
|
||||
|
||||
_phase_real = vld1q_f32(____phase_real);
|
||||
_phase_imag = vld1q_f32(____phase_imag);
|
||||
|
@ -96,7 +96,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
|
||||
if (sse_iters > 0)
|
||||
{
|
||||
__m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
|
||||
realcacc = _mm_setzero_si128();
|
||||
imagcacc = _mm_setzero_si128();
|
||||
@ -174,7 +175,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
|
||||
if (sse_iters > 0)
|
||||
{
|
||||
__m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc, result;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
|
||||
realcacc = _mm_setzero_si128();
|
||||
imagcacc = _mm_setzero_si128();
|
||||
@ -253,7 +255,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_axv2(lv_16sc_t* out, con
|
||||
if (avx_iters > 0)
|
||||
{
|
||||
__m256i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc, result;
|
||||
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_16sc_t dotProductVector[8];
|
||||
|
||||
realcacc = _mm256_setzero_si256();
|
||||
imagcacc = _mm256_setzero_si256();
|
||||
@ -330,7 +333,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_axv2(lv_16sc_t* out, con
|
||||
if (avx_iters > 0)
|
||||
{
|
||||
__m256i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc, result;
|
||||
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_16sc_t dotProductVector[8];
|
||||
|
||||
realcacc = _mm256_setzero_si256();
|
||||
imagcacc = _mm256_setzero_si256();
|
||||
@ -407,7 +411,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const
|
||||
// 2nd lane holds the imaginary part
|
||||
int16x4x2_t a_val, b_val, c_val, accumulator;
|
||||
int16x4x2_t tmp_real, tmp_imag;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t accum_result[4];
|
||||
accumulator.val[0] = vdup_n_s16(0);
|
||||
accumulator.val[1] = vdup_n_s16(0);
|
||||
lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0);
|
||||
@ -474,7 +479,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon_vma(lv_16sc_t* out, c
|
||||
// 2nd lane holds the imaginary part
|
||||
int16x4x2_t a_val, b_val, accumulator;
|
||||
int16x4x2_t tmp;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t accum_result[4];
|
||||
accumulator.val[0] = vdup_n_s16(0);
|
||||
accumulator.val[1] = vdup_n_s16(0);
|
||||
|
||||
@ -526,7 +532,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon_optvma(lv_16sc_t* out
|
||||
// 2nd lane holds the imaginary part
|
||||
int16x4x2_t a_val, b_val, accumulator1, accumulator2;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t accum_result[4];
|
||||
accumulator1.val[0] = vdup_n_s16(0);
|
||||
accumulator1.val[1] = vdup_n_s16(0);
|
||||
accumulator2.val[0] = vdup_n_s16(0);
|
||||
|
@ -125,7 +125,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* resul
|
||||
|
||||
if (sse_iters > 0)
|
||||
{
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
|
||||
__m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
|
||||
__m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
|
||||
@ -219,7 +220,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* resul
|
||||
|
||||
if (sse_iters > 0)
|
||||
{
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
|
||||
__m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
|
||||
__m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
|
||||
@ -313,7 +315,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_avx2(lv_16sc_t* resul
|
||||
|
||||
if (sse_iters > 0)
|
||||
{
|
||||
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_16sc_t dotProductVector[8];
|
||||
|
||||
__m256i* realcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
|
||||
__m256i* imagcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
|
||||
@ -407,7 +410,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_avx2(lv_16sc_t* resul
|
||||
|
||||
if (sse_iters > 0)
|
||||
{
|
||||
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_16sc_t dotProductVector[8];
|
||||
|
||||
__m256i* realcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
|
||||
__m256i* imagcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
|
||||
@ -501,7 +505,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result,
|
||||
|
||||
if (neon_iters > 0)
|
||||
{
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
|
||||
int16x4x2_t a_val, b_val, c_val;
|
||||
|
||||
@ -589,7 +594,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(lv_16sc_t* res
|
||||
|
||||
if (neon_iters > 0)
|
||||
{
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
|
||||
int16x4x2_t a_val, b_val, tmp;
|
||||
|
||||
@ -666,7 +672,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_optvma(lv_16sc_t*
|
||||
|
||||
if (neon_iters > 0)
|
||||
{
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
|
||||
int16x4x2_t a_val, b_val;
|
||||
|
||||
|
@ -262,5 +262,3 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon_optvma(lv_16sc
|
||||
#endif // NEON
|
||||
|
||||
#endif // INCLUDED_volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_H
|
||||
|
||||
|
||||
|
@ -292,7 +292,6 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_avx2(lv_16sc_t* out, con
|
||||
#endif /* LV_HAVE_AVX2 */
|
||||
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
|
@ -191,7 +191,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
|
||||
const lv_16sc_t* _in_common = in_common;
|
||||
lv_16sc_t* _out = result;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
|
||||
__m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
|
||||
__m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
|
||||
@ -210,11 +211,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
|
||||
// phase rotation registers
|
||||
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
|
||||
__m128i pc1, pc2;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||
@ -369,7 +372,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l
|
||||
const lv_16sc_t* _in_common = in_common;
|
||||
lv_16sc_t* _out = result;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
|
||||
__m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
|
||||
__m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
|
||||
@ -388,11 +392,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l
|
||||
// phase rotation registers
|
||||
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
|
||||
__m128i pc1, pc2;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||
@ -594,7 +600,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l
|
||||
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
@ -615,7 +620,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
|
||||
const lv_16sc_t* _in_common = in_common;
|
||||
|
||||
lv_16sc_t* _out = result;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
|
||||
__m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
|
||||
__m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
|
||||
@ -634,11 +640,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
|
||||
// phase rotation registers
|
||||
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
|
||||
__m128i pc1, pc2;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_loadu_ps((float*)two_phase_inc);
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_loadu_ps((float*)two_phase_acc);
|
||||
@ -781,7 +789,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_
|
||||
lv_16sc_t tmp16;
|
||||
lv_32fc_t tmp32;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_16sc_t dotProductVector[8];
|
||||
lv_16sc_t dotProduct = lv_cmake(0, 0);
|
||||
|
||||
__m256i* realcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
|
||||
@ -798,11 +807,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_
|
||||
|
||||
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
||||
__m128i c1, c2, result1, result2;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||
@ -966,7 +977,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_
|
||||
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_AVX2 */
|
||||
|
||||
@ -989,7 +999,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l
|
||||
lv_16sc_t tmp16;
|
||||
lv_32fc_t tmp32;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_16sc_t dotProductVector[8];
|
||||
lv_16sc_t dotProduct = lv_cmake(0, 0);
|
||||
|
||||
__m256i* realcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
|
||||
@ -1006,11 +1017,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l
|
||||
|
||||
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
||||
__m128i c1, c2, result1, result2;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||
@ -1312,8 +1325,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
|
||||
float phase_est;
|
||||
|
||||
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase4_real[4] = {lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase4_imag[4] = {lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4)};
|
||||
|
||||
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
|
||||
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
|
||||
@ -1322,14 +1337,17 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
|
||||
lv_32fc_t phase3 = phase2 * phase_inc;
|
||||
lv_32fc_t phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
|
||||
|
||||
float32x4_t _phase_real = vld1q_f32(__phase_real);
|
||||
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
|
||||
|
||||
int16x4x2_t a_val, b_val, c_val;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
float32x4_t half = vdupq_n_f32(0.5f);
|
||||
int16x4x2_t tmp16;
|
||||
int32x4x2_t tmp32i;
|
||||
@ -1426,8 +1444,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
|
||||
phase3 = phase2 * phase_inc;
|
||||
phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t ____phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t ____phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
|
||||
|
||||
_phase_real = vld1q_f32(____phase_real);
|
||||
_phase_imag = vld1q_f32(____phase_imag);
|
||||
@ -1495,8 +1515,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
|
||||
float phase_est;
|
||||
//printf("arg phase0: %f", arg_phase0);
|
||||
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase4_real[4] = {lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase4_imag[4] = {lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4)};
|
||||
|
||||
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
|
||||
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
|
||||
@ -1505,14 +1527,17 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
|
||||
lv_32fc_t phase3 = phase2 * phase_inc;
|
||||
lv_32fc_t phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
|
||||
|
||||
float32x4_t _phase_real = vld1q_f32(__phase_real);
|
||||
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
|
||||
|
||||
int16x4x2_t a_val, b_val;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
float32x4_t half = vdupq_n_f32(0.5f);
|
||||
int16x4x2_t tmp16;
|
||||
int32x4x2_t tmp32i;
|
||||
@ -1589,8 +1614,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
|
||||
phase3 = phase2 * phase_inc;
|
||||
phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t ____phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t ____phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
|
||||
|
||||
_phase_real = vld1q_f32(____phase_real);
|
||||
_phase_imag = vld1q_f32(____phase_imag);
|
||||
@ -1605,7 +1632,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
|
||||
//_phase_real = vmulq_f32(_phase_real, Round);
|
||||
//_phase_imag = vmulq_f32(_phase_imag, Round);
|
||||
//printf("After %i: %f,%f, %f\n\n", number, _phase_real[0], _phase_imag[0], sqrt(_phase_real[0]*_phase_real[0]+_phase_imag[0]*_phase_imag[0]));
|
||||
|
||||
}
|
||||
|
||||
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
@ -1686,8 +1712,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_
|
||||
float phase_est;
|
||||
|
||||
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase4_real[4] = {lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase4_imag[4] = {lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4)};
|
||||
|
||||
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
|
||||
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
|
||||
@ -1696,14 +1724,17 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_
|
||||
lv_32fc_t phase3 = phase2 * phase_inc;
|
||||
lv_32fc_t phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
|
||||
|
||||
float32x4_t _phase_real = vld1q_f32(__phase_real);
|
||||
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
|
||||
|
||||
int16x4x2_t a_val, b_val;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_16sc_t dotProductVector[4];
|
||||
float32x4_t half = vdupq_n_f32(0.5f);
|
||||
int32x4x2_t tmp32i;
|
||||
|
||||
@ -1782,8 +1813,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_
|
||||
phase3 = phase2 * phase_inc;
|
||||
phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t ____phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t ____phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
|
||||
|
||||
_phase_real = vld1q_f32(____phase_real);
|
||||
_phase_imag = vld1q_f32(____phase_imag);
|
||||
@ -1842,4 +1875,3 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_H*/
|
||||
|
||||
|
@ -379,5 +379,3 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon_vma(lv
|
||||
#endif // NEON
|
||||
|
||||
#endif // INCLUDED_volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_H
|
||||
|
||||
|
||||
|
@ -106,7 +106,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse4_1(lv_16sc_t** r
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -172,7 +173,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse4_1(lv_16sc_t** r
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -239,7 +241,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse3(lv_16sc_t** res
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -309,7 +312,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse3(lv_16sc_t** res
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -378,7 +382,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_avx(lv_16sc_t** resu
|
||||
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
|
||||
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
int local_code_chip_index[8];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m256 zeros = _mm256_setzero_ps();
|
||||
@ -455,7 +460,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_avx(lv_16sc_t** resu
|
||||
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
|
||||
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
int local_code_chip_index[8];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m256 zeros = _mm256_setzero_ps();
|
||||
@ -530,7 +536,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul
|
||||
const float32x4_t rem_code_phase_chips_reg = vdupq_n_f32(rem_code_phase_chips);
|
||||
const float32x4_t code_phase_step_chips_reg = vdupq_n_f32(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int32_t local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int32_t local_code_chip_index[4];
|
||||
int32_t local_code_chip_index_;
|
||||
|
||||
const int32x4_t zeros = vdupq_n_s32(0);
|
||||
@ -538,7 +545,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul
|
||||
const int32x4_t code_length_chips_reg_i = vdupq_n_s32((int32_t)code_length_chips);
|
||||
int32x4_t local_code_chip_index_reg, aux_i, negatives, i;
|
||||
float32x4_t aux, aux2, shifts_chips_reg, fi, c, j, cTrunc, base, indexn, reciprocal;
|
||||
__VOLK_ATTR_ALIGNED(16) const float vec[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
const float vec[4] = {0.0f, 1.0f, 2.0f, 3.0f};
|
||||
uint32x4_t igx;
|
||||
reciprocal = vrecpeq_f32(code_length_chips_reg_f);
|
||||
reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal);
|
||||
@ -604,4 +612,3 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul
|
||||
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_H*/
|
||||
|
||||
|
@ -102,20 +102,23 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_a_sse2(lv_16sc_t*
|
||||
const unsigned int quarterPoints = num_output_samples / 4;
|
||||
|
||||
lv_16sc_t** _result = result;
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
float tmp_rem_code_phase_chips;
|
||||
__m128 _rem_code_phase, _code_phase_step_chips;
|
||||
__m128i _code_length_chips, _code_length_chips_minus1;
|
||||
__m128 _code_phase_out, _code_phase_out_with_offset;
|
||||
|
||||
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
|
||||
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int four_times_code_length_chips_minus1[4];
|
||||
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int four_times_code_length_chips[4];
|
||||
four_times_code_length_chips[0] = code_length_chips;
|
||||
four_times_code_length_chips[1] = code_length_chips;
|
||||
four_times_code_length_chips[2] = code_length_chips;
|
||||
@ -128,9 +131,11 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_a_sse2(lv_16sc_t*
|
||||
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float init_idx_float[4] = {0.0f, 1.0f, 2.0f, 3.0f};
|
||||
__m128 _4output_index = _mm_load_ps(init_idx_float);
|
||||
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float init_4constant_float[4] = {4.0f, 4.0f, 4.0f, 4.0f};
|
||||
__m128 _4constant_float = _mm_load_ps(init_4constant_float);
|
||||
|
||||
int current_vector = 0;
|
||||
@ -193,20 +198,23 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_u_sse2(lv_16sc_t*
|
||||
const unsigned int quarterPoints = num_output_samples / 4;
|
||||
|
||||
lv_16sc_t** _result = result;
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
float tmp_rem_code_phase_chips;
|
||||
__m128 _rem_code_phase, _code_phase_step_chips;
|
||||
__m128i _code_length_chips, _code_length_chips_minus1;
|
||||
__m128 _code_phase_out, _code_phase_out_with_offset;
|
||||
|
||||
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
|
||||
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int four_times_code_length_chips_minus1[4];
|
||||
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int four_times_code_length_chips[4];
|
||||
four_times_code_length_chips[0] = code_length_chips;
|
||||
four_times_code_length_chips[1] = code_length_chips;
|
||||
four_times_code_length_chips[2] = code_length_chips;
|
||||
@ -219,9 +227,11 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_u_sse2(lv_16sc_t*
|
||||
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float init_idx_float[4] = {0.0f, 1.0f, 2.0f, 3.0f};
|
||||
__m128 _4output_index = _mm_loadu_ps(init_idx_float);
|
||||
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float init_4constant_float[4] = {4.0f, 4.0f, 4.0f, 4.0f};
|
||||
__m128 _4constant_float = _mm_loadu_ps(init_4constant_float);
|
||||
|
||||
int current_vector = 0;
|
||||
@ -285,7 +295,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_neon(lv_16sc_t**
|
||||
float32x4_t half = vdupq_n_f32(0.5f);
|
||||
|
||||
lv_16sc_t** _result = result;
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
float tmp_rem_code_phase_chips;
|
||||
float32x4_t _rem_code_phase, _code_phase_step_chips;
|
||||
int32x4_t _code_length_chips, _code_length_chips_minus1;
|
||||
@ -293,13 +304,15 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_neon(lv_16sc_t**
|
||||
float32x4_t sign, PlusHalf, Round;
|
||||
|
||||
_code_phase_step_chips = vld1q_dup_f32(&code_phase_step_chips); //load float to all four float values in float32x4_t register
|
||||
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int four_times_code_length_chips_minus1[4];
|
||||
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
|
||||
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int four_times_code_length_chips[4];
|
||||
four_times_code_length_chips[0] = code_length_chips;
|
||||
four_times_code_length_chips[1] = code_length_chips;
|
||||
four_times_code_length_chips[2] = code_length_chips;
|
||||
@ -312,9 +325,11 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_neon(lv_16sc_t**
|
||||
uint32x4_t negative_indexes, overflow_indexes;
|
||||
int32x4_t zero = vmovq_n_s32(0);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float init_idx_float[4] = {0.0f, 1.0f, 2.0f, 3.0f};
|
||||
float32x4_t _4output_index = vld1q_f32(init_idx_float);
|
||||
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float init_4constant_float[4] = {4.0f, 4.0f, 4.0f, 4.0f};
|
||||
float32x4_t _4constant_float = vld1q_f32(init_4constant_float);
|
||||
|
||||
int current_vector = 0;
|
||||
|
@ -29,7 +29,6 @@
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
* \page volk_gnsssdr_32f_index_max_32u.h
|
||||
*
|
||||
@ -80,12 +79,15 @@ static inline void volk_gnsssdr_32f_index_max_32u_a_avx(uint32_t* target, const
|
||||
__m256 compareResults;
|
||||
__m256 currentValues;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) float maxValuesBuffer[8];
|
||||
__VOLK_ATTR_ALIGNED(32) float maxIndexesBuffer[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
float maxValuesBuffer[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
float maxIndexesBuffer[8];
|
||||
|
||||
for (; number < quarterPoints; number++)
|
||||
{
|
||||
currentValues = _mm256_load_ps(inputPtr); inputPtr += 8;
|
||||
currentValues = _mm256_load_ps(inputPtr);
|
||||
inputPtr += 8;
|
||||
currentIndexes = _mm256_add_ps(currentIndexes, indexIncrementValues);
|
||||
compareResults = _mm256_cmp_ps(maxValues, currentValues, 0x1e);
|
||||
maxValuesIndex = _mm256_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
|
||||
@ -143,12 +145,15 @@ static inline void volk_gnsssdr_32f_index_max_32u_u_avx(uint32_t* target, const
|
||||
__m256 compareResults;
|
||||
__m256 currentValues;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) float maxValuesBuffer[8];
|
||||
__VOLK_ATTR_ALIGNED(32) float maxIndexesBuffer[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
float maxValuesBuffer[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
float maxIndexesBuffer[8];
|
||||
|
||||
for (; number < quarterPoints; number++)
|
||||
{
|
||||
currentValues = _mm256_loadu_ps(inputPtr); inputPtr += 8;
|
||||
currentValues = _mm256_loadu_ps(inputPtr);
|
||||
inputPtr += 8;
|
||||
currentIndexes = _mm256_add_ps(currentIndexes, indexIncrementValues);
|
||||
compareResults = _mm256_cmp_ps(maxValues, currentValues, 0x1e);
|
||||
maxValuesIndex = _mm256_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
|
||||
@ -206,12 +211,15 @@ static inline void volk_gnsssdr_32f_index_max_32u_a_sse4_1(uint32_t* target, con
|
||||
__m128 compareResults;
|
||||
__m128 currentValues;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float maxValuesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float maxIndexesBuffer[4];
|
||||
|
||||
for (; number < quarterPoints; number++)
|
||||
{
|
||||
currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
|
||||
currentValues = _mm_load_ps(inputPtr);
|
||||
inputPtr += 4;
|
||||
currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
|
||||
compareResults = _mm_cmpgt_ps(maxValues, currentValues);
|
||||
maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
|
||||
@ -269,12 +277,15 @@ static inline void volk_gnsssdr_32f_index_max_32u_u_sse4_1(uint32_t* target, con
|
||||
__m128 compareResults;
|
||||
__m128 currentValues;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float maxValuesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float maxIndexesBuffer[4];
|
||||
|
||||
for (; number < quarterPoints; number++)
|
||||
{
|
||||
currentValues = _mm_loadu_ps(inputPtr); inputPtr += 4;
|
||||
currentValues = _mm_loadu_ps(inputPtr);
|
||||
inputPtr += 4;
|
||||
currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
|
||||
compareResults = _mm_cmpgt_ps(maxValues, currentValues);
|
||||
maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
|
||||
@ -333,12 +344,15 @@ static inline void volk_gnsssdr_32f_index_max_32u_a_sse(uint32_t* target, const
|
||||
__m128 compareResults;
|
||||
__m128 currentValues;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float maxValuesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float maxIndexesBuffer[4];
|
||||
|
||||
for (; number < quarterPoints; number++)
|
||||
{
|
||||
currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
|
||||
currentValues = _mm_load_ps(inputPtr);
|
||||
inputPtr += 4;
|
||||
currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
|
||||
compareResults = _mm_cmpgt_ps(maxValues, currentValues);
|
||||
maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex), _mm_andnot_ps(compareResults, currentIndexes));
|
||||
@ -397,12 +411,15 @@ static inline void volk_gnsssdr_32f_index_max_32u_u_sse(uint32_t* target, const
|
||||
__m128 compareResults;
|
||||
__m128 currentValues;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float maxValuesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float maxIndexesBuffer[4];
|
||||
|
||||
for (; number < quarterPoints; number++)
|
||||
{
|
||||
currentValues = _mm_loadu_ps(inputPtr); inputPtr += 4;
|
||||
currentValues = _mm_loadu_ps(inputPtr);
|
||||
inputPtr += 4;
|
||||
currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
|
||||
compareResults = _mm_cmpgt_ps(maxValues, currentValues);
|
||||
maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex), _mm_andnot_ps(compareResults, currentIndexes));
|
||||
@ -476,7 +493,8 @@ static inline void volk_gnsssdr_32f_index_max_32u_neon(uint32_t* target, const f
|
||||
|
||||
float* inputPtr = (float*)src0;
|
||||
float32x4_t indexIncrementValues = vdupq_n_f32(4);
|
||||
__VOLK_ATTR_ALIGNED(16) float currentIndexes_float[4] = { -4.0f, -3.0f, -2.0f, -1.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float currentIndexes_float[4] = {-4.0f, -3.0f, -2.0f, -1.0f};
|
||||
float32x4_t currentIndexes = vld1q_f32(currentIndexes_float);
|
||||
|
||||
float max = src0[0];
|
||||
@ -487,12 +505,15 @@ static inline void volk_gnsssdr_32f_index_max_32u_neon(uint32_t* target, const f
|
||||
uint32x4_t currentIndexes_u;
|
||||
float32x4_t currentValues;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float maxValuesBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float maxIndexesBuffer[4];
|
||||
|
||||
for (; number < quarterPoints; number++)
|
||||
{
|
||||
currentValues = vld1q_f32(inputPtr); inputPtr += 4;
|
||||
currentValues = vld1q_f32(inputPtr);
|
||||
inputPtr += 4;
|
||||
currentIndexes = vaddq_f32(currentIndexes, indexIncrementValues);
|
||||
currentIndexes_u = vcvtq_u32_f32(currentIndexes);
|
||||
compareResults = vcgtq_f32(maxValues, currentValues);
|
||||
@ -528,4 +549,3 @@ static inline void volk_gnsssdr_32f_index_max_32u_neon(uint32_t* target, const f
|
||||
#endif /*LV_HAVE_NEON*/
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_32f_index_max_32u_H*/
|
||||
|
||||
|
@ -42,7 +42,6 @@
|
||||
#include <string.h>
|
||||
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_generic(float* result, const float* local_code, unsigned int num_points)
|
||||
{
|
||||
@ -276,4 +275,3 @@ static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_neon(float* result, co
|
||||
#endif
|
||||
|
||||
#endif // INCLUDED_volk_gnsssdr_32f_resamplerpuppet_32f_H
|
||||
|
||||
|
@ -268,26 +268,44 @@ static inline void volk_gnsssdr_32f_sincos_32fc_a_sse2(lv_32fc_t* out, const flo
|
||||
__m128i emm0, emm2, emm4;
|
||||
|
||||
/* declare some SSE constants */
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _ps_inv_sign_mask[4] = {~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _ps_sign_mask[4] = {(int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000};
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2};
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_cephes_FOPI[4] = {1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _pi32_1[4] = {1, 1, 1, 1};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _pi32_inv1[4] = {~1, ~1, ~1, ~1};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _pi32_2[4] = {2, 2, 2, 2};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _pi32_4[4] = {4, 4, 4, 4};
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_minus_cephes_DP1[4] = {-0.78515625, -0.78515625, -0.78515625, -0.78515625};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_minus_cephes_DP2[4] = {-2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_minus_cephes_DP3[4] = {-3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_coscof_p0[4] = {2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_coscof_p1[4] = {-1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_coscof_p2[4] = {4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_sincof_p0[4] = {-1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_sincof_p1[4] = {8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_sincof_p2[4] = {-1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_0p5[4] = {0.5f, 0.5f, 0.5f, 0.5f};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_1[4] = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
|
||||
for (; number < sse_iters; number++)
|
||||
{
|
||||
@ -397,7 +415,6 @@ static inline void volk_gnsssdr_32f_sincos_32fc_a_sse2(lv_32fc_t* out, const flo
|
||||
_in = *aPtr++;
|
||||
*bPtr++ = lv_cmake((float)cosf(_in), (float)sinf(_in));
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
@ -421,26 +438,44 @@ static inline void volk_gnsssdr_32f_sincos_32fc_u_sse2(lv_32fc_t* out, const flo
|
||||
__m128i emm0, emm2, emm4;
|
||||
|
||||
/* declare some SSE constants */
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _ps_inv_sign_mask[4] = {~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _ps_sign_mask[4] = {(int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000};
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2};
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_cephes_FOPI[4] = {1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _pi32_1[4] = {1, 1, 1, 1};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _pi32_inv1[4] = {~1, ~1, ~1, ~1};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _pi32_2[4] = {2, 2, 2, 2};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _pi32_4[4] = {4, 4, 4, 4};
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_minus_cephes_DP1[4] = {-0.78515625, -0.78515625, -0.78515625, -0.78515625};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_minus_cephes_DP2[4] = {-2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_minus_cephes_DP3[4] = {-3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_coscof_p0[4] = {2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_coscof_p1[4] = {-1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_coscof_p2[4] = {4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_sincof_p0[4] = {-1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_sincof_p1[4] = {8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_sincof_p2[4] = {-1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_0p5[4] = {0.5f, 0.5f, 0.5f, 0.5f};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_1[4] = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
|
||||
for (; number < sse_iters; number++)
|
||||
{
|
||||
@ -550,7 +585,6 @@ static inline void volk_gnsssdr_32f_sincos_32fc_u_sse2(lv_32fc_t* out, const flo
|
||||
_in = *aPtr++;
|
||||
*bPtr++ = lv_cmake((float)cosf(_in), (float)sinf(_in));
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
|
@ -110,7 +110,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse3(float** result, c
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -180,7 +181,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse3(float** result, c
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -248,7 +250,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse4_1(float** result,
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -314,7 +317,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse4_1(float** result,
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -380,7 +384,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_a_avx(float** result, co
|
||||
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
|
||||
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
int local_code_chip_index[8];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m256 zeros = _mm256_setzero_ps();
|
||||
@ -457,7 +462,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_u_avx(float** result, co
|
||||
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
|
||||
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
int local_code_chip_index[8];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m256 zeros = _mm256_setzero_ps();
|
||||
@ -536,7 +542,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_neon(float** result, con
|
||||
const float32x4_t rem_code_phase_chips_reg = vdupq_n_f32(rem_code_phase_chips);
|
||||
const float32x4_t code_phase_step_chips_reg = vdupq_n_f32(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int32_t local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int32_t local_code_chip_index[4];
|
||||
int32_t local_code_chip_index_;
|
||||
|
||||
const int32x4_t zeros = vdupq_n_s32(0);
|
||||
@ -544,7 +551,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_neon(float** result, con
|
||||
const int32x4_t code_length_chips_reg_i = vdupq_n_s32((int32_t)code_length_chips);
|
||||
int32x4_t local_code_chip_index_reg, aux_i, negatives, i;
|
||||
float32x4_t aux, aux2, shifts_chips_reg, fi, c, j, cTrunc, base, indexn, reciprocal;
|
||||
__VOLK_ATTR_ALIGNED(16) const float vec[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
const float vec[4] = {0.0f, 1.0f, 2.0f, 3.0f};
|
||||
uint32x4_t igx;
|
||||
reciprocal = vrecpeq_f32(code_length_chips_reg_f);
|
||||
reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal);
|
||||
@ -606,5 +614,3 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_neon(float** result, con
|
||||
#endif
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_32f_xn_resampler_32f_xn_H*/
|
||||
|
||||
|
||||
|
@ -204,7 +204,8 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_
|
||||
|
||||
// Set up the complex rotator
|
||||
__m256 z0, z1, z2, z3;
|
||||
__VOLK_ATTR_ALIGNED(32) lv_32fc_t phase_vec[16];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t phase_vec[16];
|
||||
for (vec_ind = 0; vec_ind < 16; ++vec_ind)
|
||||
{
|
||||
phase_vec[vec_ind] = _phase;
|
||||
@ -216,7 +217,11 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_
|
||||
z2 = _mm256_load_ps((float*)(phase_vec + 8));
|
||||
z3 = _mm256_load_ps((float*)(phase_vec + 12));
|
||||
|
||||
lv_32fc_t dz = phase_inc; dz *= dz; dz *= dz; dz *= dz; dz *= dz; // dz = phase_inc^16;
|
||||
lv_32fc_t dz = phase_inc;
|
||||
dz *= dz;
|
||||
dz *= dz;
|
||||
dz *= dz;
|
||||
dz *= dz; // dz = phase_inc^16;
|
||||
|
||||
for (vec_ind = 0; vec_ind < 4; ++vec_ind)
|
||||
{
|
||||
@ -282,7 +287,8 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_
|
||||
|
||||
aPtr += 32;
|
||||
}
|
||||
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t dotProductVector[4];
|
||||
|
||||
for (vec_ind = 0; vec_ind < num_a_vectors; ++vec_ind)
|
||||
{
|
||||
@ -362,7 +368,8 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_
|
||||
|
||||
// Set up the complex rotator
|
||||
__m256 z0, z1, z2, z3;
|
||||
__VOLK_ATTR_ALIGNED(32) lv_32fc_t phase_vec[16];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t phase_vec[16];
|
||||
for (vec_ind = 0; vec_ind < 16; ++vec_ind)
|
||||
{
|
||||
phase_vec[vec_ind] = _phase;
|
||||
@ -374,7 +381,11 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_
|
||||
z2 = _mm256_load_ps((float*)(phase_vec + 8));
|
||||
z3 = _mm256_load_ps((float*)(phase_vec + 12));
|
||||
|
||||
lv_32fc_t dz = phase_inc; dz *= dz; dz *= dz; dz *= dz; dz *= dz; // dz = phase_inc^16;
|
||||
lv_32fc_t dz = phase_inc;
|
||||
dz *= dz;
|
||||
dz *= dz;
|
||||
dz *= dz;
|
||||
dz *= dz; // dz = phase_inc^16;
|
||||
|
||||
for (vec_ind = 0; vec_ind < 4; ++vec_ind)
|
||||
{
|
||||
@ -386,7 +397,6 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_
|
||||
|
||||
for (; number < sixteenthPoints; number++)
|
||||
{
|
||||
|
||||
a0Val = _mm256_load_ps(aPtr);
|
||||
a1Val = _mm256_load_ps(aPtr + 8);
|
||||
a2Val = _mm256_load_ps(aPtr + 16);
|
||||
@ -441,7 +451,8 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_
|
||||
|
||||
aPtr += 32;
|
||||
}
|
||||
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t dotProductVector[4];
|
||||
|
||||
for (vec_ind = 0; vec_ind < num_a_vectors; ++vec_ind)
|
||||
{
|
||||
@ -482,5 +493,3 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_
|
||||
#endif /* LV_HAVE_AVX */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_H */
|
||||
|
||||
|
||||
|
@ -159,4 +159,3 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_a_avx(lv_3
|
||||
#endif // AVX
|
||||
|
||||
#endif // INCLUDED_volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_H
|
||||
|
||||
|
@ -82,8 +82,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector
|
||||
|
||||
for (i = 0; i < sse_iters; i++)
|
||||
{
|
||||
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 8);
|
||||
|
||||
// Clip
|
||||
@ -135,8 +137,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector,
|
||||
|
||||
for (i = 0; i < sse_iters; i++)
|
||||
{
|
||||
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 8);
|
||||
|
||||
// Clip
|
||||
@ -186,8 +190,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_avx2(lv_16sc_t* outputVector
|
||||
|
||||
for (i = 0; i < avx2_iters; i++)
|
||||
{
|
||||
inputVal1 = _mm256_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 8;
|
||||
inputVal2 = _mm256_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 8;
|
||||
inputVal1 = _mm256_loadu_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 8;
|
||||
inputVal2 = _mm256_loadu_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 8;
|
||||
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 16);
|
||||
|
||||
// Clip
|
||||
@ -240,8 +246,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector
|
||||
|
||||
for (i = 0; i < sse_iters; i++)
|
||||
{
|
||||
inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal1 = _mm_load_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
inputVal2 = _mm_load_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 8);
|
||||
|
||||
// Clip
|
||||
@ -291,8 +299,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector,
|
||||
|
||||
for (i = 0; i < sse_iters; i++)
|
||||
{
|
||||
inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal1 = _mm_load_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
inputVal2 = _mm_load_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 8);
|
||||
|
||||
// Clip
|
||||
@ -343,8 +353,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_avx2(lv_16sc_t* outputVector
|
||||
|
||||
for (i = 0; i < avx2_iters; i++)
|
||||
{
|
||||
inputVal1 = _mm256_load_ps((float*)inputVectorPtr); inputVectorPtr += 8;
|
||||
inputVal2 = _mm256_load_ps((float*)inputVectorPtr); inputVectorPtr += 8;
|
||||
inputVal1 = _mm256_load_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 8;
|
||||
inputVal2 = _mm256_load_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 8;
|
||||
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 16);
|
||||
|
||||
// Clip
|
||||
@ -399,8 +411,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector,
|
||||
|
||||
for (i = 0; i < neon_iters; i++)
|
||||
{
|
||||
a = vld1q_f32((const float32_t*)(inputVectorPtr)); inputVectorPtr += 4;
|
||||
b = vld1q_f32((const float32_t*)(inputVectorPtr)); inputVectorPtr += 4;
|
||||
a = vld1q_f32((const float32_t*)(inputVectorPtr));
|
||||
inputVectorPtr += 4;
|
||||
b = vld1q_f32((const float32_t*)(inputVectorPtr));
|
||||
inputVectorPtr += 4;
|
||||
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 8);
|
||||
|
||||
ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);
|
||||
|
60
src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h
Executable file → Normal file
60
src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h
Executable file → Normal file
@ -109,10 +109,14 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_avx2(lv_8sc_t* outputVector,
|
||||
|
||||
for (i = 0; i < avx2_iters; i++)
|
||||
{
|
||||
inputVal1 = _mm256_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 8;
|
||||
inputVal2 = _mm256_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 8;
|
||||
inputVal3 = _mm256_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 8;
|
||||
inputVal4 = _mm256_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 8;
|
||||
inputVal1 = _mm256_loadu_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 8;
|
||||
inputVal2 = _mm256_loadu_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 8;
|
||||
inputVal3 = _mm256_loadu_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 8;
|
||||
inputVal4 = _mm256_loadu_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 8;
|
||||
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 32);
|
||||
|
||||
inputVal1 = _mm256_mul_ps(inputVal1, vmax_val);
|
||||
@ -179,10 +183,14 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_avx2(lv_8sc_t* outputVector,
|
||||
|
||||
for (i = 0; i < avx2_iters; i++)
|
||||
{
|
||||
inputVal1 = _mm256_load_ps((float*)inputVectorPtr); inputVectorPtr += 8;
|
||||
inputVal2 = _mm256_load_ps((float*)inputVectorPtr); inputVectorPtr += 8;
|
||||
inputVal3 = _mm256_load_ps((float*)inputVectorPtr); inputVectorPtr += 8;
|
||||
inputVal4 = _mm256_load_ps((float*)inputVectorPtr); inputVectorPtr += 8;
|
||||
inputVal1 = _mm256_load_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 8;
|
||||
inputVal2 = _mm256_load_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 8;
|
||||
inputVal3 = _mm256_load_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 8;
|
||||
inputVal4 = _mm256_load_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 8;
|
||||
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 32);
|
||||
|
||||
inputVal1 = _mm256_mul_ps(inputVal1, vmax_val);
|
||||
@ -249,10 +257,14 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector,
|
||||
|
||||
for (i = 0; i < sse_iters; i++)
|
||||
{
|
||||
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal3 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal4 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
inputVal3 = _mm_loadu_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
inputVal4 = _mm_loadu_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
|
||||
inputVal1 = _mm_mul_ps(inputVal1, vmax_val);
|
||||
inputVal2 = _mm_mul_ps(inputVal2, vmax_val);
|
||||
@ -315,10 +327,14 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector,
|
||||
|
||||
for (i = 0; i < sse_iters; i++)
|
||||
{
|
||||
inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal3 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal4 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal1 = _mm_load_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
inputVal2 = _mm_load_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
inputVal3 = _mm_load_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
inputVal4 = _mm_load_ps((float*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
|
||||
inputVal1 = _mm_mul_ps(inputVal1, vmax_val);
|
||||
inputVal2 = _mm_mul_ps(inputVal2, vmax_val);
|
||||
@ -385,7 +401,8 @@ static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, co
|
||||
|
||||
for (i = 0; i < neon_iters; i++)
|
||||
{
|
||||
a = vld1q_f32((const float32_t*)inputVectorPtr); inputVectorPtr += 4;
|
||||
a = vld1q_f32((const float32_t*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
a = vmulq_f32(a, max_val);
|
||||
ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);
|
||||
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret1), 31)));
|
||||
@ -394,7 +411,8 @@ static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, co
|
||||
toint_a = vcvtq_s32_f32(Round);
|
||||
intInputVal1 = vqmovn_s32(toint_a);
|
||||
|
||||
a = vld1q_f32((const float32_t*)inputVectorPtr); inputVectorPtr += 4;
|
||||
a = vld1q_f32((const float32_t*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
a = vmulq_f32(a, max_val);
|
||||
ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);
|
||||
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret1), 31)));
|
||||
@ -406,7 +424,8 @@ static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, co
|
||||
pack16_8_1 = vcombine_s16(intInputVal1, intInputVal2);
|
||||
res8_1 = vqmovn_s16(pack16_8_1);
|
||||
|
||||
a = vld1q_f32((const float32_t*)inputVectorPtr); inputVectorPtr += 4;
|
||||
a = vld1q_f32((const float32_t*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
a = vmulq_f32(a, max_val);
|
||||
ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);
|
||||
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret1), 31)));
|
||||
@ -415,7 +434,8 @@ static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, co
|
||||
toint_a = vcvtq_s32_f32(Round);
|
||||
intInputVal1 = vqmovn_s32(toint_a);
|
||||
|
||||
a = vld1q_f32((const float32_t*)inputVectorPtr); inputVectorPtr += 4;
|
||||
a = vld1q_f32((const float32_t*)inputVectorPtr);
|
||||
inputVectorPtr += 4;
|
||||
a = vmulq_f32(a, max_val);
|
||||
ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);
|
||||
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret1), 31)));
|
||||
|
@ -42,7 +42,6 @@
|
||||
#include <string.h>
|
||||
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_generic(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
|
||||
{
|
||||
|
@ -179,7 +179,8 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_
|
||||
const lv_32fc_t** _in_a = in_a;
|
||||
const lv_32fc_t* _in_common = in_common;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t dotProductVector[2];
|
||||
|
||||
__m128* acc = (__m128*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128), volk_gnsssdr_get_alignment());
|
||||
|
||||
@ -191,11 +192,13 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_
|
||||
// phase rotation registers
|
||||
__m128 a, two_phase_acc_reg, two_phase_inc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z1;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||
@ -288,7 +291,8 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_
|
||||
const lv_32fc_t** _in_a = in_a;
|
||||
const lv_32fc_t* _in_common = in_common;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t dotProductVector[2];
|
||||
|
||||
__m128* acc = (__m128*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128), volk_gnsssdr_get_alignment());
|
||||
|
||||
@ -300,11 +304,13 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_
|
||||
// phase rotation registers
|
||||
__m128 a, two_phase_acc_reg, two_phase_inc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z1;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
|
||||
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||
@ -398,7 +404,8 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t
|
||||
const lv_32fc_t* _in_common = in_common;
|
||||
lv_32fc_t _phase = (*phase);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t dotProductVector[4];
|
||||
|
||||
__m256* acc = (__m256*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256), volk_gnsssdr_get_alignment());
|
||||
|
||||
@ -525,7 +532,8 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t
|
||||
const lv_32fc_t* _in_common = in_common;
|
||||
lv_32fc_t _phase = (*phase);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t dotProductVector[4];
|
||||
|
||||
__m256* acc = (__m256*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256), volk_gnsssdr_get_alignment());
|
||||
|
||||
@ -538,7 +546,8 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t
|
||||
// phase rotation registers
|
||||
__m256 a, four_phase_acc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) lv_32fc_t four_phase_inc[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t four_phase_inc[4];
|
||||
const lv_32fc_t phase_inc2 = phase_inc * phase_inc;
|
||||
const lv_32fc_t phase_inc3 = phase_inc2 * phase_inc;
|
||||
const lv_32fc_t phase_inc4 = phase_inc3 * phase_inc;
|
||||
@ -548,7 +557,8 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t
|
||||
four_phase_inc[3] = phase_inc4;
|
||||
const __m256 four_phase_inc_reg = _mm256_load_ps((float*)four_phase_inc);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) lv_32fc_t four_phase_acc[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t four_phase_acc[4];
|
||||
four_phase_acc[0] = _phase;
|
||||
four_phase_acc[1] = _phase * phase_inc;
|
||||
four_phase_acc[2] = _phase * phase_inc2;
|
||||
@ -662,8 +672,10 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t*
|
||||
float32_t phase_est;
|
||||
|
||||
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase4_real[4] = {lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase4_imag[4] = {lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4)};
|
||||
|
||||
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
|
||||
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
|
||||
@ -672,13 +684,16 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t*
|
||||
lv_32fc_t phase3 = phase2 * phase_inc;
|
||||
lv_32fc_t phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((_phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((_phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase_real[4] = {lv_creal((_phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t __phase_imag[4] = {lv_cimag((_phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
|
||||
|
||||
float32x4_t _phase_real = vld1q_f32(__phase_real);
|
||||
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t dotProductVector[4];
|
||||
|
||||
float32x4x2_t a_val, b_val, tmp32_real, tmp32_imag;
|
||||
|
||||
@ -728,8 +743,10 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t*
|
||||
phase3 = phase2 * phase_inc;
|
||||
phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((_phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((_phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t ____phase_real[4] = {lv_creal((_phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t ____phase_imag[4] = {lv_cimag((_phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
|
||||
|
||||
_phase_real = vld1q_f32(____phase_real);
|
||||
_phase_imag = vld1q_f32(____phase_imag);
|
||||
@ -786,4 +803,3 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t*
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_H */
|
||||
|
||||
|
@ -107,7 +107,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_sse3(lv_32fc_t** res
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -177,7 +178,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_sse3(lv_32fc_t** res
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -245,7 +247,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_sse4_1(lv_32fc_t** r
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -311,7 +314,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_sse4_1(lv_32fc_t** r
|
||||
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
|
||||
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int local_code_chip_index[4];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m128i zeros = _mm_setzero_si128();
|
||||
@ -377,7 +381,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_avx(lv_32fc_t** resu
|
||||
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
|
||||
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
int local_code_chip_index[8];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m256 zeros = _mm256_setzero_ps();
|
||||
@ -454,7 +459,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_avx(lv_32fc_t** resu
|
||||
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
|
||||
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
int local_code_chip_index[8];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m256 zeros = _mm256_setzero_ps();
|
||||
@ -531,7 +537,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_avx2(lv_32fc_t** res
|
||||
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
|
||||
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
int local_code_chip_index[8];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m256 zeros = _mm256_setzero_ps();
|
||||
@ -609,7 +616,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_avx2(lv_32fc_t** res
|
||||
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
|
||||
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
int local_code_chip_index[8];
|
||||
int local_code_chip_index_;
|
||||
|
||||
const __m256 zeros = _mm256_setzero_ps();
|
||||
@ -689,7 +697,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_neon(lv_32fc_t** resul
|
||||
const float32x4_t rem_code_phase_chips_reg = vdupq_n_f32(rem_code_phase_chips);
|
||||
const float32x4_t code_phase_step_chips_reg = vdupq_n_f32(code_phase_step_chips);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) int32_t local_code_chip_index[4];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
int32_t local_code_chip_index[4];
|
||||
int32_t local_code_chip_index_;
|
||||
|
||||
const int32x4_t zeros = vdupq_n_s32(0);
|
||||
@ -697,7 +706,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_neon(lv_32fc_t** resul
|
||||
const int32x4_t code_length_chips_reg_i = vdupq_n_s32((int32_t)code_length_chips);
|
||||
int32x4_t local_code_chip_index_reg, aux_i, negatives, i;
|
||||
float32x4_t aux, aux2, shifts_chips_reg, fi, c, j, cTrunc, base, indexn, reciprocal;
|
||||
__VOLK_ATTR_ALIGNED(16) const float vec[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
const float vec[4] = {0.0f, 1.0f, 2.0f, 3.0f};
|
||||
uint32x4_t igx;
|
||||
reciprocal = vrecpeq_f32(code_length_chips_reg_f);
|
||||
reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal);
|
||||
|
@ -69,7 +69,8 @@ static inline void volk_gnsssdr_64f_accumulator_64f_u_avx(double* result, const
|
||||
unsigned int i;
|
||||
const double* aPtr = inputBuffer;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) double tempBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
double tempBuffer[4];
|
||||
__m256d accumulator = _mm256_setzero_pd();
|
||||
__m256d aVal = _mm256_setzero_pd();
|
||||
|
||||
@ -108,7 +109,8 @@ static inline void volk_gnsssdr_64f_accumulator_64f_u_sse3(double* result,const
|
||||
unsigned int i;
|
||||
const double* aPtr = inputBuffer;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) double tempBuffer[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
double tempBuffer[2];
|
||||
__m128d accumulator = _mm_setzero_pd();
|
||||
__m128d aVal = _mm_setzero_pd();
|
||||
|
||||
@ -164,7 +166,8 @@ static inline void volk_gnsssdr_64f_accumulator_64f_a_avx(double* result,const d
|
||||
unsigned int i;
|
||||
const double* aPtr = inputBuffer;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) double tempBuffer[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
double tempBuffer[4];
|
||||
__m256d accumulator = _mm256_setzero_pd();
|
||||
__m256d aVal = _mm256_setzero_pd();
|
||||
|
||||
@ -203,7 +206,8 @@ static inline void volk_gnsssdr_64f_accumulator_64f_a_sse3(double* result,const
|
||||
unsigned int i;
|
||||
const double* aPtr = inputBuffer;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) double tempBuffer[2];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
double tempBuffer[2];
|
||||
__m128d accumulator = _mm_setzero_pd();
|
||||
__m128d aVal = _mm_setzero_pd();
|
||||
|
||||
|
@ -70,7 +70,8 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_u_sse3(char* result, const ch
|
||||
unsigned int i;
|
||||
const char* aPtr = inputBuffer;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) char tempBuffer[16];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
char tempBuffer[16];
|
||||
__m128i accumulator = _mm_setzero_si128();
|
||||
__m128i aVal = _mm_setzero_si128();
|
||||
|
||||
@ -125,7 +126,8 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const ch
|
||||
|
||||
const char* aPtr = inputBuffer;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) char tempBuffer[16];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
char tempBuffer[16];
|
||||
__m128i accumulator = _mm_setzero_si128();
|
||||
__m128i aVal = _mm_setzero_si128();
|
||||
|
||||
@ -164,7 +166,8 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_a_avx2(char* result, const ch
|
||||
|
||||
const char* aPtr = inputBuffer;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) char tempBuffer[32];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
char tempBuffer[32];
|
||||
__m256i accumulator = _mm256_setzero_si256();
|
||||
__m256i aVal = _mm256_setzero_si256();
|
||||
|
||||
@ -202,7 +205,8 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_u_avx2(char* result, const ch
|
||||
unsigned int i;
|
||||
const char* aPtr = inputBuffer;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) char tempBuffer[32];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
char tempBuffer[32];
|
||||
__m256i accumulator = _mm256_setzero_si256();
|
||||
__m256i aVal = _mm256_setzero_si256();
|
||||
|
||||
|
@ -74,7 +74,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_avx2(unsigned int* target, co
|
||||
char max = src0[0];
|
||||
unsigned int index = 0;
|
||||
unsigned int mask;
|
||||
__VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
char currentValuesBuffer[32];
|
||||
__m256i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm256_set1_epi8(max);
|
||||
@ -137,7 +138,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, con
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
unsigned int index = 0;
|
||||
__VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
char currentValuesBuffer[32];
|
||||
__m256i ones, compareResults, currentValues;
|
||||
__m128i compareResultslo, compareResultshi, maxValues, lo, hi;
|
||||
|
||||
@ -204,7 +206,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target,
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
unsigned int index = 0;
|
||||
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
char currentValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
@ -263,7 +266,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, co
|
||||
char max = src0[0];
|
||||
unsigned int index = 0;
|
||||
unsigned short mask;
|
||||
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
char currentValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
@ -351,7 +355,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_avx2(unsigned int* target, co
|
||||
char max = src0[0];
|
||||
unsigned int index = 0;
|
||||
unsigned int mask;
|
||||
__VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
char currentValuesBuffer[32];
|
||||
__m256i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm256_set1_epi8(max);
|
||||
@ -414,7 +419,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, con
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
unsigned int index = 0;
|
||||
__VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
char currentValuesBuffer[32];
|
||||
__m256i ones, compareResults, currentValues;
|
||||
__m128i compareResultslo, compareResultshi, maxValues, lo, hi;
|
||||
|
||||
@ -481,7 +487,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target,
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
unsigned int index = 0;
|
||||
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
char currentValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
@ -540,7 +547,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, co
|
||||
char max = src0[0];
|
||||
unsigned int index = 0;
|
||||
unsigned short mask;
|
||||
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
char currentValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
|
@ -70,7 +70,8 @@ static inline void volk_gnsssdr_8i_max_s8i_u_avx2(char* target, const char* src0
|
||||
unsigned int i;
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
__VOLK_ATTR_ALIGNED(32) char maxValuesBuffer[32];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
char maxValuesBuffer[32];
|
||||
__m256i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm256_set1_epi8(max);
|
||||
@ -119,7 +120,8 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse4_1(char* target, const char* sr
|
||||
unsigned int i;
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
__VOLK_ATTR_ALIGNED(16) char maxValuesBuffer[16];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
char maxValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
@ -169,7 +171,8 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse2(char* target, const char* src0
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
unsigned short mask;
|
||||
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
char currentValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
@ -250,7 +253,8 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse4_1(char* target, const char* sr
|
||||
unsigned int i;
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
__VOLK_ATTR_ALIGNED(16) char maxValuesBuffer[16];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
char maxValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
@ -299,7 +303,8 @@ static inline void volk_gnsssdr_8i_max_s8i_a_avx2(char* target, const char* src0
|
||||
unsigned int i;
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
__VOLK_ATTR_ALIGNED(32) char maxValuesBuffer[32];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
char maxValuesBuffer[32];
|
||||
__m256i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm256_set1_epi8(max);
|
||||
@ -349,7 +354,8 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse2(char* target, const char* src0
|
||||
char* inputPtr = (char*)src0;
|
||||
char max = src0[0];
|
||||
unsigned short mask;
|
||||
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
char currentValuesBuffer[16];
|
||||
__m128i maxValues, compareResults, currentValues;
|
||||
|
||||
maxValues = _mm_set1_epi8(max);
|
||||
|
@ -155,7 +155,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_ssse3(lv_8sc_t* cVector, con
|
||||
{
|
||||
*c++ = lv_conj(*a++);
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_SSSE3 */
|
||||
|
||||
@ -188,7 +187,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_sse3(lv_8sc_t* cVector, cons
|
||||
{
|
||||
*c++ = lv_conj(*a++);
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
@ -336,7 +334,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, cons
|
||||
{
|
||||
*c++ = lv_conj(*a++);
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
@ -111,7 +111,6 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector,
|
||||
{
|
||||
*c++ = (*a++) * scalar;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
@ -204,7 +203,6 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector,
|
||||
{
|
||||
*c++ = (*a++) * scalar;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
@ -165,7 +165,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con
|
||||
|
||||
totalc = _mm_or_si128(realcacc, imagcacc);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_8sc_t dotProductVector[8];
|
||||
|
||||
_mm_storeu_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
|
||||
|
||||
@ -240,7 +241,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c
|
||||
|
||||
totalc = _mm_blendv_epi8(imagcacc, realcacc, mult1);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_8sc_t dotProductVector[8];
|
||||
|
||||
_mm_storeu_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
|
||||
|
||||
@ -317,7 +319,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, con
|
||||
|
||||
totalc = _mm_or_si128(realcacc, imagcacc);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_8sc_t dotProductVector[8];
|
||||
|
||||
_mm_store_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
|
||||
|
||||
@ -391,7 +394,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c
|
||||
|
||||
totalc = _mm_blendv_epi8(imagcacc, realcacc, mult1);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_8sc_t dotProductVector[8];
|
||||
|
||||
_mm_store_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
|
||||
|
||||
@ -446,7 +450,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const
|
||||
// for 2-lane vectors, 1st lane holds the real part,
|
||||
// 2nd lane holds the imaginary part
|
||||
int8x8x2_t a_val, b_val, c_val, accumulator, tmp_real, tmp_imag;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t accum_result[8] = { lv_cmake(0,0) };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
lv_8sc_t accum_result[8] = {lv_cmake(0, 0)};
|
||||
accumulator.val[0] = vdup_n_s8(0);
|
||||
accumulator.val[1] = vdup_n_s8(0);
|
||||
unsigned int number;
|
||||
|
@ -241,29 +241,49 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_u_sse2(lv_32fc_t* out, const fl
|
||||
__m128i emm0, emm2, emm4;
|
||||
|
||||
/* declare some SSE constants */
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _ps_inv_sign_mask[4] = {~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _ps_sign_mask[4] = {(int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000};
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2};
|
||||
__VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_cephes_FOPI[4] = {1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _pi32_1[4] = {1, 1, 1, 1};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _pi32_inv1[4] = {~1, ~1, ~1, ~1};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _pi32_2[4] = {2, 2, 2, 2};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const int _pi32_4[4] = {4, 4, 4, 4};
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
|
||||
__VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_minus_cephes_DP1[4] = {-0.78515625, -0.78515625, -0.78515625, -0.78515625};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_minus_cephes_DP2[4] = {-2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_minus_cephes_DP3[4] = {-3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_coscof_p0[4] = {2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_coscof_p1[4] = {-1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_coscof_p2[4] = {4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_sincof_p0[4] = {-1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_sincof_p1[4] = {8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_sincof_p2[4] = {-1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_0p5[4] = {0.5f, 0.5f, 0.5f, 0.5f};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
static const float _ps_1[4] = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float four_phases[4] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc };
|
||||
__VOLK_ATTR_ALIGNED(16) float four_phases_inc[4] = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float four_phases[4] = {_phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc};
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float four_phases_inc[4] = {4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc};
|
||||
four_phases_reg = _mm_load_ps(four_phases);
|
||||
const __m128 four_phases_inc_reg = _mm_load_ps(four_phases_inc);
|
||||
|
||||
@ -456,29 +476,49 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_a_avx2(lv_32fc_t* out, const fl
|
||||
__m128 aux, c1, s1;
|
||||
|
||||
/* declare some AVX2 constants */
|
||||
__VOLK_ATTR_ALIGNED(32) static const int _ps_inv_sign_mask[8] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const int _ps_sign_mask[8] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const int _ps_inv_sign_mask[8] = {~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const int _ps_sign_mask[8] = {(int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000};
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_cephes_FOPI[8] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const int _pi32_1[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const int _pi32_inv1[8] = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const int _pi32_2[8] = { 2, 2, 2, 2, 2, 2, 2, 2 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const int _pi32_4[8] = { 4, 4, 4, 4, 4, 4, 4, 4 };
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_cephes_FOPI[8] = {1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const int _pi32_1[8] = {1, 1, 1, 1, 1, 1, 1, 1};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const int _pi32_inv1[8] = {~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const int _pi32_2[8] = {2, 2, 2, 2, 2, 2, 2, 2};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const int _pi32_4[8] = {4, 4, 4, 4, 4, 4, 4, 4};
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP1[8] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP2[8] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP3[8] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p0[8] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p1[8] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p2[8] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p0[8] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p1[8] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p2[8] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_0p5[8] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_1[8] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_minus_cephes_DP1[8] = {-0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_minus_cephes_DP2[8] = {-2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_minus_cephes_DP3[8] = {-3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_coscof_p0[8] = {2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_coscof_p1[8] = {-1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_coscof_p2[8] = {4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_sincof_p0[8] = {-1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_sincof_p1[8] = {8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_sincof_p2[8] = {-1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_0p5[8] = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_1[8] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) float eight_phases[8] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc };
|
||||
__VOLK_ATTR_ALIGNED(32) float eight_phases_inc[8] = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc };
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
float eight_phases[8] = {_phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
float eight_phases_inc[8] = {8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc};
|
||||
eight_phases_reg = _mm256_load_ps(eight_phases);
|
||||
const __m256 eight_phases_inc_reg = _mm256_load_ps(eight_phases_inc);
|
||||
|
||||
@ -624,29 +664,49 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_u_avx2(lv_32fc_t* out, const fl
|
||||
__m128 aux, c1, s1;
|
||||
|
||||
/* declare some AVX2 constants */
|
||||
__VOLK_ATTR_ALIGNED(32) static const int _ps_inv_sign_mask[8] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const int _ps_sign_mask[8] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const int _ps_inv_sign_mask[8] = {~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const int _ps_sign_mask[8] = {(int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000};
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_cephes_FOPI[8] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const int _pi32_1[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const int _pi32_inv1[8] = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const int _pi32_2[8] = { 2, 2, 2, 2, 2, 2, 2, 2 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const int _pi32_4[8] = { 4, 4, 4, 4, 4, 4, 4, 4 };
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_cephes_FOPI[8] = {1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const int _pi32_1[8] = {1, 1, 1, 1, 1, 1, 1, 1};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const int _pi32_inv1[8] = {~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const int _pi32_2[8] = {2, 2, 2, 2, 2, 2, 2, 2};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const int _pi32_4[8] = {4, 4, 4, 4, 4, 4, 4, 4};
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP1[8] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP2[8] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP3[8] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p0[8] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p1[8] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p2[8] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p0[8] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p1[8] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p2[8] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_0p5[8] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
|
||||
__VOLK_ATTR_ALIGNED(32) static const float _ps_1[8] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_minus_cephes_DP1[8] = {-0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_minus_cephes_DP2[8] = {-2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_minus_cephes_DP3[8] = {-3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_coscof_p0[8] = {2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_coscof_p1[8] = {-1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_coscof_p2[8] = {4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_sincof_p0[8] = {-1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_sincof_p1[8] = {8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_sincof_p2[8] = {-1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_0p5[8] = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
static const float _ps_1[8] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
|
||||
|
||||
__VOLK_ATTR_ALIGNED(32) float eight_phases[8] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc };
|
||||
__VOLK_ATTR_ALIGNED(32) float eight_phases_inc[8] = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc };
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
float eight_phases[8] = {_phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc};
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
float eight_phases_inc[8] = {8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc};
|
||||
eight_phases_reg = _mm256_load_ps(eight_phases);
|
||||
const __m256 eight_phases_inc_reg = _mm256_load_ps(eight_phases_inc);
|
||||
|
||||
@ -783,9 +843,11 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_neon(lv_32fc_t* out, const floa
|
||||
const unsigned int neon_iters = num_points / 4;
|
||||
float _phase = (*phase);
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t four_phases[4] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t four_phases[4] = {_phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc};
|
||||
float four_inc = 4 * phase_inc;
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t four_phases_inc[4] = { four_inc, four_inc, four_inc, four_inc };
|
||||
__VOLK_ATTR_ALIGNED(16)
|
||||
float32_t four_phases_inc[4] = {four_inc, four_inc, four_inc, four_inc};
|
||||
|
||||
float32x4_t four_phases_reg = vld1q_f32(four_phases);
|
||||
float32x4_t four_phases_inc_reg = vld1q_f32(four_phases_inc);
|
||||
|
@ -50,7 +50,6 @@
|
||||
|
||||
std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t test_params)
|
||||
{
|
||||
|
||||
// Some kernels need a lower tolerance
|
||||
volk_gnsssdr_test_params_t test_params_inacc = volk_gnsssdr_test_params_t(1e-3, test_params.scalar(),
|
||||
test_params.vlen(), test_params.iter(), test_params.benchmark_mode(), test_params.kernel_regex());
|
||||
@ -98,8 +97,7 @@ std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t
|
||||
QA(VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn, test_params_int16))
|
||||
QA(VOLK_INIT_PUPP(volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn, test_params_int16))
|
||||
QA(VOLK_INIT_PUPP(volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc, volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn, test_params_int1))
|
||||
QA(VOLK_INIT_PUPP(volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc, volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn, test_params_int1))
|
||||
;
|
||||
QA(VOLK_INIT_PUPP(volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc, volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn, test_params_int1));
|
||||
|
||||
return test_cases;
|
||||
}
|
||||
|
@ -37,7 +37,8 @@
|
||||
#include <vector> // for vector
|
||||
|
||||
|
||||
float uniform() {
|
||||
float uniform()
|
||||
{
|
||||
std::random_device r;
|
||||
std::default_random_engine e1(r());
|
||||
std::uniform_real_distribution<float> uniform_dist(-1, 1);
|
||||
@ -60,8 +61,10 @@ void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n)
|
||||
|
||||
if (type.is_float)
|
||||
{
|
||||
if(type.size == 8) random_floats<double>((double *)data, n);
|
||||
else random_floats<float>((float *)data, n);
|
||||
if (type.size == 8)
|
||||
random_floats<double>((double *)data, n);
|
||||
else
|
||||
random_floats<float>((float *)data, n);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -75,22 +78,30 @@ void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n)
|
||||
switch (type.size)
|
||||
{
|
||||
case 8:
|
||||
if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand;
|
||||
else ((uint64_t *)data)[i] = (uint64_t) scaled_rand;
|
||||
if (type.is_signed)
|
||||
((int64_t *)data)[i] = (int64_t)scaled_rand;
|
||||
else
|
||||
((uint64_t *)data)[i] = (uint64_t)scaled_rand;
|
||||
break;
|
||||
case 4:
|
||||
if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand;
|
||||
else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
|
||||
if (type.is_signed)
|
||||
((int32_t *)data)[i] = (int32_t)scaled_rand;
|
||||
else
|
||||
((uint32_t *)data)[i] = (uint32_t)scaled_rand;
|
||||
break;
|
||||
case 2:
|
||||
// 16 bit multiplication saturates very fast
|
||||
// so here we produce only a 3-bit input range
|
||||
if(type.is_signed) ((int16_t *)data)[i] = (int16_t)((int16_t) scaled_rand % 8);
|
||||
else ((uint16_t *)data)[i] = (uint16_t) (int16_t)((int16_t) scaled_rand % 8);
|
||||
if (type.is_signed)
|
||||
((int16_t *)data)[i] = (int16_t)((int16_t)scaled_rand % 8);
|
||||
else
|
||||
((uint16_t *)data)[i] = (uint16_t)(int16_t)((int16_t)scaled_rand % 8);
|
||||
break;
|
||||
case 1:
|
||||
if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand;
|
||||
else ((uint8_t *)data)[i] = (uint8_t) scaled_rand;
|
||||
if (type.is_signed)
|
||||
((int8_t *)data)[i] = (int8_t)scaled_rand;
|
||||
else
|
||||
((uint8_t *)data)[i] = (uint8_t)scaled_rand;
|
||||
break;
|
||||
default:
|
||||
throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here
|
||||
@ -99,17 +110,20 @@ void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n)
|
||||
}
|
||||
}
|
||||
|
||||
static std::vector<std::string> get_arch_list(volk_gnsssdr_func_desc_t desc) {
|
||||
static std::vector<std::string> get_arch_list(volk_gnsssdr_func_desc_t desc)
|
||||
{
|
||||
std::vector<std::string> archlist;
|
||||
|
||||
for(size_t i = 0; i < desc.n_impls; i++) {
|
||||
for (size_t i = 0; i < desc.n_impls; i++)
|
||||
{
|
||||
archlist.push_back(std::string(desc.impl_names[i]));
|
||||
}
|
||||
|
||||
return archlist;
|
||||
}
|
||||
|
||||
volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string name) {
|
||||
volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string name)
|
||||
{
|
||||
volk_gnsssdr_type_t type;
|
||||
type.is_float = false;
|
||||
type.is_scalar = false;
|
||||
@ -118,19 +132,22 @@ volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string name) {
|
||||
type.size = 0;
|
||||
type.str = name;
|
||||
|
||||
if(name.size() < 2) {
|
||||
if (name.size() < 2)
|
||||
{
|
||||
throw std::string("name too short to be a datatype");
|
||||
}
|
||||
|
||||
//is it a scalar?
|
||||
if(name[0] == 's') {
|
||||
if (name[0] == 's')
|
||||
{
|
||||
type.is_scalar = true;
|
||||
name = name.substr(1, name.size() - 1);
|
||||
}
|
||||
|
||||
//get the data size
|
||||
size_t last_size_pos = name.find_last_of("0123456789");
|
||||
if(last_size_pos == std::string::npos) {
|
||||
if (last_size_pos == std::string::npos)
|
||||
{
|
||||
throw std::string("no size spec in type ").append(name);
|
||||
}
|
||||
//will throw if malformed
|
||||
@ -139,8 +156,10 @@ volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string name) {
|
||||
assert(((size % 8) == 0) && (size <= 64) && (size != 0));
|
||||
type.size = size / 8; //in bytes
|
||||
|
||||
for(size_t i=last_size_pos+1; i < name.size(); i++) {
|
||||
switch (name[i]) {
|
||||
for (size_t i = last_size_pos + 1; i < name.size(); i++)
|
||||
{
|
||||
switch (name[i])
|
||||
{
|
||||
case 'f':
|
||||
type.is_float = true;
|
||||
break;
|
||||
@ -163,7 +182,8 @@ volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string name) {
|
||||
|
||||
static void get_signatures_from_name(std::vector<volk_gnsssdr_type_t> &inputsig,
|
||||
std::vector<volk_gnsssdr_type_t> &outputsig,
|
||||
std::string name) {
|
||||
std::string name)
|
||||
{
|
||||
boost::char_separator<char> sep("_");
|
||||
boost::tokenizer<boost::char_separator<char> > tok(name, sep);
|
||||
std::vector<std::string> toked;
|
||||
@ -176,79 +196,107 @@ static void get_signatures_from_name(std::vector<volk_gnsssdr_type_t> &inputsig,
|
||||
//ok. we're assuming a string in the form
|
||||
//(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment)
|
||||
|
||||
enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT;
|
||||
enum
|
||||
{
|
||||
SIDE_INPUT,
|
||||
SIDE_NAME,
|
||||
SIDE_OUTPUT
|
||||
} side = SIDE_INPUT;
|
||||
std::string fn_name;
|
||||
volk_gnsssdr_type_t type;
|
||||
BOOST_FOREACH(std::string token, toked) {
|
||||
try {
|
||||
BOOST_FOREACH (std::string token, toked)
|
||||
{
|
||||
try
|
||||
{
|
||||
type = volk_gnsssdr_type_from_string(token);
|
||||
if (side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name...
|
||||
|
||||
if(side == SIDE_INPUT) inputsig.push_back(type);
|
||||
else outputsig.push_back(type);
|
||||
} catch (...){
|
||||
if(token[0] == 'x' && (token.size() > 1) && (token[1] > '0' || token[1] < '9')) {
|
||||
if(side == SIDE_INPUT) assert(inputsig.size() > 0);
|
||||
else assert(outputsig.size() > 0);
|
||||
if (side == SIDE_INPUT)
|
||||
inputsig.push_back(type);
|
||||
else
|
||||
outputsig.push_back(type);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (token[0] == 'x' && (token.size() > 1) && (token[1] > '0' || token[1] < '9'))
|
||||
{
|
||||
if (side == SIDE_INPUT)
|
||||
assert(inputsig.size() > 0);
|
||||
else
|
||||
assert(outputsig.size() > 0);
|
||||
int multiplier = boost::lexical_cast<int>(token.substr(1, token.size() - 1)); //will throw if invalid ///////////
|
||||
for(int i=1; i<multiplier; i++) {
|
||||
if(side == SIDE_INPUT) inputsig.push_back(inputsig.back());
|
||||
else outputsig.push_back(outputsig.back());
|
||||
for (int i = 1; i < multiplier; i++)
|
||||
{
|
||||
if (side == SIDE_INPUT)
|
||||
inputsig.push_back(inputsig.back());
|
||||
else
|
||||
outputsig.push_back(outputsig.back());
|
||||
}
|
||||
}
|
||||
|
||||
else if(side == SIDE_INPUT) { //it's the function name, at least it better be
|
||||
else if (side == SIDE_INPUT)
|
||||
{ //it's the function name, at least it better be
|
||||
side = SIDE_NAME;
|
||||
fn_name.append("_");
|
||||
fn_name.append(token);
|
||||
}
|
||||
else if(side == SIDE_OUTPUT) {
|
||||
else if (side == SIDE_OUTPUT)
|
||||
{
|
||||
if (token != toked.back()) throw; //the last token in the name is the alignment
|
||||
}
|
||||
}
|
||||
}
|
||||
//we don't need an output signature (some fn's operate on the input data, "in place"), but we do need at least one input!
|
||||
assert(inputsig.size() != 0);
|
||||
|
||||
}
|
||||
|
||||
inline void run_cast_test1(volk_gnsssdr_fn_1arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
inline void run_cast_test1(volk_gnsssdr_fn_1arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch)
|
||||
{
|
||||
while (iter--) func(buffs[0], vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test2(volk_gnsssdr_fn_2arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
inline void run_cast_test2(volk_gnsssdr_fn_2arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch)
|
||||
{
|
||||
while (iter--) func(buffs[0], buffs[1], vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test3(volk_gnsssdr_fn_3arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
inline void run_cast_test3(volk_gnsssdr_fn_3arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch)
|
||||
{
|
||||
while (iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test4(volk_gnsssdr_fn_4arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
inline void run_cast_test4(volk_gnsssdr_fn_4arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch)
|
||||
{
|
||||
while (iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test1_s32f(volk_gnsssdr_fn_1arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
inline void run_cast_test1_s32f(volk_gnsssdr_fn_1arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch)
|
||||
{
|
||||
while (iter--) func(buffs[0], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test2_s32f(volk_gnsssdr_fn_2arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
inline void run_cast_test2_s32f(volk_gnsssdr_fn_2arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch)
|
||||
{
|
||||
while (iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test3_s32f(volk_gnsssdr_fn_3arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
inline void run_cast_test3_s32f(volk_gnsssdr_fn_3arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch)
|
||||
{
|
||||
while (iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test1_s32fc(volk_gnsssdr_fn_1arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
inline void run_cast_test1_s32fc(volk_gnsssdr_fn_1arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch)
|
||||
{
|
||||
while (iter--) func(buffs[0], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test2_s32fc(volk_gnsssdr_fn_2arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
inline void run_cast_test2_s32fc(volk_gnsssdr_fn_2arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch)
|
||||
{
|
||||
while (iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
inline void run_cast_test3_s32fc(volk_gnsssdr_fn_3arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
|
||||
inline void run_cast_test3_s32fc(volk_gnsssdr_fn_3arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch)
|
||||
{
|
||||
while (iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
|
||||
}
|
||||
|
||||
@ -299,26 +347,32 @@ inline void run_cast_test3_s16ic(volk_gnsssdr_fn_3arg_s16ic func, std::vector<vo
|
||||
// *************** ADDED BY GNSS-SDR. END
|
||||
|
||||
template <class t>
|
||||
bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
|
||||
bool fcompare(t *in1, t *in2, unsigned int vlen, float tol)
|
||||
{
|
||||
bool fail = false;
|
||||
int print_max_errs = 10;
|
||||
for(unsigned int i=0; i<vlen; i++) {
|
||||
for (unsigned int i = 0; i < vlen; i++)
|
||||
{
|
||||
// for very small numbers we'll see round off errors due to limited
|
||||
// precision. So a special test case...
|
||||
if(fabs(((t *)(in1))[i]) < 1e-30) {
|
||||
if (fabs(((t *)(in1))[i]) < 1e-30)
|
||||
{
|
||||
if (fabs(((t *)(in2))[i]) > tol)
|
||||
{
|
||||
fail = true;
|
||||
if(print_max_errs-- > 0) {
|
||||
if (print_max_errs-- > 0)
|
||||
{
|
||||
std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]);
|
||||
std::cout << " tolerance was: " << tol << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
// the primary test is whether the relative difference is greater than the given tol
|
||||
else if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/fabs(((t *)in1)[i]) > tol) {
|
||||
else if (fabs(((t *)(in1))[i] - ((t *)(in2))[i]) / fabs(((t *)in1)[i]) > tol)
|
||||
{
|
||||
fail = true;
|
||||
if(print_max_errs-- > 0) {
|
||||
if (print_max_errs-- > 0)
|
||||
{
|
||||
std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]);
|
||||
std::cout << " tolerance was: " << tol << std::endl;
|
||||
}
|
||||
@ -329,30 +383,36 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
|
||||
}
|
||||
|
||||
template <class t>
|
||||
bool ccompare(t *in1, t *in2, unsigned int vlen, float tol) {
|
||||
bool ccompare(t *in1, t *in2, unsigned int vlen, float tol)
|
||||
{
|
||||
bool fail = false;
|
||||
int print_max_errs = 10;
|
||||
for(unsigned int i=0; i<2*vlen; i+=2) {
|
||||
for (unsigned int i = 0; i < 2 * vlen; i += 2)
|
||||
{
|
||||
t diff[2] = {in1[i] - in2[i], in1[i + 1] - in2[i + 1]};
|
||||
t err = std::sqrt(diff[0] * diff[0] + diff[1] * diff[1]);
|
||||
t norm = std::sqrt(in1[i] * in1[i] + in1[i + 1] * in1[i + 1]);
|
||||
|
||||
// for very small numbers we'll see round off errors due to limited
|
||||
// precision. So a special test case...
|
||||
if (norm < 1e-30) {
|
||||
if (norm < 1e-30)
|
||||
{
|
||||
if (err > tol)
|
||||
{
|
||||
fail = true;
|
||||
if(print_max_errs-- > 0) {
|
||||
if (print_max_errs-- > 0)
|
||||
{
|
||||
std::cout << "offset " << i / 2 << " in1: " << in1[i] << " + " << in1[i + 1] << "j in2: " << in2[i] << " + " << in2[i + 1] << "j";
|
||||
std::cout << " tolerance was: " << tol << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
// the primary test is whether the relative difference is greater than the given tol
|
||||
else if((err / norm) > tol) {
|
||||
else if ((err / norm) > tol)
|
||||
{
|
||||
fail = true;
|
||||
if(print_max_errs-- > 0) {
|
||||
if (print_max_errs-- > 0)
|
||||
{
|
||||
std::cout << "offset " << i / 2 << " in1: " << in1[i] << " + " << in1[i + 1] << "j in2: " << in2[i] << " + " << in2[i + 1] << "j";
|
||||
std::cout << " tolerance was: " << tol << std::endl;
|
||||
}
|
||||
@ -363,13 +423,17 @@ bool ccompare(t *in1, t *in2, unsigned int vlen, float tol) {
|
||||
}
|
||||
|
||||
template <class t>
|
||||
bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
|
||||
bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol)
|
||||
{
|
||||
bool fail = false;
|
||||
int print_max_errs = 10;
|
||||
for(unsigned int i=0; i<vlen; i++) {
|
||||
if(((unsigned int)abs(int(((t *)(in1))[i]) - int(((t *)(in2))[i]))) > tol) {
|
||||
for (unsigned int i = 0; i < vlen; i++)
|
||||
{
|
||||
if (((unsigned int)abs(int(((t *)(in1))[i]) - int(((t *)(in2))[i]))) > tol)
|
||||
{
|
||||
fail = true;
|
||||
if(print_max_errs-- > 0) {
|
||||
if (print_max_errs-- > 0)
|
||||
{
|
||||
std::cout << "offset " << i << " in1: " << static_cast<int>(t(((t *)(in1))[i])) << " in2: " << static_cast<int>(t(((t *)(in2))[i]));
|
||||
std::cout << " tolerance was: " << tol << std::endl;
|
||||
}
|
||||
@ -379,21 +443,27 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
|
||||
return fail;
|
||||
}
|
||||
|
||||
class volk_gnsssdr_qa_aligned_mem_pool{
|
||||
class volk_gnsssdr_qa_aligned_mem_pool
|
||||
{
|
||||
public:
|
||||
void *get_new(size_t size){
|
||||
void *get_new(size_t size)
|
||||
{
|
||||
size_t alignment = volk_gnsssdr_get_alignment();
|
||||
void *ptr = volk_gnsssdr_malloc(size, alignment);
|
||||
memset(ptr, 0x00, size);
|
||||
_mems.push_back(ptr);
|
||||
return ptr;
|
||||
}
|
||||
~volk_gnsssdr_qa_aligned_mem_pool() {
|
||||
for(unsigned int ii = 0; ii < _mems.size(); ++ii) {
|
||||
~volk_gnsssdr_qa_aligned_mem_pool()
|
||||
{
|
||||
for (unsigned int ii = 0; ii < _mems.size(); ++ii)
|
||||
{
|
||||
volk_gnsssdr_free(_mems[ii]);
|
||||
}
|
||||
}
|
||||
private: std::vector<void * > _mems;
|
||||
|
||||
private:
|
||||
std::vector<void *> _mems;
|
||||
};
|
||||
|
||||
bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
|
||||
@ -401,8 +471,7 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
|
||||
std::string name,
|
||||
volk_gnsssdr_test_params_t test_params,
|
||||
std::vector<volk_gnsssdr_test_results_t> *results,
|
||||
std::string puppet_master_name
|
||||
)
|
||||
std::string puppet_master_name)
|
||||
{
|
||||
return run_volk_gnsssdr_tests(desc, manual_func, name, test_params.tol(), test_params.scalar(),
|
||||
test_params.vlen(), test_params.iter(), results, puppet_master_name,
|
||||
@ -439,7 +508,8 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
|
||||
//first let's get a list of available architectures for the test
|
||||
std::vector<std::string> arch_list = get_arch_list(desc);
|
||||
|
||||
if((!benchmark_mode) && (arch_list.size() < 2)) {
|
||||
if ((!benchmark_mode) && (arch_list.size() < 2))
|
||||
{
|
||||
std::cout << "no architectures to test" << std::endl;
|
||||
return false;
|
||||
}
|
||||
@ -449,10 +519,12 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
|
||||
|
||||
//now we have to get a function signature by parsing the name
|
||||
std::vector<volk_gnsssdr_type_t> inputsig, outputsig;
|
||||
try {
|
||||
try
|
||||
{
|
||||
get_signatures_from_name(inputsig, outputsig, name);
|
||||
}
|
||||
catch (boost::bad_lexical_cast& error) {
|
||||
catch (boost::bad_lexical_cast &error)
|
||||
{
|
||||
std::cerr << "Error: unable to get function signature from kernel name" << std::endl;
|
||||
std::cerr << " - " << name << std::endl;
|
||||
return false;
|
||||
@ -460,30 +532,37 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
|
||||
|
||||
//pull the input scalars into their own vector
|
||||
std::vector<volk_gnsssdr_type_t> inputsc;
|
||||
for(size_t i=0; i<inputsig.size(); i++) {
|
||||
if(inputsig[i].is_scalar) {
|
||||
for (size_t i = 0; i < inputsig.size(); i++)
|
||||
{
|
||||
if (inputsig[i].is_scalar)
|
||||
{
|
||||
inputsc.push_back(inputsig[i]);
|
||||
inputsig.erase(inputsig.begin() + i);
|
||||
i -= 1;
|
||||
}
|
||||
}
|
||||
std::vector<void *> inbuffs;
|
||||
BOOST_FOREACH(volk_gnsssdr_type_t sig, inputsig) {
|
||||
BOOST_FOREACH (volk_gnsssdr_type_t sig, inputsig)
|
||||
{
|
||||
if (!sig.is_scalar) //we don't make buffers for scalars
|
||||
inbuffs.push_back(mem_pool.get_new(vlen * sig.size * (sig.is_complex ? 2 : 1)));
|
||||
}
|
||||
for(size_t i=0; i<inbuffs.size(); i++) {
|
||||
for (size_t i = 0; i < inbuffs.size(); i++)
|
||||
{
|
||||
load_random_data(inbuffs[i], inputsig[i], vlen);
|
||||
}
|
||||
|
||||
//ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch
|
||||
std::vector<std::vector<void *> > test_data;
|
||||
for(size_t i=0; i<arch_list.size(); i++) {
|
||||
for (size_t i = 0; i < arch_list.size(); i++)
|
||||
{
|
||||
std::vector<void *> arch_buffs;
|
||||
for(size_t j=0; j<outputsig.size(); j++) {
|
||||
for (size_t j = 0; j < outputsig.size(); j++)
|
||||
{
|
||||
arch_buffs.push_back(mem_pool.get_new(vlen * outputsig[j].size * (outputsig[j].is_complex ? 2 : 1)));
|
||||
}
|
||||
for(size_t j=0; j<inputsig.size(); j++) {
|
||||
for (size_t j = 0; j < inputsig.size(); j++)
|
||||
{
|
||||
void *arch_inbuff = mem_pool.get_new(vlen * inputsig[j].size * (inputsig[j].is_complex ? 2 : 1));
|
||||
memcpy(arch_inbuff, inbuffs[j], vlen * inputsig[j].size * (inputsig[j].is_complex ? 2 : 1));
|
||||
arch_buffs.push_back(arch_inbuff);
|
||||
@ -499,7 +578,8 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
|
||||
vlen = vlen - vlen_twiddle;
|
||||
std::chrono::time_point<std::chrono::system_clock> start, end;
|
||||
std::vector<double> profile_times;
|
||||
for(size_t i = 0; i < arch_list.size(); i++) {
|
||||
for (size_t i = 0; i < arch_list.size(); i++)
|
||||
{
|
||||
start = std::chrono::system_clock::now();
|
||||
|
||||
switch (both_sigs.size())
|
||||
@ -540,7 +620,8 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
|
||||
}
|
||||
}
|
||||
//ADDED BY GNSS-SDR. END
|
||||
else throw "unsupported 1 arg function >1 scalars";
|
||||
else
|
||||
throw "unsupported 1 arg function >1 scalars";
|
||||
break;
|
||||
case 2:
|
||||
if (inputsc.size() == 0)
|
||||
@ -578,7 +659,8 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
|
||||
}
|
||||
}
|
||||
//ADDED BY GNSS-SDR. END
|
||||
else throw "unsupported 2 arg function >1 scalars";
|
||||
else
|
||||
throw "unsupported 2 arg function >1 scalars";
|
||||
break;
|
||||
case 3:
|
||||
if (inputsc.size() == 0)
|
||||
@ -618,7 +700,8 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
|
||||
}
|
||||
}
|
||||
//ADDED BY GNSS-SDR. END
|
||||
else throw "unsupported 3 arg function >1 scalars";
|
||||
else
|
||||
throw "unsupported 3 arg function >1 scalars";
|
||||
break;
|
||||
default:
|
||||
throw "no function handler for this signature";
|
||||
@ -642,8 +725,10 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
|
||||
//and now compare each output to the generic output
|
||||
//first we have to know which output is the generic one, they aren't in order...
|
||||
size_t generic_offset = 0;
|
||||
for(size_t i=0; i<arch_list.size(); i++) {
|
||||
if (arch_list[i] == "generic") {
|
||||
for (size_t i = 0; i < arch_list.size(); i++)
|
||||
{
|
||||
if (arch_list[i] == "generic")
|
||||
{
|
||||
generic_offset = i;
|
||||
}
|
||||
}
|
||||
@ -795,9 +880,12 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
|
||||
std::cout << "Best aligned arch: " << best_arch_a << std::endl;
|
||||
std::cout << "Best unaligned arch: " << best_arch_u << std::endl;
|
||||
|
||||
if(puppet_master_name == "NULL") {
|
||||
if (puppet_master_name == "NULL")
|
||||
{
|
||||
results->back().config_name = name;
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
results->back().config_name = puppet_master_name;
|
||||
}
|
||||
results->back().best_arch_a = best_arch_a;
|
||||
|
@ -35,7 +35,8 @@
|
||||
/************************************************
|
||||
* VOLK QA type definitions *
|
||||
************************************************/
|
||||
struct volk_gnsssdr_type_t {
|
||||
struct volk_gnsssdr_type_t
|
||||
{
|
||||
bool is_float;
|
||||
bool is_scalar;
|
||||
bool is_signed;
|
||||
@ -44,7 +45,8 @@ struct volk_gnsssdr_type_t {
|
||||
std::string str;
|
||||
};
|
||||
|
||||
class volk_gnsssdr_test_time_t {
|
||||
class volk_gnsssdr_test_time_t
|
||||
{
|
||||
public:
|
||||
std::string name;
|
||||
double time;
|
||||
@ -52,7 +54,8 @@ class volk_gnsssdr_test_time_t {
|
||||
bool pass;
|
||||
};
|
||||
|
||||
class volk_gnsssdr_test_results_t {
|
||||
class volk_gnsssdr_test_results_t
|
||||
{
|
||||
public:
|
||||
std::string name;
|
||||
std::string config_name;
|
||||
@ -63,7 +66,8 @@ class volk_gnsssdr_test_results_t {
|
||||
std::string best_arch_u;
|
||||
};
|
||||
|
||||
class volk_gnsssdr_test_params_t {
|
||||
class volk_gnsssdr_test_params_t
|
||||
{
|
||||
private:
|
||||
float _tol;
|
||||
lv_32fc_t _scalar;
|
||||
@ -71,12 +75,11 @@ class volk_gnsssdr_test_params_t {
|
||||
unsigned int _iter;
|
||||
bool _benchmark_mode;
|
||||
std::string _kernel_regex;
|
||||
|
||||
public:
|
||||
// ctor
|
||||
volk_gnsssdr_test_params_t(float tol, lv_32fc_t scalar, unsigned int vlen, unsigned int iter,
|
||||
bool benchmark_mode, std::string kernel_regex) :
|
||||
_tol(tol), _scalar(scalar), _vlen(vlen), _iter(iter),
|
||||
_benchmark_mode(benchmark_mode), _kernel_regex(kernel_regex) {};
|
||||
bool benchmark_mode, std::string kernel_regex) : _tol(tol), _scalar(scalar), _vlen(vlen), _iter(iter), _benchmark_mode(benchmark_mode), _kernel_regex(kernel_regex){};
|
||||
// setters
|
||||
void set_tol(float tol) { _tol = tol; };
|
||||
void set_scalar(lv_32fc_t scalar) { _scalar = scalar; };
|
||||
@ -93,13 +96,15 @@ class volk_gnsssdr_test_params_t {
|
||||
std::string kernel_regex() { return _kernel_regex; };
|
||||
};
|
||||
|
||||
class volk_gnsssdr_test_case_t {
|
||||
class volk_gnsssdr_test_case_t
|
||||
{
|
||||
private:
|
||||
volk_gnsssdr_func_desc_t _desc;
|
||||
void (*_kernel_ptr)();
|
||||
std::string _name;
|
||||
volk_gnsssdr_test_params_t _test_parameters;
|
||||
std::string _puppet_master_name;
|
||||
|
||||
public:
|
||||
volk_gnsssdr_func_desc_t desc() { return _desc; };
|
||||
void (*kernel_ptr())() { return _kernel_ptr; };
|
||||
@ -108,16 +113,10 @@ class volk_gnsssdr_test_case_t {
|
||||
volk_gnsssdr_test_params_t test_parameters() { return _test_parameters; };
|
||||
// normal ctor
|
||||
volk_gnsssdr_test_case_t(volk_gnsssdr_func_desc_t desc, void (*kernel_ptr)(), std::string name,
|
||||
volk_gnsssdr_test_params_t test_parameters) :
|
||||
_desc(desc), _kernel_ptr(kernel_ptr), _name(name), _test_parameters(test_parameters),
|
||||
_puppet_master_name("NULL")
|
||||
{};
|
||||
volk_gnsssdr_test_params_t test_parameters) : _desc(desc), _kernel_ptr(kernel_ptr), _name(name), _test_parameters(test_parameters), _puppet_master_name("NULL"){};
|
||||
// ctor for puppets
|
||||
volk_gnsssdr_test_case_t(volk_gnsssdr_func_desc_t desc, void (*kernel_ptr)(), std::string name,
|
||||
std::string puppet_master_name, volk_gnsssdr_test_params_t test_parameters) :
|
||||
_desc(desc), _kernel_ptr(kernel_ptr), _name(name), _test_parameters(test_parameters),
|
||||
_puppet_master_name(puppet_master_name)
|
||||
{};
|
||||
std::string puppet_master_name, volk_gnsssdr_test_params_t test_parameters) : _desc(desc), _kernel_ptr(kernel_ptr), _name(name), _test_parameters(test_parameters), _puppet_master_name(puppet_master_name){};
|
||||
};
|
||||
|
||||
/************************************************
|
||||
@ -134,8 +133,7 @@ bool run_volk_gnsssdr_tests(
|
||||
std::string,
|
||||
volk_gnsssdr_test_params_t,
|
||||
std::vector<volk_gnsssdr_test_results_t> *results = NULL,
|
||||
std::string puppet_master_name = "NULL"
|
||||
);
|
||||
std::string puppet_master_name = "NULL");
|
||||
|
||||
bool run_volk_gnsssdr_tests(
|
||||
volk_gnsssdr_func_desc_t,
|
||||
@ -147,12 +145,12 @@ bool run_volk_gnsssdr_tests(
|
||||
unsigned int,
|
||||
std::vector<volk_gnsssdr_test_results_t> *results = NULL,
|
||||
std::string puppet_master_name = "NULL",
|
||||
bool benchmark_mode = false
|
||||
);
|
||||
bool benchmark_mode = false);
|
||||
|
||||
|
||||
#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) \
|
||||
BOOST_AUTO_TEST_CASE(func##_test) { \
|
||||
BOOST_AUTO_TEST_CASE(func##_test) \
|
||||
{ \
|
||||
BOOST_CHECK_EQUAL(run_volk_gnsssdr_tests( \
|
||||
func##_get_func_desc(), (void (*)())func##_manual, \
|
||||
std::string(#func), tol, scalar, len, iter, 0, "NULL"), \
|
||||
|
@ -49,20 +49,24 @@ int main()
|
||||
std::vector<std::string> qa_failures;
|
||||
std::vector<volk_gnsssdr_test_results_t> results;
|
||||
// Test every kernel reporting failures when they occur
|
||||
for(unsigned int ii = 0; ii < test_cases.size(); ++ii) {
|
||||
for (unsigned int ii = 0; ii < test_cases.size(); ++ii)
|
||||
{
|
||||
bool qa_result = false;
|
||||
volk_gnsssdr_test_case_t test_case = test_cases[ii];
|
||||
try {
|
||||
try
|
||||
{
|
||||
qa_result = run_volk_gnsssdr_tests(test_case.desc(), test_case.kernel_ptr(), test_case.name(),
|
||||
test_case.test_parameters(), &results, test_case.puppet_master_name());
|
||||
}
|
||||
catch(...) {
|
||||
catch (...)
|
||||
{
|
||||
// TODO: what exceptions might we need to catch and how do we handle them?
|
||||
std::cerr << "Exception found on kernel: " << test_case.name() << std::endl;
|
||||
qa_result = false;
|
||||
}
|
||||
|
||||
if(qa_result) {
|
||||
if (qa_result)
|
||||
{
|
||||
std::cerr << "Failure on " << test_case.name() << std::endl;
|
||||
qa_failures.push_back(test_case.name());
|
||||
}
|
||||
@ -74,9 +78,11 @@ int main()
|
||||
// Summarize QA results
|
||||
std::cerr << "Kernel QA finished: " << qa_failures.size() << " failures out of "
|
||||
<< test_cases.size() << " tests." << std::endl;
|
||||
if(qa_failures.size() > 0) {
|
||||
if (qa_failures.size() > 0)
|
||||
{
|
||||
std::cerr << "The following kernels failed QA:" << std::endl;
|
||||
for(unsigned int ii = 0; ii < qa_failures.size(); ++ii) {
|
||||
for (unsigned int ii = 0; ii < qa_failures.size(); ++ii)
|
||||
{
|
||||
std::cerr << " " << qa_failures[ii] << std::endl;
|
||||
}
|
||||
qa_ret_val = 1;
|
||||
@ -95,26 +101,28 @@ void print_qa_xml(std::vector<volk_gnsssdr_test_results_t> results, unsigned int
|
||||
qa_file.open(".unittest/kernels.xml");
|
||||
|
||||
qa_file << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << std::endl;
|
||||
qa_file << "<testsuites name=\"kernels\" " <<
|
||||
"tests=\"" << results.size() << "\" " <<
|
||||
"failures=\"" << nfails << "\" id=\"1\">" << std::endl;
|
||||
qa_file << "<testsuites name=\"kernels\" "
|
||||
<< "tests=\"" << results.size() << "\" "
|
||||
<< "failures=\"" << nfails << "\" id=\"1\">" << std::endl;
|
||||
|
||||
// Results are in a vector by kernel. Each element has a result
|
||||
// map containing time and arch name with test result
|
||||
for(unsigned int ii=0; ii < results.size(); ++ii) {
|
||||
for (unsigned int ii = 0; ii < results.size(); ++ii)
|
||||
{
|
||||
volk_gnsssdr_test_results_t result = results[ii];
|
||||
qa_file << " <testsuite name=\"" << result.name << "\">" << std::endl;
|
||||
|
||||
std::map<std::string, volk_gnsssdr_test_time_t>::iterator kernel_time_pair;
|
||||
for(kernel_time_pair = result.results.begin(); kernel_time_pair != result.results.end(); ++kernel_time_pair) {
|
||||
for (kernel_time_pair = result.results.begin(); kernel_time_pair != result.results.end(); ++kernel_time_pair)
|
||||
{
|
||||
volk_gnsssdr_test_time_t test_time = kernel_time_pair->second;
|
||||
qa_file << " <testcase name=\"" << test_time.name << "\" " <<
|
||||
"classname=\"" << result.name << "\" " <<
|
||||
"time=\"" << test_time.time << "\">" << std::endl;
|
||||
qa_file << " <testcase name=\"" << test_time.name << "\" "
|
||||
<< "classname=\"" << result.name << "\" "
|
||||
<< "time=\"" << test_time.time << "\">" << std::endl;
|
||||
if (!test_time.pass)
|
||||
qa_file << " <failure " <<
|
||||
"message=\"fail on arch " << test_time.name << "\">" <<
|
||||
"</failure>" << std::endl;
|
||||
qa_file << " <failure "
|
||||
<< "message=\"fail on arch " << test_time.name << "\">"
|
||||
<< "</failure>" << std::endl;
|
||||
qa_file << " </testcase>" << std::endl;
|
||||
}
|
||||
qa_file << " </testsuite>" << std::endl;
|
||||
@ -123,6 +131,4 @@ void print_qa_xml(std::vector<volk_gnsssdr_test_results_t> results, unsigned int
|
||||
|
||||
qa_file << "</testsuites>" << std::endl;
|
||||
qa_file.close();
|
||||
|
||||
}
|
||||
|
||||
|
@ -51,7 +51,8 @@ void *volk_gnsssdr_malloc(size_t size, size_t alignment)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"VOLK_GNSSSDR: Error allocating memory "
|
||||
"(posix_memalign: error %d: %s)\n", err, strerror(err));
|
||||
"(posix_memalign: error %d: %s)\n",
|
||||
err, strerror(err));
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
@ -112,8 +113,7 @@ volk_gnsssdr_malloc(size_t size, size_t alignment)
|
||||
return user;
|
||||
}
|
||||
|
||||
void
|
||||
volk_gnsssdr_free(void *ptr)
|
||||
void volk_gnsssdr_free(void *ptr)
|
||||
{
|
||||
struct block_info *info;
|
||||
|
||||
|
@ -31,7 +31,8 @@ void volk_gnsssdr_get_config_path(char *path)
|
||||
|
||||
//allows config redirection via env variable
|
||||
home = getenv("VOLK_CONFIGPATH");
|
||||
if(home!=NULL){
|
||||
if (home != NULL)
|
||||
{
|
||||
strncpy(path, home, 512);
|
||||
strcat(path, suffix2);
|
||||
return;
|
||||
|
@ -23,7 +23,8 @@
|
||||
#include <stdbool.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
int volk_gnsssdr_get_index(
|
||||
|
@ -37,13 +37,17 @@ struct volk_gnsssdr_machine *get_machine(void)
|
||||
|
||||
if (machine != NULL)
|
||||
return machine;
|
||||
else {
|
||||
else
|
||||
{
|
||||
unsigned int max_score = 0;
|
||||
unsigned int i;
|
||||
struct volk_gnsssdr_machine *max_machine = NULL;
|
||||
for(i=0; i<n_volk_gnsssdr_machines; i++) {
|
||||
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
|
||||
if(volk_gnsssdr_machines[i]->caps > max_score) {
|
||||
for (i = 0; i < n_volk_gnsssdr_machines; i++)
|
||||
{
|
||||
if (!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch())))
|
||||
{
|
||||
if (volk_gnsssdr_machines[i]->caps > max_score)
|
||||
{
|
||||
max_score = volk_gnsssdr_machines[i]->caps;
|
||||
max_machine = volk_gnsssdr_machines[i];
|
||||
}
|
||||
@ -63,8 +67,10 @@ void volk_gnsssdr_list_machines(void)
|
||||
extern unsigned int n_volk_gnsssdr_machines;
|
||||
|
||||
unsigned int i;
|
||||
for(i=0; i<n_volk_gnsssdr_machines; i++) {
|
||||
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
|
||||
for (i = 0; i < n_volk_gnsssdr_machines; i++)
|
||||
{
|
||||
if (!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch())))
|
||||
{
|
||||
printf("%s;", volk_gnsssdr_machines[i]->name);
|
||||
}
|
||||
}
|
||||
@ -79,13 +85,17 @@ const char* volk_gnsssdr_get_machine(void)
|
||||
|
||||
if (machine != NULL)
|
||||
return machine->name;
|
||||
else {
|
||||
else
|
||||
{
|
||||
unsigned int max_score = 0;
|
||||
unsigned int i;
|
||||
struct volk_gnsssdr_machine *max_machine = NULL;
|
||||
for(i=0; i<n_volk_gnsssdr_machines; i++) {
|
||||
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
|
||||
if(volk_gnsssdr_machines[i]->caps > max_score) {
|
||||
for (i = 0; i < n_volk_gnsssdr_machines; i++)
|
||||
{
|
||||
if (!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch())))
|
||||
{
|
||||
if (volk_gnsssdr_machines[i]->caps > max_score)
|
||||
{
|
||||
max_score = volk_gnsssdr_machines[i]->caps;
|
||||
max_machine = volk_gnsssdr_machines[i];
|
||||
}
|
||||
@ -118,8 +128,7 @@ bool volk_gnsssdr_is_aligned(const void *ptr)
|
||||
|
||||
static inline void __${kern.name}_d(${kern.arglist_full})
|
||||
{
|
||||
%if kern.has_dispatcher:
|
||||
${kern.name}_dispatcher(${kern.arglist_names});
|
||||
% if kern.has_dispatcher : ${kern.name} _dispatcher(${kern.arglist_names});
|
||||
return;
|
||||
%endif
|
||||
|
||||
@ -183,14 +192,13 @@ void ${kern.name}_manual(${kern.arglist_full}, const char* impl_name)
|
||||
const int index = volk_gnsssdr_get_index(
|
||||
get_machine()->${kern.name} _impl_names,
|
||||
get_machine()->${kern.name} _n_impls,
|
||||
impl_name
|
||||
);
|
||||
impl_name);
|
||||
get_machine()->${kern.name} _impls[index](
|
||||
${kern.arglist_names}
|
||||
);
|
||||
${kern.arglist_names});
|
||||
}
|
||||
|
||||
volk_gnsssdr_func_desc_t ${kern.name}_get_func_desc(void) {
|
||||
volk_gnsssdr_func_desc_t ${kern.name} _get_func_desc(void)
|
||||
{
|
||||
const char **impl_names = get_machine()->${kern.name} _impl_names;
|
||||
const int *impl_deps = get_machine()->${kern.name} _impl_deps;
|
||||
const bool *alignment = get_machine()->${kern.name} _impl_alignment;
|
||||
@ -199,8 +207,7 @@ volk_gnsssdr_func_desc_t ${kern.name}_get_func_desc(void) {
|
||||
impl_names,
|
||||
impl_deps,
|
||||
alignment,
|
||||
n_impls
|
||||
};
|
||||
n_impls};
|
||||
return desc;
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,8 @@
|
||||
|
||||
%for i, arch in enumerate(archs):
|
||||
//#ifndef LV_${arch.name.upper()}
|
||||
#define LV_${arch.name.upper()} ${i}
|
||||
#define LV_$ \
|
||||
{arch.name.upper()} $ { i }
|
||||
//#endif
|
||||
%endfor
|
||||
|
||||
|
@ -40,9 +40,12 @@ struct VOLK_CPU volk_gnsssdr_cpu;
|
||||
* check for AVX capability before executing.
|
||||
*/
|
||||
#if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3)) && defined(HAVE_XGETBV)
|
||||
static inline unsigned long long _xgetbv(unsigned int index){
|
||||
static inline unsigned long long _xgetbv(unsigned int index)
|
||||
{
|
||||
unsigned int eax, edx;
|
||||
__VOLK_ASM __VOLK_VOLATILE ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
|
||||
__VOLK_ASM __VOLK_VOLATILE("xgetbv"
|
||||
: "=a"(eax), "=d"(edx)
|
||||
: "c"(index));
|
||||
return ((unsigned long long)edx << 32) | eax;
|
||||
}
|
||||
#define __xgetbv() _xgetbv(0)
|
||||
@ -67,7 +70,8 @@ struct VOLK_CPU volk_gnsssdr_cpu;
|
||||
|
||||
#endif //defined(VOLK_CPU_x86)
|
||||
|
||||
static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int count, unsigned int reg, unsigned int bit) {
|
||||
static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int count, unsigned int reg, unsigned int bit)
|
||||
{
|
||||
#if defined(VOLK_CPU_x86)
|
||||
unsigned int regs[4] = {0};
|
||||
cpuid_x86_count(level, count, regs);
|
||||
@ -77,7 +81,8 @@ static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit) {
|
||||
static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit)
|
||||
{
|
||||
#if defined(VOLK_CPU_x86)
|
||||
unsigned int regs[4];
|
||||
memset(regs, 0, sizeof(unsigned int) * 4);
|
||||
@ -88,7 +93,8 @@ static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsi
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline unsigned int check_extended_cpuid(unsigned int val) {
|
||||
static inline unsigned int check_extended_cpuid(unsigned int val)
|
||||
{
|
||||
#if defined(VOLK_CPU_x86)
|
||||
unsigned int regs[4];
|
||||
memset(regs, 0, sizeof(unsigned int) * 4);
|
||||
@ -99,7 +105,8 @@ static inline unsigned int check_extended_cpuid(unsigned int val) {
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline unsigned int get_avx_enabled(void) {
|
||||
static inline unsigned int get_avx_enabled(void)
|
||||
{
|
||||
#if defined(VOLK_CPU_x86)
|
||||
return __xgetbv() & 0x6;
|
||||
#else
|
||||
@ -107,7 +114,8 @@ static inline unsigned int get_avx_enabled(void) {
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline unsigned int get_avx2_enabled(void) {
|
||||
static inline unsigned int get_avx2_enabled(void)
|
||||
{
|
||||
#if defined(VOLK_CPU_x86)
|
||||
return __xgetbv() & 0x6;
|
||||
#else
|
||||
@ -123,7 +131,8 @@ static inline unsigned int get_avx2_enabled(void) {
|
||||
#define VOLK_CPU_ARM
|
||||
#endif
|
||||
|
||||
static int has_neon(void){
|
||||
static int has_neon(void)
|
||||
{
|
||||
#if defined(VOLK_CPU_ARM)
|
||||
FILE *auxvec_f;
|
||||
unsigned long auxvec[2];
|
||||
@ -134,7 +143,8 @@ static int has_neon(void){
|
||||
size_t r = 1;
|
||||
//so auxv is basically 32b of ID and 32b of value
|
||||
//so it goes like this
|
||||
while(!found_neon && r) {
|
||||
while (!found_neon && r)
|
||||
{
|
||||
r = fread(auxvec, sizeof(unsigned long), 2, auxvec_f);
|
||||
if ((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_NEON))
|
||||
found_neon = 1;
|
||||
@ -148,50 +158,59 @@ static int has_neon(void){
|
||||
}
|
||||
|
||||
%for arch in archs:
|
||||
static int i_can_has_${arch.name} (void) {
|
||||
static int i_can_has_${arch.name} (void)
|
||||
{
|
||||
%for check, params in arch.checks:
|
||||
if (${check}(<% joined_params = ', '.join(params)%>${joined_params}) == 0) return 0;
|
||||
%endfor
|
||||
return 1;
|
||||
% endfor return 1;
|
||||
}
|
||||
% endfor
|
||||
|
||||
#if defined(HAVE_FENV_H)
|
||||
#if defined(FE_TONEAREST)
|
||||
#include <fenv.h>
|
||||
static inline void set_float_rounding(void){
|
||||
static inline void
|
||||
set_float_rounding(void)
|
||||
{
|
||||
fesetround(FE_TONEAREST);
|
||||
}
|
||||
#else
|
||||
static inline void set_float_rounding(void){
|
||||
static inline void
|
||||
set_float_rounding(void)
|
||||
{
|
||||
//do nothing
|
||||
}
|
||||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
#include <float.h>
|
||||
static inline void set_float_rounding(void){
|
||||
static inline void
|
||||
set_float_rounding(void)
|
||||
{
|
||||
unsigned int cwrd;
|
||||
_controlfp_s(&cwrd, 0, 0);
|
||||
_controlfp_s(&cwrd, _RC_NEAR, _MCW_RC);
|
||||
}
|
||||
#else
|
||||
static inline void set_float_rounding(void){
|
||||
static inline void
|
||||
set_float_rounding(void)
|
||||
{
|
||||
//do nothing
|
||||
}
|
||||
#endif
|
||||
|
||||
void volk_gnsssdr_cpu_init() {
|
||||
void volk_gnsssdr_cpu_init()
|
||||
{
|
||||
%for arch in archs:
|
||||
volk_gnsssdr_cpu.has_${arch.name} = &i_can_has_${arch.name};
|
||||
% endfor
|
||||
set_float_rounding();
|
||||
}
|
||||
|
||||
unsigned int volk_gnsssdr_get_lvarch() {
|
||||
unsigned int volk_gnsssdr_get_lvarch()
|
||||
{
|
||||
unsigned int retval = 0;
|
||||
volk_gnsssdr_cpu_init();
|
||||
%for arch in archs:
|
||||
retval += volk_gnsssdr_cpu.has_${arch.name}() << LV_${arch.name.upper()};
|
||||
%endfor
|
||||
return retval;
|
||||
% endfor return retval;
|
||||
}
|
||||
|
@ -23,7 +23,8 @@
|
||||
|
||||
__VOLK_DECL_BEGIN
|
||||
|
||||
struct VOLK_CPU {
|
||||
struct VOLK_CPU
|
||||
{
|
||||
%for arch in archs:
|
||||
int (*has_${arch.name}) ();
|
||||
% endfor
|
||||
|
@ -20,7 +20,11 @@
|
||||
<% arch_names = this_machine.arch_names %>
|
||||
|
||||
%for arch in this_machine.archs:
|
||||
#define LV_HAVE_${arch.name.upper()} 1
|
||||
#define LV_HAVE_$ \
|
||||
{ \
|
||||
arch.name.upper() \
|
||||
} \
|
||||
1
|
||||
%endfor
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||
@ -35,7 +39,9 @@
|
||||
#include <volk_gnsssdr/${kern.name}.h>
|
||||
%endfor
|
||||
|
||||
struct volk_gnsssdr_machine volk_gnsssdr_machine_${this_machine.name} = {
|
||||
struct volk_gnsssdr_machine volk_gnsssdr_machine_$
|
||||
{
|
||||
this_machine.name} = {
|
||||
<% make_arch_have_list = (' | '.join(['(1 << LV_%s)'%a.name.upper() for a in this_machine.archs])) %> ${make_arch_have_list},
|
||||
<% this_machine_name = "\""+this_machine.name+"\"" %> ${this_machine_name},
|
||||
${this_machine.alignment},
|
||||
|
@ -27,7 +27,8 @@
|
||||
|
||||
__VOLK_DECL_BEGIN
|
||||
|
||||
struct volk_gnsssdr_machine {
|
||||
struct volk_gnsssdr_machine
|
||||
{
|
||||
const unsigned int caps; //capabilities (i.e., archs compiled into this machine, in the volk_gnsssdr_get_lvarch format)
|
||||
const char *name;
|
||||
const size_t alignment; //the maximum byte alignment required for functions in this library
|
||||
@ -43,7 +44,10 @@ struct volk_gnsssdr_machine {
|
||||
|
||||
%for machine in machines:
|
||||
#ifdef LV_MACHINE_${machine.name.upper() }
|
||||
extern struct volk_gnsssdr_machine volk_gnsssdr_machine_${machine.name};
|
||||
extern struct volk_gnsssdr_machine volk_gnsssdr_machine_$
|
||||
{
|
||||
machine.name
|
||||
};
|
||||
#endif
|
||||
% endfor
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user