1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2024-12-15 20:50:33 +00:00

Apply automated code formatting to volk-gnsssdr

See http://gnss-sdr.org/coding-style/#use-tools-for-automated-code-formatting
This commit is contained in:
Carles Fernandez 2018-03-03 12:09:45 +01:00
parent f924005733
commit 891478cf2c
75 changed files with 6642 additions and 6120 deletions

View File

@ -54,8 +54,10 @@ int main(int argc, char **argv)
our_options.add(option_t("cc", "", "print the VOLK_GNSSDR C compiler version", volk_gnsssdr_c_compiler()));
our_options.add(option_t("cflags", "", "print the VOLK_GNSSSDR CFLAGS", volk_gnsssdr_compiler_flags()));
our_options.add(option_t("all-machines", "", "print VOLK_GNSSSDR machines built", volk_gnsssdr_available_machines()));
our_options.add(option_t("avail-machines", "", "print VOLK_GNSSSDR machines on the current "
"platform", volk_gnsssdr_list_machines));
our_options.add(option_t("avail-machines", "",
"print VOLK_GNSSSDR machines on the current "
"platform",
volk_gnsssdr_list_machines));
our_options.add(option_t("machine", "", "print the current VOLK_GNSSSDR machine that will be used",
volk_gnsssdr_get_machine()));
our_options.add(option_t("alignment", "", "print the memory alignment", print_alignment));

View File

@ -25,7 +25,6 @@
#include <utility> // for pair
/*
* Option type
*/
@ -70,55 +69,74 @@ option_t::option_t(std::string longform, std::string shortform, std::string msg,
* Option List
*/
option_list::option_list(std::string program_name) :
program_name(program_name) {
{ internal_list = std::vector<option_t>(); }
option_list::option_list(std::string program_name) : program_name(program_name)
{
{
internal_list = std::vector<option_t>();
}
}
void option_list::add(const option_t &opt) { internal_list.push_back(opt); }
void option_list::parse(int argc, char **argv) {
for (int arg_number = 0; arg_number < argc; ++arg_number) {
void option_list::parse(int argc, char **argv)
{
for (int arg_number = 0; arg_number < argc; ++arg_number)
{
for (std::vector<option_t>::iterator this_option = internal_list.begin();
this_option != internal_list.end();
this_option++) {
this_option++)
{
if (this_option->longform == std::string(argv[arg_number]) ||
this_option->shortform == std::string(argv[arg_number])) {
switch (this_option->option_type) {
this_option->shortform == std::string(argv[arg_number]))
{
switch (this_option->option_type)
{
case VOID_CALLBACK:
this_option->callback();
break;
case INT_CALLBACK:
try {
try
{
int int_val = std::stoi(argv[++arg_number]);
((void (*)(int))this_option->callback)(int_val);
} catch (std::exception &exc) {
}
catch (std::exception &exc)
{
std::cout << "An int option can only receive a number" << std::endl;
throw std::exception();
};
break;
case FLOAT_CALLBACK:
try {
try
{
int int_val = std::stof(argv[++arg_number]);
((void (*)(float))this_option->callback)(int_val);
} catch (std::exception &exc) {
}
catch (std::exception &exc)
{
std::cout << "A float option can only receive a number" << std::endl;
throw std::exception();
};
break;
case BOOL_CALLBACK:
try {
try
{
bool int_val = (bool)std::stoi(argv[++arg_number]);
((void (*)(bool))this_option->callback)(int_val);
} catch (std::exception &exc) {
}
catch (std::exception &exc)
{
std::cout << "A bool option can only receive 0 or 1" << std::endl;
throw std::exception();
};
break;
case STRING_CALLBACK:
try {
try
{
((void (*)(std::string))this_option->callback)(argv[++arg_number]);
} catch (std::exception &exc) {
}
catch (std::exception &exc)
{
throw std::exception();
};
break;
@ -132,26 +150,33 @@ void option_list::parse(int argc, char **argv) {
}
}
if (std::string("--help") == std::string(argv[arg_number]) ||
std::string("-h") == std::string(argv[arg_number])) {
std::string("-h") == std::string(argv[arg_number]))
{
help();
}
}
}
void option_list::help() {
void option_list::help()
{
std::cout << program_name << std::endl;
std::cout << " -h [ --help ] \t\tDisplay this help message" << std::endl;
for (std::vector<option_t>::iterator this_option = internal_list.begin();
this_option != internal_list.end();
this_option++) {
this_option++)
{
std::string help_line(" ");
if (this_option->shortform == "-") {
if (this_option->shortform == "-")
{
help_line += this_option->longform + " ";
} else {
}
else
{
help_line += this_option->shortform + " [ " + this_option->longform + " ]";
}
switch (help_line.size() / 8) {
switch (help_line.size() / 8)
{
case 0:
help_line += "\t\t\t\t";
break;

View File

@ -36,7 +36,8 @@ typedef enum
STRING,
} VOLK_OPTYPE;
class option_t {
class option_t
{
public:
option_t(std::string longform, std::string shortform, std::string msg, void (*callback)());
option_t(std::string longform, std::string shortform, std::string msg, void (*callback)(int));
@ -51,7 +52,6 @@ public:
VOLK_OPTYPE option_type;
std::string printval;
void (*callback)();
};
class option_list
@ -64,6 +64,7 @@ public:
void parse(int argc, char **argv);
void help();
private:
std::string program_name;
std::vector<option_t> internal_list;

View File

@ -34,7 +34,6 @@
#include <vector> // for vector, vector<>::const_..
namespace fs = boost::filesystem;
volk_gnsssdr_test_params_t test_params(1e-6f, 327.f, 8111, 1987, false, "");
@ -75,9 +74,11 @@ int main(int argc, char *argv[])
return 1;
}
for (int arg_number = 0; arg_number < argc; ++arg_number) {
for (int arg_number = 0; arg_number < argc; ++arg_number)
{
if (std::string("--help") == std::string(argv[arg_number]) ||
std::string("-h") == std::string(argv[arg_number])) {
std::string("-h") == std::string(argv[arg_number]))
{
return 0;
}
}
@ -85,19 +86,24 @@ int main(int argc, char *argv[])
std::ofstream json_file;
std::string config_file;
if ( json_filename != "" ) {
if (json_filename != "")
{
json_file.open(json_filename.c_str());
}
if ( volk_config_path != "" ) {
if (volk_config_path != "")
{
config_file = volk_config_path + "/volk_config";
}
// Run tests
std::vector<volk_gnsssdr_test_results_t> results;
if(update_mode) {
if( config_file != "" ) read_results(&results, config_file);
else read_results(&results);
if (update_mode)
{
if (config_file != "")
read_results(&results, config_file);
else
read_results(&results);
}
// Initialize the list of tests
@ -105,35 +111,43 @@ int main(int argc, char *argv[])
// Iterate through list of tests running each one
std::string substr_to_match(test_params.kernel_regex());
for(unsigned int ii = 0; ii < test_cases.size(); ++ii) {
for (unsigned int ii = 0; ii < test_cases.size(); ++ii)
{
bool regex_match = true;
volk_gnsssdr_test_case_t test_case = test_cases[ii];
// if the kernel name matches regex then do the test
std::string test_case_name = test_case.name();
if(test_case_name.find(substr_to_match) == std::string::npos) {
if (test_case_name.find(substr_to_match) == std::string::npos)
{
regex_match = false;
}
// if we are in update mode check if we've already got results
// if we have any, then no need to test that kernel
bool update = true;
if(update_mode) {
for(unsigned int jj=0; jj < results.size(); ++jj) {
if (update_mode)
{
for (unsigned int jj = 0; jj < results.size(); ++jj)
{
if (results[jj].name == test_case.name() ||
results[jj].name == test_case.puppet_master_name()) {
results[jj].name == test_case.puppet_master_name())
{
update = false;
break;
}
}
}
if( regex_match && update ) {
try {
if (regex_match && update)
{
try
{
run_volk_gnsssdr_tests(test_case.desc(), test_case.kernel_ptr(), test_case.name(),
test_case.test_parameters(), &results, test_case.puppet_master_name());
}
catch (std::string &error) {
catch (std::string &error)
{
std::cerr << "Caught Exception in 'run_volk_gnssdr_tests': " << error << std::endl;
}
}
@ -141,16 +155,21 @@ int main(int argc, char *argv[])
// Output results according to provided options
if(json_filename != "") {
if (json_filename != "")
{
write_json(json_file, results);
json_file.close();
}
if(!dry_run) {
if(config_file != "") write_results(&results, false, config_file);
else write_results(&results, false);
if (!dry_run)
{
if (config_file != "")
write_results(&results, false, config_file);
else
write_results(&results, false);
}
else {
else
{
std::cout << "Warning: this was a dry-run. Config not generated" << std::endl;
}
}
@ -169,11 +188,13 @@ void read_results(std::vector<volk_gnsssdr_test_results_t> *results, std::string
struct stat buffer;
bool config_status = (stat(path.c_str(), &buffer) == 0);
if( config_status ) {
if (config_status)
{
// a config exists and we are reading results from it
std::ifstream config(path.c_str());
char config_line[256];
while(config.getline(config_line, 255)) {
while (config.getline(config_line, 255))
{
// tokenize the input line by kernel_name unaligned aligned
// then push back in the results vector with fields filled in
@ -184,13 +205,15 @@ void read_results(std::vector<volk_gnsssdr_test_results_t> *results, std::string
found = config_str.find(' ');
// Split line by spaces
while(found && found < str_size) {
while (found && found < str_size)
{
found = config_str.find(' ');
// kernel names MUST be less than 128 chars, which is
// a length restricted by volk/volk_prefs.c
// on the last token in the parsed string we won't find a space
// so make sure we copy at most 128 chars.
if(found > 127) {
if (found > 127)
{
found = 127;
}
str_size = config_str.size();
@ -201,7 +224,8 @@ void read_results(std::vector<volk_gnsssdr_test_results_t> *results, std::string
config_str.erase(0, found + 1);
}
if(single_kernel_result.size() == 3) {
if (single_kernel_result.size() == 3)
{
volk_gnsssdr_test_results_t kernel_result;
kernel_result.name = std::string(single_kernel_result[0]);
kernel_result.config_name = std::string(single_kernel_result[0]);
@ -211,7 +235,6 @@ void read_results(std::vector<volk_gnsssdr_test_results_t> *results, std::string
}
}
}
}
void write_results(const std::vector<volk_gnsssdr_test_results_t> *results, bool update_result)
@ -234,17 +257,21 @@ void write_results(const std::vector<volk_gnsssdr_test_results_t> *results, bool
}
std::ofstream config;
if(update_result) {
if (update_result)
{
std::cout << "Updating " << path << " ..." << std::endl;
config.open(path.c_str(), std::ofstream::app);
if (!config.is_open()) { //either we don't have write access or we don't have the dir yet
if (!config.is_open())
{ //either we don't have write access or we don't have the dir yet
std::cout << "Error opening file " << path << std::endl;
}
}
else {
else
{
std::cout << "Writing " << path << " ..." << std::endl;
config.open(path.c_str());
if (!config.is_open()) { //either we don't have write access or we don't have the dir yet
if (!config.is_open())
{ //either we don't have write access or we don't have the dir yet
std::cout << "Error opening file " << path << std::endl;
}
@ -255,7 +282,8 @@ void write_results(const std::vector<volk_gnsssdr_test_results_t> *results, bool
}
std::vector<volk_gnsssdr_test_results_t>::const_iterator profile_results;
for(profile_results = results->begin(); profile_results != results->end(); ++profile_results) {
for (profile_results = results->begin(); profile_results != results->end(); ++profile_results)
{
config << profile_results->config_name << " "
<< profile_results->best_arch_a << " "
<< profile_results->best_arch_u << std::endl;
@ -270,7 +298,8 @@ void write_json(std::ofstream &json_file, std::vector<volk_gnsssdr_test_results_
size_t len = results.size();
size_t i = 0;
std::vector<volk_gnsssdr_test_results_t>::iterator result;
for(result = results.begin(); result != results.end(); ++result) {
for (result = results.begin(); result != results.end(); ++result)
{
json_file << " {" << std::endl;
json_file << " \"name\": \"" << result->name << "\"," << std::endl;
json_file << " \"vlen\": " << (int)(result->vlen) << "," << std::endl;
@ -284,14 +313,16 @@ void write_json(std::ofstream &json_file, std::vector<volk_gnsssdr_test_results_
size_t ri = 0;
std::map<std::string, volk_gnsssdr_test_time_t>::iterator kernel_time_pair;
for(kernel_time_pair = result->results.begin(); kernel_time_pair != result->results.end(); ++kernel_time_pair) {
for (kernel_time_pair = result->results.begin(); kernel_time_pair != result->results.end(); ++kernel_time_pair)
{
volk_gnsssdr_test_time_t time = kernel_time_pair->second;
json_file << " \"" << time.name << "\": {" << std::endl;
json_file << " \"name\": \"" << time.name << "\"," << std::endl;
json_file << " \"time\": " << time.time << "," << std::endl;
json_file << " \"units\": \"" << time.units << "\"" << std::endl;
json_file << " }";
if(ri+1 != results_len) {
if (ri + 1 != results_len)
{
json_file << ",";
}
json_file << std::endl;
@ -299,7 +330,8 @@ void write_json(std::ofstream &json_file, std::vector<volk_gnsssdr_test_results_
}
json_file << " }" << std::endl;
json_file << " }";
if(i+1 != len) {
if (i + 1 != len)
{
json_file << ",";
}
json_file << std::endl;
@ -308,5 +340,3 @@ void write_json(std::ofstream &json_file, std::vector<volk_gnsssdr_test_results_
json_file << " ]" << std::endl;
json_file << "}" << std::endl;
}

View File

@ -40,19 +40,22 @@ _mm256_complexmul_ps(__m256 x, __m256 y)
}
static inline __m256
_mm256_conjugate_ps(__m256 x){
_mm256_conjugate_ps(__m256 x)
{
const __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
return _mm256_xor_ps(x, conjugator); // conjugate y
}
static inline __m256
_mm256_complexconjugatemul_ps(__m256 x, __m256 y){
_mm256_complexconjugatemul_ps(__m256 x, __m256 y)
{
y = _mm256_conjugate_ps(y);
return _mm256_complexmul_ps(x, y);
}
static inline __m256
_mm256_magnitudesquared_ps(__m256 cplxValue1, __m256 cplxValue2){
_mm256_magnitudesquared_ps(__m256 cplxValue1, __m256 cplxValue2)
{
__m256 complex1, complex2;
cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
@ -61,7 +64,8 @@ _mm256_magnitudesquared_ps(__m256 cplxValue1, __m256 cplxValue2){
return _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
}
static inline __m256 _mm256_complexnormalise_ps( __m256 z ){
static inline __m256 _mm256_complexnormalise_ps(__m256 z)
{
__m256 tmp1 = _mm256_mul_ps(z, z);
__m256 tmp2 = _mm256_hadd_ps(tmp1, tmp1);
tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
@ -70,7 +74,8 @@ static inline __m256 _mm256_complexnormalise_ps( __m256 z ){
}
static inline __m256
_mm256_magnitude_ps(__m256 cplxValue1, __m256 cplxValue2){
_mm256_magnitude_ps(__m256 cplxValue1, __m256 cplxValue2)
{
return _mm256_sqrt_ps(_mm256_magnitudesquared_ps(cplxValue1, cplxValue2));
}

View File

@ -91,7 +91,9 @@
// FIXME: due to the usage of complex.h, require gcc for c-linkage
////////////////////////////////////////////////////////////////////////
#if defined(__cplusplus) && (__GNUC__)
# define __VOLK_DECL_BEGIN extern "C" {
#define __VOLK_DECL_BEGIN \
extern "C" \
{
#define __VOLK_DECL_END }
#else
#define __VOLK_DECL_BEGIN
@ -121,7 +123,8 @@
#endif
#endif
union bit128{
union bit128
{
uint8_t i8[16];
uint16_t i16[8];
uint32_t i[4];
@ -138,7 +141,8 @@ union bit128{
#endif
};
union bit256{
union bit256
{
uint8_t i8[32];
uint16_t i16[16];
uint32_t i[8];

View File

@ -55,19 +55,27 @@ typedef std::complex<int64_t> lv_64sc_t;
typedef std::complex<float> lv_32fc_t;
typedef std::complex<double> lv_64fc_t;
template <typename T> inline std::complex<T> lv_cmake(const T &r, const T &i){
template <typename T>
inline std::complex<T> lv_cmake(const T &r, const T &i)
{
return std::complex<T>(r, i);
}
template <typename T> inline typename T::value_type lv_creal(const T &x){
template <typename T>
inline typename T::value_type lv_creal(const T &x)
{
return x.real();
}
template <typename T> inline typename T::value_type lv_cimag(const T &x){
template <typename T>
inline typename T::value_type lv_cimag(const T &x)
{
return x.imag();
}
template <typename T> inline T lv_conj(const T &x){
template <typename T>
inline T lv_conj(const T &x)
{
return std::conj(x);
}

View File

@ -48,14 +48,16 @@ _mm_complexconjugatemul_ps(__m128 x, __m128 y)
}
static inline __m128
_mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2){
_mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
{
cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
return _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
}
static inline __m128
_mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2){
_mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
{
return _mm_sqrt_ps(_mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2));
}

View File

@ -27,7 +27,8 @@
#include <xmmintrin.h>
static inline __m128
_mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2){
_mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2)
{
__m128 iValue, qValue;
// Arrange in i1i2i3i4 format
iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
@ -39,7 +40,8 @@ _mm_magnitudesquared_ps(__m128 cplxValue1, __m128 cplxValue2){
}
static inline __m128
_mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2){
_mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2)
{
return _mm_sqrt_ps(_mm_magnitudesquared_ps(cplxValue1, cplxValue2));
}

View File

@ -279,4 +279,3 @@ static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_neon(int16_t* result,
#endif
#endif // INCLUDED_volk_gnsssdr_16i_resamplerpuppet_16i_H

View File

@ -107,7 +107,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_a_sse4_1(int16_t** resul
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -173,7 +174,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_u_sse4_1(int16_t** resul
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -240,7 +242,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_a_sse3(int16_t** result,
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -310,7 +313,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_u_sse3(int16_t** result,
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -379,7 +383,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_a_avx(int16_t** result,
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
__VOLK_ATTR_ALIGNED(32)
int local_code_chip_index[8];
int local_code_chip_index_;
const __m256 zeros = _mm256_setzero_ps();
@ -456,7 +461,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_u_avx(int16_t** result,
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
__VOLK_ATTR_ALIGNED(32)
int local_code_chip_index[8];
int local_code_chip_index_;
const __m256 zeros = _mm256_setzero_ps();
@ -531,7 +537,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_neon(int16_t** result, c
const float32x4_t rem_code_phase_chips_reg = vdupq_n_f32(rem_code_phase_chips);
const float32x4_t code_phase_step_chips_reg = vdupq_n_f32(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int32_t local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int32_t local_code_chip_index[4];
int32_t local_code_chip_index_;
const int32x4_t zeros = vdupq_n_s32(0);
@ -539,7 +546,8 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_neon(int16_t** result, c
const int32x4_t code_length_chips_reg_i = vdupq_n_s32((int32_t)code_length_chips);
int32x4_t local_code_chip_index_reg, aux_i, negatives, i;
float32x4_t aux, aux2, shifts_chips_reg, fi, c, j, cTrunc, base, indexn, reciprocal;
__VOLK_ATTR_ALIGNED(16) const float vec[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__VOLK_ATTR_ALIGNED(16)
const float vec[4] = {0.0f, 1.0f, 2.0f, 3.0f};
uint32x4_t igx;
reciprocal = vrecpeq_f32(code_length_chips_reg_f);
reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal);
@ -605,4 +613,3 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_neon(int16_t** result, c
#endif /*INCLUDED_volk_gnsssdr_16i_xn_resampler_16i_xn_H*/

View File

@ -192,7 +192,8 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc
const lv_16sc_t* _in_common = in_common;
lv_16sc_t* _out = result;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
__m128i* cacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
@ -206,11 +207,13 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc
// phase rotation registers
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
__m128i pc1, pc2;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -290,7 +293,6 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
a = cacc[n_vec];
_mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector
dotProduct = lv_cmake(0, 0);
@ -597,7 +599,8 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc
const lv_16sc_t* _in_common = in_common;
lv_16sc_t* _out = result;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
__m128i* cacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
@ -611,11 +614,13 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc
// phase rotation registers
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
__m128i pc1, pc2;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -695,7 +700,6 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
a = cacc[n_vec];
_mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector
dotProduct = lv_cmake(0, 0);
@ -755,7 +759,8 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc
lv_16sc_t tmp16;
lv_32fc_t tmp32;
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
__VOLK_ATTR_ALIGNED(32)
lv_16sc_t dotProductVector[8];
lv_16sc_t dotProduct = lv_cmake(0, 0);
__m256i* cacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
@ -780,8 +785,10 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc
_phase_inc /= hypotf(lv_creal(_phase_inc), lv_cimag(_phase_inc));
#endif
__VOLK_ATTR_ALIGNED(32) lv_32fc_t four_phase_inc[4];
__VOLK_ATTR_ALIGNED(32) lv_32fc_t four_phase_acc[4];
__VOLK_ATTR_ALIGNED(32)
lv_32fc_t four_phase_inc[4];
__VOLK_ATTR_ALIGNED(32)
lv_32fc_t four_phase_acc[4];
for (n = 0; n < 4; ++n)
{
four_phase_inc[n] = _phase_inc;
@ -885,7 +892,6 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
}
}
}
#endif /* LV_HAVE_AVX2 */
@ -907,7 +913,8 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2(lv_16sc
lv_16sc_t tmp16;
lv_32fc_t tmp32;
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
__VOLK_ATTR_ALIGNED(32)
lv_16sc_t dotProductVector[8];
lv_16sc_t dotProduct = lv_cmake(0, 0);
__m256i* cacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
@ -932,8 +939,10 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2(lv_16sc
_phase_inc /= hypotf(lv_creal(_phase_inc), lv_cimag(_phase_inc));
#endif
__VOLK_ATTR_ALIGNED(32) lv_32fc_t four_phase_inc[4];
__VOLK_ATTR_ALIGNED(32) lv_32fc_t four_phase_acc[4];
__VOLK_ATTR_ALIGNED(32)
lv_32fc_t four_phase_inc[4];
__VOLK_ATTR_ALIGNED(32)
lv_32fc_t four_phase_acc[4];
for (n = 0; n < 4; ++n)
{
four_phase_inc[n] = _phase_inc;
@ -1037,7 +1046,6 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2(lv_16sc
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
}
}
}
#endif /* LV_HAVE_AVX2 */
@ -1596,5 +1604,3 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2(lv_16sc
//#endif [> LV_HAVE_NEON <]
#endif /*INCLUDED_volk_gnsssdr_16ic_16i_dot_prod_16ic_xn_H*/

View File

@ -379,6 +379,3 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_u_avx2(lv_
//#endif // NEON
#endif // INCLUDED_volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_H

View File

@ -231,4 +231,3 @@ static inline void volk_gnsssdr_16ic_conjugate_16ic_u_avx2(lv_16sc_t* cVector, c
//#endif /* LV_HAVE_NEON */
#endif /* INCLUDED_volk_gnsssdr_16ic_conjugate_16ic_H */

View File

@ -97,7 +97,8 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_a_sse2(lv_16sc_t* resul
lv_16sc_t* _result = result;
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
__m128 _rem_code_phase, _code_phase_step_chips;
__m128i _code_length_chips, _code_length_chips_minus1;
__m128 _code_phase_out, _code_phase_out_with_offset;
@ -105,13 +106,15 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_a_sse2(lv_16sc_t* resul
_rem_code_phase = _mm_load1_ps(&rem_code_phase_chips); //load float to all four float values in m128 register
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
__VOLK_ATTR_ALIGNED(16)
int four_times_code_length_chips_minus1[4];
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
__VOLK_ATTR_ALIGNED(16)
int four_times_code_length_chips[4];
four_times_code_length_chips[0] = code_length_chips;
four_times_code_length_chips[1] = code_length_chips;
four_times_code_length_chips[2] = code_length_chips;
@ -124,9 +127,11 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_a_sse2(lv_16sc_t* resul
__m128i zero = _mm_setzero_si128();
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__VOLK_ATTR_ALIGNED(16)
float init_idx_float[4] = {0.0f, 1.0f, 2.0f, 3.0f};
__m128 _4output_index = _mm_load_ps(init_idx_float);
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
__VOLK_ATTR_ALIGNED(16)
float init_4constant_float[4] = {4.0f, 4.0f, 4.0f, 4.0f};
__m128 _4constant_float = _mm_load_ps(init_4constant_float);
for (number = 0; number < quarterPoints; number++)
@ -177,7 +182,8 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_u_sse2(lv_16sc_t* resul
lv_16sc_t* _result = result;
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
__m128 _rem_code_phase, _code_phase_step_chips;
__m128i _code_length_chips, _code_length_chips_minus1;
__m128 _code_phase_out, _code_phase_out_with_offset;
@ -185,13 +191,15 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_u_sse2(lv_16sc_t* resul
_rem_code_phase = _mm_load1_ps(&rem_code_phase_chips); //load float to all four float values in m128 register
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
__VOLK_ATTR_ALIGNED(16)
int four_times_code_length_chips_minus1[4];
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
__VOLK_ATTR_ALIGNED(16)
int four_times_code_length_chips[4];
four_times_code_length_chips[0] = code_length_chips;
four_times_code_length_chips[1] = code_length_chips;
four_times_code_length_chips[2] = code_length_chips;
@ -204,9 +212,11 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_u_sse2(lv_16sc_t* resul
__m128i zero = _mm_setzero_si128();
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__VOLK_ATTR_ALIGNED(16)
float init_idx_float[4] = {0.0f, 1.0f, 2.0f, 3.0f};
__m128 _4output_index = _mm_loadu_ps(init_idx_float);
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
__VOLK_ATTR_ALIGNED(16)
float init_4constant_float[4] = {4.0f, 4.0f, 4.0f, 4.0f};
__m128 _4constant_float = _mm_loadu_ps(init_4constant_float);
for (number = 0; number < quarterPoints; number++)
@ -257,7 +267,8 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_neon(lv_16sc_t* result,
lv_16sc_t* _result = result;
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
float32x4_t _rem_code_phase, _code_phase_step_chips;
int32x4_t _code_length_chips, _code_length_chips_minus1;
float32x4_t _code_phase_out, _code_phase_out_with_offset;
@ -266,13 +277,15 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_neon(lv_16sc_t* result,
_rem_code_phase = vld1q_dup_f32(&rem_code_phase_chips); //load float to all four float values in m128 register
_code_phase_step_chips = vld1q_dup_f32(&code_phase_step_chips); //load float to all four float values in m128 register
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
__VOLK_ATTR_ALIGNED(16)
int four_times_code_length_chips_minus1[4];
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
__VOLK_ATTR_ALIGNED(16)
int four_times_code_length_chips[4];
four_times_code_length_chips[0] = code_length_chips;
four_times_code_length_chips[1] = code_length_chips;
four_times_code_length_chips[2] = code_length_chips;
@ -285,9 +298,11 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_neon(lv_16sc_t* result,
uint32x4_t negative_indexes, overflow_indexes;
int32x4_t zero = vmovq_n_s32(0);
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__VOLK_ATTR_ALIGNED(16)
float init_idx_float[4] = {0.0f, 1.0f, 2.0f, 3.0f};
float32x4_t _4output_index = vld1q_f32(init_idx_float);
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
__VOLK_ATTR_ALIGNED(16)
float init_4constant_float[4] = {4.0f, 4.0f, 4.0f, 4.0f};
float32x4_t _4constant_float = vld1q_f32(init_4constant_float);
for (number = 0; number < quarterPoints; number++)

View File

@ -141,11 +141,13 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
unsigned int number;
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
__m128i c1, c2, result;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -232,7 +234,6 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
@ -244,11 +245,13 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc
unsigned int j;
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
__m128i c1, c2, result;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -385,7 +388,6 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
@ -395,11 +397,13 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
unsigned int number;
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
__m128i c1, c2, result;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -498,11 +502,13 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(lv_16sc
unsigned int j;
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
__m128i c1, c2, result;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -657,8 +663,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe
lv_16sc_t* _out = outVector;
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t __phase4_real[4] = {lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t __phase4_imag[4] = {lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4)};
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
@ -667,8 +675,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe
lv_32fc_t phase3 = phase2 * phase_inc;
lv_32fc_t phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t __phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t __phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
float32x4_t _phase_real = vld1q_f32(__phase_real);
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
@ -745,8 +755,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe
phase3 = phase2 * phase_inc;
phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t ____phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t ____phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
_phase_real = vld1q_f32(____phase_real);
_phase_imag = vld1q_f32(____phase_imag);
@ -791,8 +803,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(lv_16sc_t
lv_16sc_t* _out = outVector;
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t __phase4_real[4] = {lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t __phase4_imag[4] = {lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4)};
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
@ -801,8 +815,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(lv_16sc_t
lv_32fc_t phase3 = phase2 * phase_inc;
lv_32fc_t phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t __phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t __phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
float32x4_t _phase_real = vld1q_f32(__phase_real);
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
@ -879,8 +895,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(lv_16sc_t
phase3 = phase2 * phase_inc;
phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t ____phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t ____phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
_phase_real = vld1q_f32(____phase_real);
_phase_imag = vld1q_f32(____phase_imag);

View File

@ -96,7 +96,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
if (sse_iters > 0)
{
__m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
realcacc = _mm_setzero_si128();
imagcacc = _mm_setzero_si128();
@ -174,7 +175,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
if (sse_iters > 0)
{
__m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc, result;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
realcacc = _mm_setzero_si128();
imagcacc = _mm_setzero_si128();
@ -253,7 +255,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_axv2(lv_16sc_t* out, con
if (avx_iters > 0)
{
__m256i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc, result;
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
__VOLK_ATTR_ALIGNED(32)
lv_16sc_t dotProductVector[8];
realcacc = _mm256_setzero_si256();
imagcacc = _mm256_setzero_si256();
@ -330,7 +333,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_axv2(lv_16sc_t* out, con
if (avx_iters > 0)
{
__m256i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc, result;
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
__VOLK_ATTR_ALIGNED(32)
lv_16sc_t dotProductVector[8];
realcacc = _mm256_setzero_si256();
imagcacc = _mm256_setzero_si256();
@ -407,7 +411,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const
// 2nd lane holds the imaginary part
int16x4x2_t a_val, b_val, c_val, accumulator;
int16x4x2_t tmp_real, tmp_imag;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t accum_result[4];
accumulator.val[0] = vdup_n_s16(0);
accumulator.val[1] = vdup_n_s16(0);
lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0);
@ -474,7 +479,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon_vma(lv_16sc_t* out, c
// 2nd lane holds the imaginary part
int16x4x2_t a_val, b_val, accumulator;
int16x4x2_t tmp;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t accum_result[4];
accumulator.val[0] = vdup_n_s16(0);
accumulator.val[1] = vdup_n_s16(0);
@ -526,7 +532,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon_optvma(lv_16sc_t* out
// 2nd lane holds the imaginary part
int16x4x2_t a_val, b_val, accumulator1, accumulator2;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t accum_result[4];
accumulator1.val[0] = vdup_n_s16(0);
accumulator1.val[1] = vdup_n_s16(0);
accumulator2.val[0] = vdup_n_s16(0);

View File

@ -125,7 +125,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* resul
if (sse_iters > 0)
{
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
__m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
__m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
@ -219,7 +220,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* resul
if (sse_iters > 0)
{
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
__m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
__m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
@ -313,7 +315,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_avx2(lv_16sc_t* resul
if (sse_iters > 0)
{
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
__VOLK_ATTR_ALIGNED(32)
lv_16sc_t dotProductVector[8];
__m256i* realcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
__m256i* imagcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
@ -407,7 +410,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_avx2(lv_16sc_t* resul
if (sse_iters > 0)
{
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
__VOLK_ATTR_ALIGNED(32)
lv_16sc_t dotProductVector[8];
__m256i* realcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
__m256i* imagcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
@ -501,7 +505,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result,
if (neon_iters > 0)
{
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
int16x4x2_t a_val, b_val, c_val;
@ -589,7 +594,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(lv_16sc_t* res
if (neon_iters > 0)
{
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
int16x4x2_t a_val, b_val, tmp;
@ -666,7 +672,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_optvma(lv_16sc_t*
if (neon_iters > 0)
{
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
int16x4x2_t a_val, b_val;

View File

@ -262,5 +262,3 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon_optvma(lv_16sc
#endif // NEON
#endif // INCLUDED_volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_H

View File

@ -292,7 +292,6 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_avx2(lv_16sc_t* out, con
#endif /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>

View File

@ -191,7 +191,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
const lv_16sc_t* _in_common = in_common;
lv_16sc_t* _out = result;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
__m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
__m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
@ -210,11 +211,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
// phase rotation registers
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
__m128i pc1, pc2;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -369,7 +372,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l
const lv_16sc_t* _in_common = in_common;
lv_16sc_t* _out = result;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
__m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
__m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
@ -388,11 +392,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l
// phase rotation registers
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
__m128i pc1, pc2;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -594,7 +600,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
}
}
}
#endif /* LV_HAVE_SSE3 */
@ -615,7 +620,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
const lv_16sc_t* _in_common = in_common;
lv_16sc_t* _out = result;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
__m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
__m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment());
@ -634,11 +640,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
// phase rotation registers
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
__m128i pc1, pc2;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_loadu_ps((float*)two_phase_inc);
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_loadu_ps((float*)two_phase_acc);
@ -781,7 +789,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_
lv_16sc_t tmp16;
lv_32fc_t tmp32;
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
__VOLK_ATTR_ALIGNED(32)
lv_16sc_t dotProductVector[8];
lv_16sc_t dotProduct = lv_cmake(0, 0);
__m256i* realcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
@ -798,11 +807,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
__m128i c1, c2, result1, result2;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -966,7 +977,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2(lv_16sc_
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
}
}
}
#endif /* LV_HAVE_AVX2 */
@ -989,7 +999,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l
lv_16sc_t tmp16;
lv_32fc_t tmp32;
__VOLK_ATTR_ALIGNED(32) lv_16sc_t dotProductVector[8];
__VOLK_ATTR_ALIGNED(32)
lv_16sc_t dotProductVector[8];
lv_16sc_t dotProduct = lv_cmake(0, 0);
__m256i* realcacc = (__m256i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256i), volk_gnsssdr_get_alignment());
@ -1006,11 +1017,13 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
__m128i c1, c2, result1, result2;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -1312,8 +1325,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
float phase_est;
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t __phase4_real[4] = {lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t __phase4_imag[4] = {lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4)};
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
@ -1322,14 +1337,17 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
lv_32fc_t phase3 = phase2 * phase_inc;
lv_32fc_t phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t __phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t __phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
float32x4_t _phase_real = vld1q_f32(__phase_real);
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
int16x4x2_t a_val, b_val, c_val;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
float32x4_t half = vdupq_n_f32(0.5f);
int16x4x2_t tmp16;
int32x4x2_t tmp32i;
@ -1426,8 +1444,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
phase3 = phase2 * phase_inc;
phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t ____phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t ____phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
_phase_real = vld1q_f32(____phase_real);
_phase_imag = vld1q_f32(____phase_imag);
@ -1495,8 +1515,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
float phase_est;
//printf("arg phase0: %f", arg_phase0);
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t __phase4_real[4] = {lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t __phase4_imag[4] = {lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4)};
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
@ -1505,14 +1527,17 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
lv_32fc_t phase3 = phase2 * phase_inc;
lv_32fc_t phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t __phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t __phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
float32x4_t _phase_real = vld1q_f32(__phase_real);
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
int16x4x2_t a_val, b_val;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
float32x4_t half = vdupq_n_f32(0.5f);
int16x4x2_t tmp16;
int32x4x2_t tmp32i;
@ -1589,8 +1614,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
phase3 = phase2 * phase_inc;
phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t ____phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t ____phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
_phase_real = vld1q_f32(____phase_real);
_phase_imag = vld1q_f32(____phase_imag);
@ -1605,7 +1632,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
//_phase_real = vmulq_f32(_phase_real, Round);
//_phase_imag = vmulq_f32(_phase_imag, Round);
//printf("After %i: %f,%f, %f\n\n", number, _phase_real[0], _phase_imag[0], sqrt(_phase_real[0]*_phase_real[0]+_phase_imag[0]*_phase_imag[0]));
}
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
@ -1686,8 +1712,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_
float phase_est;
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t __phase4_real[4] = {lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t __phase4_imag[4] = {lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4)};
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
@ -1696,14 +1724,17 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_
lv_32fc_t phase3 = phase2 * phase_inc;
lv_32fc_t phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t __phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t __phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
float32x4_t _phase_real = vld1q_f32(__phase_real);
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
int16x4x2_t a_val, b_val;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(16)
lv_16sc_t dotProductVector[4];
float32x4_t half = vdupq_n_f32(0.5f);
int32x4x2_t tmp32i;
@ -1782,8 +1813,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_
phase3 = phase2 * phase_inc;
phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t ____phase_real[4] = {lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t ____phase_imag[4] = {lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
_phase_real = vld1q_f32(____phase_real);
_phase_imag = vld1q_f32(____phase_imag);
@ -1842,4 +1875,3 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_
#endif /* LV_HAVE_NEON */
#endif /*INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_H*/

View File

@ -379,5 +379,3 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon_vma(lv
#endif // NEON
#endif // INCLUDED_volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_H

View File

@ -106,7 +106,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse4_1(lv_16sc_t** r
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -172,7 +173,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse4_1(lv_16sc_t** r
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -239,7 +241,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse3(lv_16sc_t** res
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -309,7 +312,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse3(lv_16sc_t** res
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -378,7 +382,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_avx(lv_16sc_t** resu
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
__VOLK_ATTR_ALIGNED(32)
int local_code_chip_index[8];
int local_code_chip_index_;
const __m256 zeros = _mm256_setzero_ps();
@ -455,7 +460,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_avx(lv_16sc_t** resu
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
__VOLK_ATTR_ALIGNED(32)
int local_code_chip_index[8];
int local_code_chip_index_;
const __m256 zeros = _mm256_setzero_ps();
@ -530,7 +536,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul
const float32x4_t rem_code_phase_chips_reg = vdupq_n_f32(rem_code_phase_chips);
const float32x4_t code_phase_step_chips_reg = vdupq_n_f32(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int32_t local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int32_t local_code_chip_index[4];
int32_t local_code_chip_index_;
const int32x4_t zeros = vdupq_n_s32(0);
@ -538,7 +545,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul
const int32x4_t code_length_chips_reg_i = vdupq_n_s32((int32_t)code_length_chips);
int32x4_t local_code_chip_index_reg, aux_i, negatives, i;
float32x4_t aux, aux2, shifts_chips_reg, fi, c, j, cTrunc, base, indexn, reciprocal;
__VOLK_ATTR_ALIGNED(16) const float vec[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__VOLK_ATTR_ALIGNED(16)
const float vec[4] = {0.0f, 1.0f, 2.0f, 3.0f};
uint32x4_t igx;
reciprocal = vrecpeq_f32(code_length_chips_reg_f);
reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal);
@ -604,4 +612,3 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** resul
#endif /*INCLUDED_volk_gnsssdr_16ic_xn_resampler_16ic_xn_H*/

View File

@ -102,20 +102,23 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_a_sse2(lv_16sc_t*
const unsigned int quarterPoints = num_output_samples / 4;
lv_16sc_t** _result = result;
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
float tmp_rem_code_phase_chips;
__m128 _rem_code_phase, _code_phase_step_chips;
__m128i _code_length_chips, _code_length_chips_minus1;
__m128 _code_phase_out, _code_phase_out_with_offset;
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
__VOLK_ATTR_ALIGNED(16)
int four_times_code_length_chips_minus1[4];
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
__VOLK_ATTR_ALIGNED(16)
int four_times_code_length_chips[4];
four_times_code_length_chips[0] = code_length_chips;
four_times_code_length_chips[1] = code_length_chips;
four_times_code_length_chips[2] = code_length_chips;
@ -128,9 +131,11 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_a_sse2(lv_16sc_t*
__m128i zero = _mm_setzero_si128();
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__VOLK_ATTR_ALIGNED(16)
float init_idx_float[4] = {0.0f, 1.0f, 2.0f, 3.0f};
__m128 _4output_index = _mm_load_ps(init_idx_float);
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
__VOLK_ATTR_ALIGNED(16)
float init_4constant_float[4] = {4.0f, 4.0f, 4.0f, 4.0f};
__m128 _4constant_float = _mm_load_ps(init_4constant_float);
int current_vector = 0;
@ -193,20 +198,23 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_u_sse2(lv_16sc_t*
const unsigned int quarterPoints = num_output_samples / 4;
lv_16sc_t** _result = result;
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
float tmp_rem_code_phase_chips;
__m128 _rem_code_phase, _code_phase_step_chips;
__m128i _code_length_chips, _code_length_chips_minus1;
__m128 _code_phase_out, _code_phase_out_with_offset;
_code_phase_step_chips = _mm_load1_ps(&code_phase_step_chips); //load float to all four float values in m128 register
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
__VOLK_ATTR_ALIGNED(16)
int four_times_code_length_chips_minus1[4];
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
__VOLK_ATTR_ALIGNED(16)
int four_times_code_length_chips[4];
four_times_code_length_chips[0] = code_length_chips;
four_times_code_length_chips[1] = code_length_chips;
four_times_code_length_chips[2] = code_length_chips;
@ -219,9 +227,11 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_u_sse2(lv_16sc_t*
__m128i zero = _mm_setzero_si128();
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__VOLK_ATTR_ALIGNED(16)
float init_idx_float[4] = {0.0f, 1.0f, 2.0f, 3.0f};
__m128 _4output_index = _mm_loadu_ps(init_idx_float);
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
__VOLK_ATTR_ALIGNED(16)
float init_4constant_float[4] = {4.0f, 4.0f, 4.0f, 4.0f};
__m128 _4constant_float = _mm_loadu_ps(init_4constant_float);
int current_vector = 0;
@ -285,7 +295,8 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_neon(lv_16sc_t**
float32x4_t half = vdupq_n_f32(0.5f);
lv_16sc_t** _result = result;
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
float tmp_rem_code_phase_chips;
float32x4_t _rem_code_phase, _code_phase_step_chips;
int32x4_t _code_length_chips, _code_length_chips_minus1;
@ -293,13 +304,15 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_neon(lv_16sc_t**
float32x4_t sign, PlusHalf, Round;
_code_phase_step_chips = vld1q_dup_f32(&code_phase_step_chips); //load float to all four float values in float32x4_t register
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips_minus1[4];
__VOLK_ATTR_ALIGNED(16)
int four_times_code_length_chips_minus1[4];
four_times_code_length_chips_minus1[0] = code_length_chips - 1;
four_times_code_length_chips_minus1[1] = code_length_chips - 1;
four_times_code_length_chips_minus1[2] = code_length_chips - 1;
four_times_code_length_chips_minus1[3] = code_length_chips - 1;
__VOLK_ATTR_ALIGNED(16) int four_times_code_length_chips[4];
__VOLK_ATTR_ALIGNED(16)
int four_times_code_length_chips[4];
four_times_code_length_chips[0] = code_length_chips;
four_times_code_length_chips[1] = code_length_chips;
four_times_code_length_chips[2] = code_length_chips;
@ -312,9 +325,11 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_neon(lv_16sc_t**
uint32x4_t negative_indexes, overflow_indexes;
int32x4_t zero = vmovq_n_s32(0);
__VOLK_ATTR_ALIGNED(16) float init_idx_float[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__VOLK_ATTR_ALIGNED(16)
float init_idx_float[4] = {0.0f, 1.0f, 2.0f, 3.0f};
float32x4_t _4output_index = vld1q_f32(init_idx_float);
__VOLK_ATTR_ALIGNED(16) float init_4constant_float[4] = { 4.0f, 4.0f, 4.0f, 4.0f };
__VOLK_ATTR_ALIGNED(16)
float init_4constant_float[4] = {4.0f, 4.0f, 4.0f, 4.0f};
float32x4_t _4constant_float = vld1q_f32(init_4constant_float);
int current_vector = 0;

View File

@ -29,7 +29,6 @@
*/
/*!
* \page volk_gnsssdr_32f_index_max_32u.h
*
@ -80,12 +79,15 @@ static inline void volk_gnsssdr_32f_index_max_32u_a_avx(uint32_t* target, const
__m256 compareResults;
__m256 currentValues;
__VOLK_ATTR_ALIGNED(32) float maxValuesBuffer[8];
__VOLK_ATTR_ALIGNED(32) float maxIndexesBuffer[8];
__VOLK_ATTR_ALIGNED(32)
float maxValuesBuffer[8];
__VOLK_ATTR_ALIGNED(32)
float maxIndexesBuffer[8];
for (; number < quarterPoints; number++)
{
currentValues = _mm256_load_ps(inputPtr); inputPtr += 8;
currentValues = _mm256_load_ps(inputPtr);
inputPtr += 8;
currentIndexes = _mm256_add_ps(currentIndexes, indexIncrementValues);
compareResults = _mm256_cmp_ps(maxValues, currentValues, 0x1e);
maxValuesIndex = _mm256_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
@ -143,12 +145,15 @@ static inline void volk_gnsssdr_32f_index_max_32u_u_avx(uint32_t* target, const
__m256 compareResults;
__m256 currentValues;
__VOLK_ATTR_ALIGNED(32) float maxValuesBuffer[8];
__VOLK_ATTR_ALIGNED(32) float maxIndexesBuffer[8];
__VOLK_ATTR_ALIGNED(32)
float maxValuesBuffer[8];
__VOLK_ATTR_ALIGNED(32)
float maxIndexesBuffer[8];
for (; number < quarterPoints; number++)
{
currentValues = _mm256_loadu_ps(inputPtr); inputPtr += 8;
currentValues = _mm256_loadu_ps(inputPtr);
inputPtr += 8;
currentIndexes = _mm256_add_ps(currentIndexes, indexIncrementValues);
compareResults = _mm256_cmp_ps(maxValues, currentValues, 0x1e);
maxValuesIndex = _mm256_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
@ -206,12 +211,15 @@ static inline void volk_gnsssdr_32f_index_max_32u_a_sse4_1(uint32_t* target, con
__m128 compareResults;
__m128 currentValues;
__VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
__VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
__VOLK_ATTR_ALIGNED(16)
float maxValuesBuffer[4];
__VOLK_ATTR_ALIGNED(16)
float maxIndexesBuffer[4];
for (; number < quarterPoints; number++)
{
currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
currentValues = _mm_load_ps(inputPtr);
inputPtr += 4;
currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
compareResults = _mm_cmpgt_ps(maxValues, currentValues);
maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
@ -269,12 +277,15 @@ static inline void volk_gnsssdr_32f_index_max_32u_u_sse4_1(uint32_t* target, con
__m128 compareResults;
__m128 currentValues;
__VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
__VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
__VOLK_ATTR_ALIGNED(16)
float maxValuesBuffer[4];
__VOLK_ATTR_ALIGNED(16)
float maxIndexesBuffer[4];
for (; number < quarterPoints; number++)
{
currentValues = _mm_loadu_ps(inputPtr); inputPtr += 4;
currentValues = _mm_loadu_ps(inputPtr);
inputPtr += 4;
currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
compareResults = _mm_cmpgt_ps(maxValues, currentValues);
maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
@ -333,12 +344,15 @@ static inline void volk_gnsssdr_32f_index_max_32u_a_sse(uint32_t* target, const
__m128 compareResults;
__m128 currentValues;
__VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
__VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
__VOLK_ATTR_ALIGNED(16)
float maxValuesBuffer[4];
__VOLK_ATTR_ALIGNED(16)
float maxIndexesBuffer[4];
for (; number < quarterPoints; number++)
{
currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
currentValues = _mm_load_ps(inputPtr);
inputPtr += 4;
currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
compareResults = _mm_cmpgt_ps(maxValues, currentValues);
maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex), _mm_andnot_ps(compareResults, currentIndexes));
@ -397,12 +411,15 @@ static inline void volk_gnsssdr_32f_index_max_32u_u_sse(uint32_t* target, const
__m128 compareResults;
__m128 currentValues;
__VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
__VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
__VOLK_ATTR_ALIGNED(16)
float maxValuesBuffer[4];
__VOLK_ATTR_ALIGNED(16)
float maxIndexesBuffer[4];
for (; number < quarterPoints; number++)
{
currentValues = _mm_loadu_ps(inputPtr); inputPtr += 4;
currentValues = _mm_loadu_ps(inputPtr);
inputPtr += 4;
currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
compareResults = _mm_cmpgt_ps(maxValues, currentValues);
maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex), _mm_andnot_ps(compareResults, currentIndexes));
@ -476,7 +493,8 @@ static inline void volk_gnsssdr_32f_index_max_32u_neon(uint32_t* target, const f
float* inputPtr = (float*)src0;
float32x4_t indexIncrementValues = vdupq_n_f32(4);
__VOLK_ATTR_ALIGNED(16) float currentIndexes_float[4] = { -4.0f, -3.0f, -2.0f, -1.0f };
__VOLK_ATTR_ALIGNED(16)
float currentIndexes_float[4] = {-4.0f, -3.0f, -2.0f, -1.0f};
float32x4_t currentIndexes = vld1q_f32(currentIndexes_float);
float max = src0[0];
@ -487,12 +505,15 @@ static inline void volk_gnsssdr_32f_index_max_32u_neon(uint32_t* target, const f
uint32x4_t currentIndexes_u;
float32x4_t currentValues;
__VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
__VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
__VOLK_ATTR_ALIGNED(16)
float maxValuesBuffer[4];
__VOLK_ATTR_ALIGNED(16)
float maxIndexesBuffer[4];
for (; number < quarterPoints; number++)
{
currentValues = vld1q_f32(inputPtr); inputPtr += 4;
currentValues = vld1q_f32(inputPtr);
inputPtr += 4;
currentIndexes = vaddq_f32(currentIndexes, indexIncrementValues);
currentIndexes_u = vcvtq_u32_f32(currentIndexes);
compareResults = vcgtq_f32(maxValues, currentValues);
@ -528,4 +549,3 @@ static inline void volk_gnsssdr_32f_index_max_32u_neon(uint32_t* target, const f
#endif /*LV_HAVE_NEON*/
#endif /*INCLUDED_volk_gnsssdr_32f_index_max_32u_H*/

View File

@ -42,7 +42,6 @@
#include <string.h>
#ifdef LV_HAVE_GENERIC
static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_generic(float* result, const float* local_code, unsigned int num_points)
{
@ -276,4 +275,3 @@ static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_neon(float* result, co
#endif
#endif // INCLUDED_volk_gnsssdr_32f_resamplerpuppet_32f_H

View File

@ -268,26 +268,44 @@ static inline void volk_gnsssdr_32f_sincos_32fc_a_sse2(lv_32fc_t* out, const flo
__m128i emm0, emm2, emm4;
/* declare some SSE constants */
__VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
__VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
__VOLK_ATTR_ALIGNED(16)
static const int _ps_inv_sign_mask[4] = {~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000};
__VOLK_ATTR_ALIGNED(16)
static const int _ps_sign_mask[4] = {(int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000};
__VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
__VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 };
__VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 };
__VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2};
__VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_cephes_FOPI[4] = {1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516};
__VOLK_ATTR_ALIGNED(16)
static const int _pi32_1[4] = {1, 1, 1, 1};
__VOLK_ATTR_ALIGNED(16)
static const int _pi32_inv1[4] = {~1, ~1, ~1, ~1};
__VOLK_ATTR_ALIGNED(16)
static const int _pi32_2[4] = {2, 2, 2, 2};
__VOLK_ATTR_ALIGNED(16)
static const int _pi32_4[4] = {4, 4, 4, 4};
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
__VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
__VOLK_ATTR_ALIGNED(16)
static const float _ps_minus_cephes_DP1[4] = {-0.78515625, -0.78515625, -0.78515625, -0.78515625};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_minus_cephes_DP2[4] = {-2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_minus_cephes_DP3[4] = {-3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_coscof_p0[4] = {2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_coscof_p1[4] = {-1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_coscof_p2[4] = {4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_sincof_p0[4] = {-1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_sincof_p1[4] = {8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_sincof_p2[4] = {-1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_0p5[4] = {0.5f, 0.5f, 0.5f, 0.5f};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_1[4] = {1.0f, 1.0f, 1.0f, 1.0f};
for (; number < sse_iters; number++)
{
@ -397,7 +415,6 @@ static inline void volk_gnsssdr_32f_sincos_32fc_a_sse2(lv_32fc_t* out, const flo
_in = *aPtr++;
*bPtr++ = lv_cmake((float)cosf(_in), (float)sinf(_in));
}
}
#endif /* LV_HAVE_SSE2 */
@ -421,26 +438,44 @@ static inline void volk_gnsssdr_32f_sincos_32fc_u_sse2(lv_32fc_t* out, const flo
__m128i emm0, emm2, emm4;
/* declare some SSE constants */
__VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
__VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
__VOLK_ATTR_ALIGNED(16)
static const int _ps_inv_sign_mask[4] = {~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000};
__VOLK_ATTR_ALIGNED(16)
static const int _ps_sign_mask[4] = {(int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000};
__VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
__VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 };
__VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 };
__VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2};
__VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_cephes_FOPI[4] = {1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516};
__VOLK_ATTR_ALIGNED(16)
static const int _pi32_1[4] = {1, 1, 1, 1};
__VOLK_ATTR_ALIGNED(16)
static const int _pi32_inv1[4] = {~1, ~1, ~1, ~1};
__VOLK_ATTR_ALIGNED(16)
static const int _pi32_2[4] = {2, 2, 2, 2};
__VOLK_ATTR_ALIGNED(16)
static const int _pi32_4[4] = {4, 4, 4, 4};
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
__VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
__VOLK_ATTR_ALIGNED(16)
static const float _ps_minus_cephes_DP1[4] = {-0.78515625, -0.78515625, -0.78515625, -0.78515625};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_minus_cephes_DP2[4] = {-2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_minus_cephes_DP3[4] = {-3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_coscof_p0[4] = {2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_coscof_p1[4] = {-1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_coscof_p2[4] = {4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_sincof_p0[4] = {-1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_sincof_p1[4] = {8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_sincof_p2[4] = {-1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_0p5[4] = {0.5f, 0.5f, 0.5f, 0.5f};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_1[4] = {1.0f, 1.0f, 1.0f, 1.0f};
for (; number < sse_iters; number++)
{
@ -550,7 +585,6 @@ static inline void volk_gnsssdr_32f_sincos_32fc_u_sse2(lv_32fc_t* out, const flo
_in = *aPtr++;
*bPtr++ = lv_cmake((float)cosf(_in), (float)sinf(_in));
}
}
#endif /* LV_HAVE_SSE2 */

View File

@ -110,7 +110,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse3(float** result, c
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -180,7 +181,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse3(float** result, c
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -248,7 +250,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_a_sse4_1(float** result,
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -314,7 +317,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_u_sse4_1(float** result,
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -380,7 +384,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_a_avx(float** result, co
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
__VOLK_ATTR_ALIGNED(32)
int local_code_chip_index[8];
int local_code_chip_index_;
const __m256 zeros = _mm256_setzero_ps();
@ -457,7 +462,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_u_avx(float** result, co
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
__VOLK_ATTR_ALIGNED(32)
int local_code_chip_index[8];
int local_code_chip_index_;
const __m256 zeros = _mm256_setzero_ps();
@ -536,7 +542,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_neon(float** result, con
const float32x4_t rem_code_phase_chips_reg = vdupq_n_f32(rem_code_phase_chips);
const float32x4_t code_phase_step_chips_reg = vdupq_n_f32(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int32_t local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int32_t local_code_chip_index[4];
int32_t local_code_chip_index_;
const int32x4_t zeros = vdupq_n_s32(0);
@ -544,7 +551,8 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_neon(float** result, con
const int32x4_t code_length_chips_reg_i = vdupq_n_s32((int32_t)code_length_chips);
int32x4_t local_code_chip_index_reg, aux_i, negatives, i;
float32x4_t aux, aux2, shifts_chips_reg, fi, c, j, cTrunc, base, indexn, reciprocal;
__VOLK_ATTR_ALIGNED(16) const float vec[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__VOLK_ATTR_ALIGNED(16)
const float vec[4] = {0.0f, 1.0f, 2.0f, 3.0f};
uint32x4_t igx;
reciprocal = vrecpeq_f32(code_length_chips_reg_f);
reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal);
@ -606,5 +614,3 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_neon(float** result, con
#endif
#endif /*INCLUDED_volk_gnsssdr_32f_xn_resampler_32f_xn_H*/

View File

@ -204,7 +204,8 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_
// Set up the complex rotator
__m256 z0, z1, z2, z3;
__VOLK_ATTR_ALIGNED(32) lv_32fc_t phase_vec[16];
__VOLK_ATTR_ALIGNED(32)
lv_32fc_t phase_vec[16];
for (vec_ind = 0; vec_ind < 16; ++vec_ind)
{
phase_vec[vec_ind] = _phase;
@ -216,7 +217,11 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_
z2 = _mm256_load_ps((float*)(phase_vec + 8));
z3 = _mm256_load_ps((float*)(phase_vec + 12));
lv_32fc_t dz = phase_inc; dz *= dz; dz *= dz; dz *= dz; dz *= dz; // dz = phase_inc^16;
lv_32fc_t dz = phase_inc;
dz *= dz;
dz *= dz;
dz *= dz;
dz *= dz; // dz = phase_inc^16;
for (vec_ind = 0; vec_ind < 4; ++vec_ind)
{
@ -282,7 +287,8 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_
aPtr += 32;
}
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(32)
lv_32fc_t dotProductVector[4];
for (vec_ind = 0; vec_ind < num_a_vectors; ++vec_ind)
{
@ -362,7 +368,8 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_
// Set up the complex rotator
__m256 z0, z1, z2, z3;
__VOLK_ATTR_ALIGNED(32) lv_32fc_t phase_vec[16];
__VOLK_ATTR_ALIGNED(32)
lv_32fc_t phase_vec[16];
for (vec_ind = 0; vec_ind < 16; ++vec_ind)
{
phase_vec[vec_ind] = _phase;
@ -374,7 +381,11 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_
z2 = _mm256_load_ps((float*)(phase_vec + 8));
z3 = _mm256_load_ps((float*)(phase_vec + 12));
lv_32fc_t dz = phase_inc; dz *= dz; dz *= dz; dz *= dz; dz *= dz; // dz = phase_inc^16;
lv_32fc_t dz = phase_inc;
dz *= dz;
dz *= dz;
dz *= dz;
dz *= dz; // dz = phase_inc^16;
for (vec_ind = 0; vec_ind < 4; ++vec_ind)
{
@ -386,7 +397,6 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_
for (; number < sixteenthPoints; number++)
{
a0Val = _mm256_load_ps(aPtr);
a1Val = _mm256_load_ps(aPtr + 8);
a2Val = _mm256_load_ps(aPtr + 16);
@ -441,7 +451,8 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_
aPtr += 32;
}
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(32)
lv_32fc_t dotProductVector[4];
for (vec_ind = 0; vec_ind < num_a_vectors; ++vec_ind)
{
@ -482,5 +493,3 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_
#endif /* LV_HAVE_AVX */
#endif /* INCLUDED_volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_H */

View File

@ -159,4 +159,3 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_a_avx(lv_3
#endif // AVX
#endif // INCLUDED_volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_H

View File

@ -82,8 +82,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector
for (i = 0; i < sse_iters; i++)
{
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 8);
// Clip
@ -135,8 +137,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector,
for (i = 0; i < sse_iters; i++)
{
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 8);
// Clip
@ -186,8 +190,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_avx2(lv_16sc_t* outputVector
for (i = 0; i < avx2_iters; i++)
{
inputVal1 = _mm256_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 8;
inputVal2 = _mm256_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 8;
inputVal1 = _mm256_loadu_ps((float*)inputVectorPtr);
inputVectorPtr += 8;
inputVal2 = _mm256_loadu_ps((float*)inputVectorPtr);
inputVectorPtr += 8;
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 16);
// Clip
@ -240,8 +246,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector
for (i = 0; i < sse_iters; i++)
{
inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal1 = _mm_load_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
inputVal2 = _mm_load_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 8);
// Clip
@ -291,8 +299,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector,
for (i = 0; i < sse_iters; i++)
{
inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal1 = _mm_load_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
inputVal2 = _mm_load_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 8);
// Clip
@ -343,8 +353,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_avx2(lv_16sc_t* outputVector
for (i = 0; i < avx2_iters; i++)
{
inputVal1 = _mm256_load_ps((float*)inputVectorPtr); inputVectorPtr += 8;
inputVal2 = _mm256_load_ps((float*)inputVectorPtr); inputVectorPtr += 8;
inputVal1 = _mm256_load_ps((float*)inputVectorPtr);
inputVectorPtr += 8;
inputVal2 = _mm256_load_ps((float*)inputVectorPtr);
inputVectorPtr += 8;
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 16);
// Clip
@ -399,8 +411,10 @@ static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector,
for (i = 0; i < neon_iters; i++)
{
a = vld1q_f32((const float32_t*)(inputVectorPtr)); inputVectorPtr += 4;
b = vld1q_f32((const float32_t*)(inputVectorPtr)); inputVectorPtr += 4;
a = vld1q_f32((const float32_t*)(inputVectorPtr));
inputVectorPtr += 4;
b = vld1q_f32((const float32_t*)(inputVectorPtr));
inputVectorPtr += 4;
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 8);
ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);

View File

@ -109,10 +109,14 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_avx2(lv_8sc_t* outputVector,
for (i = 0; i < avx2_iters; i++)
{
inputVal1 = _mm256_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 8;
inputVal2 = _mm256_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 8;
inputVal3 = _mm256_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 8;
inputVal4 = _mm256_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 8;
inputVal1 = _mm256_loadu_ps((float*)inputVectorPtr);
inputVectorPtr += 8;
inputVal2 = _mm256_loadu_ps((float*)inputVectorPtr);
inputVectorPtr += 8;
inputVal3 = _mm256_loadu_ps((float*)inputVectorPtr);
inputVectorPtr += 8;
inputVal4 = _mm256_loadu_ps((float*)inputVectorPtr);
inputVectorPtr += 8;
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 32);
inputVal1 = _mm256_mul_ps(inputVal1, vmax_val);
@ -179,10 +183,14 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_avx2(lv_8sc_t* outputVector,
for (i = 0; i < avx2_iters; i++)
{
inputVal1 = _mm256_load_ps((float*)inputVectorPtr); inputVectorPtr += 8;
inputVal2 = _mm256_load_ps((float*)inputVectorPtr); inputVectorPtr += 8;
inputVal3 = _mm256_load_ps((float*)inputVectorPtr); inputVectorPtr += 8;
inputVal4 = _mm256_load_ps((float*)inputVectorPtr); inputVectorPtr += 8;
inputVal1 = _mm256_load_ps((float*)inputVectorPtr);
inputVectorPtr += 8;
inputVal2 = _mm256_load_ps((float*)inputVectorPtr);
inputVectorPtr += 8;
inputVal3 = _mm256_load_ps((float*)inputVectorPtr);
inputVectorPtr += 8;
inputVal4 = _mm256_load_ps((float*)inputVectorPtr);
inputVectorPtr += 8;
__VOLK_GNSSSDR_PREFETCH(inputVectorPtr + 32);
inputVal1 = _mm256_mul_ps(inputVal1, vmax_val);
@ -249,10 +257,14 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector,
for (i = 0; i < sse_iters; i++)
{
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal3 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal4 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
inputVal3 = _mm_loadu_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
inputVal4 = _mm_loadu_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
inputVal1 = _mm_mul_ps(inputVal1, vmax_val);
inputVal2 = _mm_mul_ps(inputVal2, vmax_val);
@ -315,10 +327,14 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector,
for (i = 0; i < sse_iters; i++)
{
inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal3 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal4 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal1 = _mm_load_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
inputVal2 = _mm_load_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
inputVal3 = _mm_load_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
inputVal4 = _mm_load_ps((float*)inputVectorPtr);
inputVectorPtr += 4;
inputVal1 = _mm_mul_ps(inputVal1, vmax_val);
inputVal2 = _mm_mul_ps(inputVal2, vmax_val);
@ -385,7 +401,8 @@ static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, co
for (i = 0; i < neon_iters; i++)
{
a = vld1q_f32((const float32_t*)inputVectorPtr); inputVectorPtr += 4;
a = vld1q_f32((const float32_t*)inputVectorPtr);
inputVectorPtr += 4;
a = vmulq_f32(a, max_val);
ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret1), 31)));
@ -394,7 +411,8 @@ static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, co
toint_a = vcvtq_s32_f32(Round);
intInputVal1 = vqmovn_s32(toint_a);
a = vld1q_f32((const float32_t*)inputVectorPtr); inputVectorPtr += 4;
a = vld1q_f32((const float32_t*)inputVectorPtr);
inputVectorPtr += 4;
a = vmulq_f32(a, max_val);
ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret1), 31)));
@ -406,7 +424,8 @@ static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, co
pack16_8_1 = vcombine_s16(intInputVal1, intInputVal2);
res8_1 = vqmovn_s16(pack16_8_1);
a = vld1q_f32((const float32_t*)inputVectorPtr); inputVectorPtr += 4;
a = vld1q_f32((const float32_t*)inputVectorPtr);
inputVectorPtr += 4;
a = vmulq_f32(a, max_val);
ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret1), 31)));
@ -415,7 +434,8 @@ static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, co
toint_a = vcvtq_s32_f32(Round);
intInputVal1 = vqmovn_s32(toint_a);
a = vld1q_f32((const float32_t*)inputVectorPtr); inputVectorPtr += 4;
a = vld1q_f32((const float32_t*)inputVectorPtr);
inputVectorPtr += 4;
a = vmulq_f32(a, max_val);
ret1 = vmaxq_f32(vminq_f32(a, max_val), min_val);
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(ret1), 31)));

View File

@ -42,7 +42,6 @@
#include <string.h>
#ifdef LV_HAVE_GENERIC
static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_generic(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
{

View File

@ -179,7 +179,8 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_
const lv_32fc_t** _in_a = in_a;
const lv_32fc_t* _in_common = in_common;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t dotProductVector[2];
__m128* acc = (__m128*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128), volk_gnsssdr_get_alignment());
@ -191,11 +192,13 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_
// phase rotation registers
__m128 a, two_phase_acc_reg, two_phase_inc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z1;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -288,7 +291,8 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_
const lv_32fc_t** _in_a = in_a;
const lv_32fc_t* _in_common = in_common;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t dotProductVector[2];
__m128* acc = (__m128*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128), volk_gnsssdr_get_alignment());
@ -300,11 +304,13 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_
// phase rotation registers
__m128 a, two_phase_acc_reg, two_phase_inc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z1;
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_inc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*)two_phase_inc);
__VOLK_ATTR_ALIGNED(16) lv_32fc_t two_phase_acc[2];
__VOLK_ATTR_ALIGNED(16)
lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
@ -398,7 +404,8 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t
const lv_32fc_t* _in_common = in_common;
lv_32fc_t _phase = (*phase);
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(32)
lv_32fc_t dotProductVector[4];
__m256* acc = (__m256*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256), volk_gnsssdr_get_alignment());
@ -525,7 +532,8 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t
const lv_32fc_t* _in_common = in_common;
lv_32fc_t _phase = (*phase);
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(32)
lv_32fc_t dotProductVector[4];
__m256* acc = (__m256*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m256), volk_gnsssdr_get_alignment());
@ -538,7 +546,8 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t
// phase rotation registers
__m256 a, four_phase_acc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z;
__VOLK_ATTR_ALIGNED(32) lv_32fc_t four_phase_inc[4];
__VOLK_ATTR_ALIGNED(32)
lv_32fc_t four_phase_inc[4];
const lv_32fc_t phase_inc2 = phase_inc * phase_inc;
const lv_32fc_t phase_inc3 = phase_inc2 * phase_inc;
const lv_32fc_t phase_inc4 = phase_inc3 * phase_inc;
@ -548,7 +557,8 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t
four_phase_inc[3] = phase_inc4;
const __m256 four_phase_inc_reg = _mm256_load_ps((float*)four_phase_inc);
__VOLK_ATTR_ALIGNED(32) lv_32fc_t four_phase_acc[4];
__VOLK_ATTR_ALIGNED(32)
lv_32fc_t four_phase_acc[4];
four_phase_acc[0] = _phase;
four_phase_acc[1] = _phase * phase_inc;
four_phase_acc[2] = _phase * phase_inc2;
@ -662,8 +672,10 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t*
float32_t phase_est;
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t __phase4_real[4] = {lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t __phase4_imag[4] = {lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4)};
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
@ -672,13 +684,16 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t*
lv_32fc_t phase3 = phase2 * phase_inc;
lv_32fc_t phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((_phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((_phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t __phase_real[4] = {lv_creal((_phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t __phase_imag[4] = {lv_cimag((_phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
float32x4_t _phase_real = vld1q_f32(__phase_real);
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
__VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector[4];
__VOLK_ATTR_ALIGNED(32)
lv_32fc_t dotProductVector[4];
float32x4x2_t a_val, b_val, tmp32_real, tmp32_imag;
@ -728,8 +743,10 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t*
phase3 = phase2 * phase_inc;
phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((_phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((_phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
__VOLK_ATTR_ALIGNED(16)
float32_t ____phase_real[4] = {lv_creal((_phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4)};
__VOLK_ATTR_ALIGNED(16)
float32_t ____phase_imag[4] = {lv_cimag((_phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4)};
_phase_real = vld1q_f32(____phase_real);
_phase_imag = vld1q_f32(____phase_imag);
@ -786,4 +803,3 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t*
#endif /* LV_HAVE_NEON */
#endif /* INCLUDED_volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_H */

View File

@ -107,7 +107,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_sse3(lv_32fc_t** res
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -177,7 +178,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_sse3(lv_32fc_t** res
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -245,7 +247,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_sse4_1(lv_32fc_t** r
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -311,7 +314,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_sse4_1(lv_32fc_t** r
const __m128 rem_code_phase_chips_reg = _mm_set_ps1(rem_code_phase_chips);
const __m128 code_phase_step_chips_reg = _mm_set_ps1(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int local_code_chip_index[4];
int local_code_chip_index_;
const __m128i zeros = _mm_setzero_si128();
@ -377,7 +381,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_avx(lv_32fc_t** resu
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
__VOLK_ATTR_ALIGNED(32)
int local_code_chip_index[8];
int local_code_chip_index_;
const __m256 zeros = _mm256_setzero_ps();
@ -454,7 +459,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_avx(lv_32fc_t** resu
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
__VOLK_ATTR_ALIGNED(32)
int local_code_chip_index[8];
int local_code_chip_index_;
const __m256 zeros = _mm256_setzero_ps();
@ -531,7 +537,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_u_avx2(lv_32fc_t** res
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
__VOLK_ATTR_ALIGNED(32)
int local_code_chip_index[8];
int local_code_chip_index_;
const __m256 zeros = _mm256_setzero_ps();
@ -609,7 +616,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_avx2(lv_32fc_t** res
const __m256 rem_code_phase_chips_reg = _mm256_set1_ps(rem_code_phase_chips);
const __m256 code_phase_step_chips_reg = _mm256_set1_ps(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(32) int local_code_chip_index[8];
__VOLK_ATTR_ALIGNED(32)
int local_code_chip_index[8];
int local_code_chip_index_;
const __m256 zeros = _mm256_setzero_ps();
@ -689,7 +697,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_neon(lv_32fc_t** resul
const float32x4_t rem_code_phase_chips_reg = vdupq_n_f32(rem_code_phase_chips);
const float32x4_t code_phase_step_chips_reg = vdupq_n_f32(code_phase_step_chips);
__VOLK_ATTR_ALIGNED(16) int32_t local_code_chip_index[4];
__VOLK_ATTR_ALIGNED(16)
int32_t local_code_chip_index[4];
int32_t local_code_chip_index_;
const int32x4_t zeros = vdupq_n_s32(0);
@ -697,7 +706,8 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_neon(lv_32fc_t** resul
const int32x4_t code_length_chips_reg_i = vdupq_n_s32((int32_t)code_length_chips);
int32x4_t local_code_chip_index_reg, aux_i, negatives, i;
float32x4_t aux, aux2, shifts_chips_reg, fi, c, j, cTrunc, base, indexn, reciprocal;
__VOLK_ATTR_ALIGNED(16) const float vec[4] = { 0.0f, 1.0f, 2.0f, 3.0f };
__VOLK_ATTR_ALIGNED(16)
const float vec[4] = {0.0f, 1.0f, 2.0f, 3.0f};
uint32x4_t igx;
reciprocal = vrecpeq_f32(code_length_chips_reg_f);
reciprocal = vmulq_f32(vrecpsq_f32(code_length_chips_reg_f, reciprocal), reciprocal);

View File

@ -69,7 +69,8 @@ static inline void volk_gnsssdr_64f_accumulator_64f_u_avx(double* result, const
unsigned int i;
const double* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(32) double tempBuffer[4];
__VOLK_ATTR_ALIGNED(32)
double tempBuffer[4];
__m256d accumulator = _mm256_setzero_pd();
__m256d aVal = _mm256_setzero_pd();
@ -108,7 +109,8 @@ static inline void volk_gnsssdr_64f_accumulator_64f_u_sse3(double* result,const
unsigned int i;
const double* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(16) double tempBuffer[2];
__VOLK_ATTR_ALIGNED(16)
double tempBuffer[2];
__m128d accumulator = _mm_setzero_pd();
__m128d aVal = _mm_setzero_pd();
@ -164,7 +166,8 @@ static inline void volk_gnsssdr_64f_accumulator_64f_a_avx(double* result,const d
unsigned int i;
const double* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(32) double tempBuffer[4];
__VOLK_ATTR_ALIGNED(32)
double tempBuffer[4];
__m256d accumulator = _mm256_setzero_pd();
__m256d aVal = _mm256_setzero_pd();
@ -203,7 +206,8 @@ static inline void volk_gnsssdr_64f_accumulator_64f_a_sse3(double* result,const
unsigned int i;
const double* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(16) double tempBuffer[2];
__VOLK_ATTR_ALIGNED(16)
double tempBuffer[2];
__m128d accumulator = _mm_setzero_pd();
__m128d aVal = _mm_setzero_pd();

View File

@ -70,7 +70,8 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_u_sse3(char* result, const ch
unsigned int i;
const char* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(16) char tempBuffer[16];
__VOLK_ATTR_ALIGNED(16)
char tempBuffer[16];
__m128i accumulator = _mm_setzero_si128();
__m128i aVal = _mm_setzero_si128();
@ -125,7 +126,8 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const ch
const char* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(16) char tempBuffer[16];
__VOLK_ATTR_ALIGNED(16)
char tempBuffer[16];
__m128i accumulator = _mm_setzero_si128();
__m128i aVal = _mm_setzero_si128();
@ -164,7 +166,8 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_a_avx2(char* result, const ch
const char* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(32) char tempBuffer[32];
__VOLK_ATTR_ALIGNED(32)
char tempBuffer[32];
__m256i accumulator = _mm256_setzero_si256();
__m256i aVal = _mm256_setzero_si256();
@ -202,7 +205,8 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_u_avx2(char* result, const ch
unsigned int i;
const char* aPtr = inputBuffer;
__VOLK_ATTR_ALIGNED(32) char tempBuffer[32];
__VOLK_ATTR_ALIGNED(32)
char tempBuffer[32];
__m256i accumulator = _mm256_setzero_si256();
__m256i aVal = _mm256_setzero_si256();

View File

@ -74,7 +74,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_avx2(unsigned int* target, co
char max = src0[0];
unsigned int index = 0;
unsigned int mask;
__VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
__VOLK_ATTR_ALIGNED(32)
char currentValuesBuffer[32];
__m256i maxValues, compareResults, currentValues;
maxValues = _mm256_set1_epi8(max);
@ -137,7 +138,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, con
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
__VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
__VOLK_ATTR_ALIGNED(32)
char currentValuesBuffer[32];
__m256i ones, compareResults, currentValues;
__m128i compareResultslo, compareResultshi, maxValues, lo, hi;
@ -204,7 +206,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target,
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__VOLK_ATTR_ALIGNED(16)
char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
@ -263,7 +266,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, co
char max = src0[0];
unsigned int index = 0;
unsigned short mask;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__VOLK_ATTR_ALIGNED(16)
char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
@ -351,7 +355,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_avx2(unsigned int* target, co
char max = src0[0];
unsigned int index = 0;
unsigned int mask;
__VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
__VOLK_ATTR_ALIGNED(32)
char currentValuesBuffer[32];
__m256i maxValues, compareResults, currentValues;
maxValues = _mm256_set1_epi8(max);
@ -414,7 +419,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, con
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
__VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
__VOLK_ATTR_ALIGNED(32)
char currentValuesBuffer[32];
__m256i ones, compareResults, currentValues;
__m128i compareResultslo, compareResultshi, maxValues, lo, hi;
@ -481,7 +487,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target,
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__VOLK_ATTR_ALIGNED(16)
char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
@ -540,7 +547,8 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, co
char max = src0[0];
unsigned int index = 0;
unsigned short mask;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__VOLK_ATTR_ALIGNED(16)
char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);

View File

@ -70,7 +70,8 @@ static inline void volk_gnsssdr_8i_max_s8i_u_avx2(char* target, const char* src0
unsigned int i;
char* inputPtr = (char*)src0;
char max = src0[0];
__VOLK_ATTR_ALIGNED(32) char maxValuesBuffer[32];
__VOLK_ATTR_ALIGNED(32)
char maxValuesBuffer[32];
__m256i maxValues, compareResults, currentValues;
maxValues = _mm256_set1_epi8(max);
@ -119,7 +120,8 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse4_1(char* target, const char* sr
unsigned int i;
char* inputPtr = (char*)src0;
char max = src0[0];
__VOLK_ATTR_ALIGNED(16) char maxValuesBuffer[16];
__VOLK_ATTR_ALIGNED(16)
char maxValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
@ -169,7 +171,8 @@ static inline void volk_gnsssdr_8i_max_s8i_u_sse2(char* target, const char* src0
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned short mask;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__VOLK_ATTR_ALIGNED(16)
char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
@ -250,7 +253,8 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse4_1(char* target, const char* sr
unsigned int i;
char* inputPtr = (char*)src0;
char max = src0[0];
__VOLK_ATTR_ALIGNED(16) char maxValuesBuffer[16];
__VOLK_ATTR_ALIGNED(16)
char maxValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
@ -299,7 +303,8 @@ static inline void volk_gnsssdr_8i_max_s8i_a_avx2(char* target, const char* src0
unsigned int i;
char* inputPtr = (char*)src0;
char max = src0[0];
__VOLK_ATTR_ALIGNED(32) char maxValuesBuffer[32];
__VOLK_ATTR_ALIGNED(32)
char maxValuesBuffer[32];
__m256i maxValues, compareResults, currentValues;
maxValues = _mm256_set1_epi8(max);
@ -349,7 +354,8 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse2(char* target, const char* src0
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned short mask;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__VOLK_ATTR_ALIGNED(16)
char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);

View File

@ -155,7 +155,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_ssse3(lv_8sc_t* cVector, con
{
*c++ = lv_conj(*a++);
}
}
#endif /* LV_HAVE_SSSE3 */
@ -188,7 +187,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_sse3(lv_8sc_t* cVector, cons
{
*c++ = lv_conj(*a++);
}
}
#endif /* LV_HAVE_SSE3 */
@ -336,7 +334,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, cons
{
*c++ = lv_conj(*a++);
}
}
#endif /* LV_HAVE_SSE3 */

View File

@ -111,7 +111,6 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_sse3(lv_8sc_t* cVector,
{
*c++ = (*a++) * scalar;
}
}
#endif /* LV_HAVE_SSE3 */
@ -204,7 +203,6 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector,
{
*c++ = (*a++) * scalar;
}
}
#endif /* LV_HAVE_SSE3 */

View File

@ -165,7 +165,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con
totalc = _mm_or_si128(realcacc, imagcacc);
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
__VOLK_ATTR_ALIGNED(16)
lv_8sc_t dotProductVector[8];
_mm_storeu_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
@ -240,7 +241,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c
totalc = _mm_blendv_epi8(imagcacc, realcacc, mult1);
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
__VOLK_ATTR_ALIGNED(16)
lv_8sc_t dotProductVector[8];
_mm_storeu_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
@ -317,7 +319,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, con
totalc = _mm_or_si128(realcacc, imagcacc);
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
__VOLK_ATTR_ALIGNED(16)
lv_8sc_t dotProductVector[8];
_mm_store_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
@ -391,7 +394,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c
totalc = _mm_blendv_epi8(imagcacc, realcacc, mult1);
__VOLK_ATTR_ALIGNED(16) lv_8sc_t dotProductVector[8];
__VOLK_ATTR_ALIGNED(16)
lv_8sc_t dotProductVector[8];
_mm_store_si128((__m128i*)dotProductVector, totalc); // Store the results back into the dot product vector
@ -446,7 +450,8 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const
// for 2-lane vectors, 1st lane holds the real part,
// 2nd lane holds the imaginary part
int8x8x2_t a_val, b_val, c_val, accumulator, tmp_real, tmp_imag;
__VOLK_ATTR_ALIGNED(16) lv_8sc_t accum_result[8] = { lv_cmake(0,0) };
__VOLK_ATTR_ALIGNED(16)
lv_8sc_t accum_result[8] = {lv_cmake(0, 0)};
accumulator.val[0] = vdup_n_s8(0);
accumulator.val[1] = vdup_n_s8(0);
unsigned int number;

View File

@ -241,29 +241,49 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_u_sse2(lv_32fc_t* out, const fl
__m128i emm0, emm2, emm4;
/* declare some SSE constants */
__VOLK_ATTR_ALIGNED(16) static const int _ps_inv_sign_mask[4] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
__VOLK_ATTR_ALIGNED(16) static const int _ps_sign_mask[4] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
__VOLK_ATTR_ALIGNED(16)
static const int _ps_inv_sign_mask[4] = {~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000};
__VOLK_ATTR_ALIGNED(16)
static const int _ps_sign_mask[4] = {(int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000};
__VOLK_ATTR_ALIGNED(16) static const float _ps_cephes_FOPI[4] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
__VOLK_ATTR_ALIGNED(16) static const int _pi32_1[4] = { 1, 1, 1, 1 };
__VOLK_ATTR_ALIGNED(16) static const int _pi32_inv1[4] = { ~1, ~1, ~1, ~1 };
__VOLK_ATTR_ALIGNED(16) static const int _pi32_2[4] = { 2, 2, 2, 2};
__VOLK_ATTR_ALIGNED(16) static const int _pi32_4[4] = { 4, 4, 4, 4};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_cephes_FOPI[4] = {1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516};
__VOLK_ATTR_ALIGNED(16)
static const int _pi32_1[4] = {1, 1, 1, 1};
__VOLK_ATTR_ALIGNED(16)
static const int _pi32_inv1[4] = {~1, ~1, ~1, ~1};
__VOLK_ATTR_ALIGNED(16)
static const int _pi32_2[4] = {2, 2, 2, 2};
__VOLK_ATTR_ALIGNED(16)
static const int _pi32_4[4] = {4, 4, 4, 4};
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP1[4] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP2[4] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_minus_cephes_DP3[4] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p0[4] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p1[4] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_coscof_p2[4] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p0[4] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p1[4] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_sincof_p2[4] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
__VOLK_ATTR_ALIGNED(16) static const float _ps_0p5[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
__VOLK_ATTR_ALIGNED(16) static const float _ps_1[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
__VOLK_ATTR_ALIGNED(16)
static const float _ps_minus_cephes_DP1[4] = {-0.78515625, -0.78515625, -0.78515625, -0.78515625};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_minus_cephes_DP2[4] = {-2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_minus_cephes_DP3[4] = {-3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_coscof_p0[4] = {2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_coscof_p1[4] = {-1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_coscof_p2[4] = {4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_sincof_p0[4] = {-1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_sincof_p1[4] = {8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_sincof_p2[4] = {-1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_0p5[4] = {0.5f, 0.5f, 0.5f, 0.5f};
__VOLK_ATTR_ALIGNED(16)
static const float _ps_1[4] = {1.0f, 1.0f, 1.0f, 1.0f};
__VOLK_ATTR_ALIGNED(16) float four_phases[4] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc };
__VOLK_ATTR_ALIGNED(16) float four_phases_inc[4] = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc };
__VOLK_ATTR_ALIGNED(16)
float four_phases[4] = {_phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc};
__VOLK_ATTR_ALIGNED(16)
float four_phases_inc[4] = {4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc};
four_phases_reg = _mm_load_ps(four_phases);
const __m128 four_phases_inc_reg = _mm_load_ps(four_phases_inc);
@ -456,29 +476,49 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_a_avx2(lv_32fc_t* out, const fl
__m128 aux, c1, s1;
/* declare some AVX2 constants */
__VOLK_ATTR_ALIGNED(32) static const int _ps_inv_sign_mask[8] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
__VOLK_ATTR_ALIGNED(32) static const int _ps_sign_mask[8] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
__VOLK_ATTR_ALIGNED(32)
static const int _ps_inv_sign_mask[8] = {~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000};
__VOLK_ATTR_ALIGNED(32)
static const int _ps_sign_mask[8] = {(int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000};
__VOLK_ATTR_ALIGNED(32) static const float _ps_cephes_FOPI[8] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
__VOLK_ATTR_ALIGNED(32) static const int _pi32_1[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
__VOLK_ATTR_ALIGNED(32) static const int _pi32_inv1[8] = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 };
__VOLK_ATTR_ALIGNED(32) static const int _pi32_2[8] = { 2, 2, 2, 2, 2, 2, 2, 2 };
__VOLK_ATTR_ALIGNED(32) static const int _pi32_4[8] = { 4, 4, 4, 4, 4, 4, 4, 4 };
__VOLK_ATTR_ALIGNED(32)
static const float _ps_cephes_FOPI[8] = {1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516};
__VOLK_ATTR_ALIGNED(32)
static const int _pi32_1[8] = {1, 1, 1, 1, 1, 1, 1, 1};
__VOLK_ATTR_ALIGNED(32)
static const int _pi32_inv1[8] = {~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1};
__VOLK_ATTR_ALIGNED(32)
static const int _pi32_2[8] = {2, 2, 2, 2, 2, 2, 2, 2};
__VOLK_ATTR_ALIGNED(32)
static const int _pi32_4[8] = {4, 4, 4, 4, 4, 4, 4, 4};
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP1[8] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP2[8] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP3[8] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p0[8] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p1[8] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p2[8] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p0[8] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p1[8] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p2[8] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_0p5[8] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
__VOLK_ATTR_ALIGNED(32) static const float _ps_1[8] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f };
__VOLK_ATTR_ALIGNED(32)
static const float _ps_minus_cephes_DP1[8] = {-0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_minus_cephes_DP2[8] = {-2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_minus_cephes_DP3[8] = {-3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_coscof_p0[8] = {2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_coscof_p1[8] = {-1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_coscof_p2[8] = {4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_sincof_p0[8] = {-1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_sincof_p1[8] = {8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_sincof_p2[8] = {-1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_0p5[8] = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_1[8] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
__VOLK_ATTR_ALIGNED(32) float eight_phases[8] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc };
__VOLK_ATTR_ALIGNED(32) float eight_phases_inc[8] = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc };
__VOLK_ATTR_ALIGNED(32)
float eight_phases[8] = {_phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc};
__VOLK_ATTR_ALIGNED(32)
float eight_phases_inc[8] = {8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc};
eight_phases_reg = _mm256_load_ps(eight_phases);
const __m256 eight_phases_inc_reg = _mm256_load_ps(eight_phases_inc);
@ -624,29 +664,49 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_u_avx2(lv_32fc_t* out, const fl
__m128 aux, c1, s1;
/* declare some AVX2 constants */
__VOLK_ATTR_ALIGNED(32) static const int _ps_inv_sign_mask[8] = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
__VOLK_ATTR_ALIGNED(32) static const int _ps_sign_mask[8] = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
__VOLK_ATTR_ALIGNED(32)
static const int _ps_inv_sign_mask[8] = {~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000};
__VOLK_ATTR_ALIGNED(32)
static const int _ps_sign_mask[8] = {(int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000};
__VOLK_ATTR_ALIGNED(32) static const float _ps_cephes_FOPI[8] = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 };
__VOLK_ATTR_ALIGNED(32) static const int _pi32_1[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
__VOLK_ATTR_ALIGNED(32) static const int _pi32_inv1[8] = { ~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1 };
__VOLK_ATTR_ALIGNED(32) static const int _pi32_2[8] = { 2, 2, 2, 2, 2, 2, 2, 2 };
__VOLK_ATTR_ALIGNED(32) static const int _pi32_4[8] = { 4, 4, 4, 4, 4, 4, 4, 4 };
__VOLK_ATTR_ALIGNED(32)
static const float _ps_cephes_FOPI[8] = {1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516};
__VOLK_ATTR_ALIGNED(32)
static const int _pi32_1[8] = {1, 1, 1, 1, 1, 1, 1, 1};
__VOLK_ATTR_ALIGNED(32)
static const int _pi32_inv1[8] = {~1, ~1, ~1, ~1, ~1, ~1, ~1, ~1};
__VOLK_ATTR_ALIGNED(32)
static const int _pi32_2[8] = {2, 2, 2, 2, 2, 2, 2, 2};
__VOLK_ATTR_ALIGNED(32)
static const int _pi32_4[8] = {4, 4, 4, 4, 4, 4, 4, 4};
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP1[8] = { -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP2[8] = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_minus_cephes_DP3[8] = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p0[8] = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p1[8] = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_coscof_p2[8] = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p0[8] = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p1[8] = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_sincof_p2[8] = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 };
__VOLK_ATTR_ALIGNED(32) static const float _ps_0p5[8] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
__VOLK_ATTR_ALIGNED(32) static const float _ps_1[8] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f };
__VOLK_ATTR_ALIGNED(32)
static const float _ps_minus_cephes_DP1[8] = {-0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625, -0.78515625};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_minus_cephes_DP2[8] = {-2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_minus_cephes_DP3[8] = {-3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_coscof_p0[8] = {2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_coscof_p1[8] = {-1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_coscof_p2[8] = {4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_sincof_p0[8] = {-1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_sincof_p1[8] = {8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_sincof_p2[8] = {-1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_0p5[8] = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
__VOLK_ATTR_ALIGNED(32)
static const float _ps_1[8] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
__VOLK_ATTR_ALIGNED(32) float eight_phases[8] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc };
__VOLK_ATTR_ALIGNED(32) float eight_phases_inc[8] = { 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc };
__VOLK_ATTR_ALIGNED(32)
float eight_phases[8] = {_phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc, _phase + 4 * phase_inc, _phase + 5 * phase_inc, _phase + 6 * phase_inc, _phase + 7 * phase_inc};
__VOLK_ATTR_ALIGNED(32)
float eight_phases_inc[8] = {8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc, 8 * phase_inc};
eight_phases_reg = _mm256_load_ps(eight_phases);
const __m256 eight_phases_inc_reg = _mm256_load_ps(eight_phases_inc);
@ -783,9 +843,11 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_neon(lv_32fc_t* out, const floa
const unsigned int neon_iters = num_points / 4;
float _phase = (*phase);
__VOLK_ATTR_ALIGNED(16) float32_t four_phases[4] = { _phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc };
__VOLK_ATTR_ALIGNED(16)
float32_t four_phases[4] = {_phase, _phase + phase_inc, _phase + 2 * phase_inc, _phase + 3 * phase_inc};
float four_inc = 4 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t four_phases_inc[4] = { four_inc, four_inc, four_inc, four_inc };
__VOLK_ATTR_ALIGNED(16)
float32_t four_phases_inc[4] = {four_inc, four_inc, four_inc, four_inc};
float32x4_t four_phases_reg = vld1q_f32(four_phases);
float32x4_t four_phases_inc_reg = vld1q_f32(four_phases_inc);

View File

@ -50,7 +50,6 @@
std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t test_params)
{
// Some kernels need a lower tolerance
volk_gnsssdr_test_params_t test_params_inacc = volk_gnsssdr_test_params_t(1e-3, test_params.scalar(),
test_params.vlen(), test_params.iter(), test_params.benchmark_mode(), test_params.kernel_regex());
@ -98,8 +97,7 @@ std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t
QA(VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn, test_params_int16))
QA(VOLK_INIT_PUPP(volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn, test_params_int16))
QA(VOLK_INIT_PUPP(volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc, volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn, test_params_int1))
QA(VOLK_INIT_PUPP(volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc, volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn, test_params_int1))
;
QA(VOLK_INIT_PUPP(volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc, volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn, test_params_int1));
return test_cases;
}

View File

@ -37,7 +37,8 @@
#include <vector> // for vector
float uniform() {
float uniform()
{
std::random_device r;
std::default_random_engine e1(r());
std::uniform_real_distribution<float> uniform_dist(-1, 1);
@ -60,8 +61,10 @@ void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n)
if (type.is_float)
{
if(type.size == 8) random_floats<double>((double *)data, n);
else random_floats<float>((float *)data, n);
if (type.size == 8)
random_floats<double>((double *)data, n);
else
random_floats<float>((float *)data, n);
}
else
{
@ -75,22 +78,30 @@ void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n)
switch (type.size)
{
case 8:
if(type.is_signed) ((int64_t *)data)[i] = (int64_t) scaled_rand;
else ((uint64_t *)data)[i] = (uint64_t) scaled_rand;
if (type.is_signed)
((int64_t *)data)[i] = (int64_t)scaled_rand;
else
((uint64_t *)data)[i] = (uint64_t)scaled_rand;
break;
case 4:
if(type.is_signed) ((int32_t *)data)[i] = (int32_t) scaled_rand;
else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
if (type.is_signed)
((int32_t *)data)[i] = (int32_t)scaled_rand;
else
((uint32_t *)data)[i] = (uint32_t)scaled_rand;
break;
case 2:
// 16-bit multiplication saturates very quickly,
// so only a 3-bit input range is produced here
if(type.is_signed) ((int16_t *)data)[i] = (int16_t)((int16_t) scaled_rand % 8);
else ((uint16_t *)data)[i] = (uint16_t) (int16_t)((int16_t) scaled_rand % 8);
if (type.is_signed)
((int16_t *)data)[i] = (int16_t)((int16_t)scaled_rand % 8);
else
((uint16_t *)data)[i] = (uint16_t)(int16_t)((int16_t)scaled_rand % 8);
break;
case 1:
if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand;
else ((uint8_t *)data)[i] = (uint8_t) scaled_rand;
if (type.is_signed)
((int8_t *)data)[i] = (int8_t)scaled_rand;
else
((uint8_t *)data)[i] = (uint8_t)scaled_rand;
break;
default:
throw "load_random_data: no support for data size > 8 or < 1"; //no shenanigans here
@ -99,17 +110,20 @@ void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n)
}
}
static std::vector<std::string> get_arch_list(volk_gnsssdr_func_desc_t desc) {
static std::vector<std::string> get_arch_list(volk_gnsssdr_func_desc_t desc)
{
std::vector<std::string> archlist;
for(size_t i = 0; i < desc.n_impls; i++) {
for (size_t i = 0; i < desc.n_impls; i++)
{
archlist.push_back(std::string(desc.impl_names[i]));
}
return archlist;
}
volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string name) {
volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string name)
{
volk_gnsssdr_type_t type;
type.is_float = false;
type.is_scalar = false;
@ -118,19 +132,22 @@ volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string name) {
type.size = 0;
type.str = name;
if(name.size() < 2) {
if (name.size() < 2)
{
throw std::string("name too short to be a datatype");
}
//is it a scalar?
if(name[0] == 's') {
if (name[0] == 's')
{
type.is_scalar = true;
name = name.substr(1, name.size() - 1);
}
//get the data size
size_t last_size_pos = name.find_last_of("0123456789");
if(last_size_pos == std::string::npos) {
if (last_size_pos == std::string::npos)
{
throw std::string("no size spec in type ").append(name);
}
//will throw if malformed
@ -139,8 +156,10 @@ volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string name) {
assert(((size % 8) == 0) && (size <= 64) && (size != 0));
type.size = size / 8; //in bytes
for(size_t i=last_size_pos+1; i < name.size(); i++) {
switch (name[i]) {
for (size_t i = last_size_pos + 1; i < name.size(); i++)
{
switch (name[i])
{
case 'f':
type.is_float = true;
break;
@ -163,7 +182,8 @@ volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string name) {
static void get_signatures_from_name(std::vector<volk_gnsssdr_type_t> &inputsig,
std::vector<volk_gnsssdr_type_t> &outputsig,
std::string name) {
std::string name)
{
boost::char_separator<char> sep("_");
boost::tokenizer<boost::char_separator<char> > tok(name, sep);
std::vector<std::string> toked;
@ -176,79 +196,107 @@ static void get_signatures_from_name(std::vector<volk_gnsssdr_type_t> &inputsig,
//ok. we're assuming a string in the form
//(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment)
enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT;
enum
{
SIDE_INPUT,
SIDE_NAME,
SIDE_OUTPUT
} side = SIDE_INPUT;
std::string fn_name;
volk_gnsssdr_type_t type;
BOOST_FOREACH(std::string token, toked) {
try {
BOOST_FOREACH (std::string token, toked)
{
try
{
type = volk_gnsssdr_type_from_string(token);
if (side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first one after the name...
if(side == SIDE_INPUT) inputsig.push_back(type);
else outputsig.push_back(type);
} catch (...){
if(token[0] == 'x' && (token.size() > 1) && (token[1] > '0' || token[1] < '9')) {
if(side == SIDE_INPUT) assert(inputsig.size() > 0);
else assert(outputsig.size() > 0);
if (side == SIDE_INPUT)
inputsig.push_back(type);
else
outputsig.push_back(type);
}
catch (...)
{
if (token[0] == 'x' && (token.size() > 1) && (token[1] > '0' || token[1] < '9'))
{
if (side == SIDE_INPUT)
assert(inputsig.size() > 0);
else
assert(outputsig.size() > 0);
int multiplier = boost::lexical_cast<int>(token.substr(1, token.size() - 1)); //will throw if invalid ///////////
for(int i=1; i<multiplier; i++) {
if(side == SIDE_INPUT) inputsig.push_back(inputsig.back());
else outputsig.push_back(outputsig.back());
for (int i = 1; i < multiplier; i++)
{
if (side == SIDE_INPUT)
inputsig.push_back(inputsig.back());
else
outputsig.push_back(outputsig.back());
}
}
else if(side == SIDE_INPUT) { //it's the function name, at least it better be
else if (side == SIDE_INPUT)
{ //it's the function name, at least it better be
side = SIDE_NAME;
fn_name.append("_");
fn_name.append(token);
}
else if(side == SIDE_OUTPUT) {
else if (side == SIDE_OUTPUT)
{
if (token != toked.back()) throw; //the last token in the name is the alignment
}
}
}
//we don't need an output signature (some fn's operate on the input data, "in place"), but we do need at least one input!
assert(inputsig.size() != 0);
}
inline void run_cast_test1(volk_gnsssdr_fn_1arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
inline void run_cast_test1(volk_gnsssdr_fn_1arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch)
{
while (iter--) func(buffs[0], vlen, arch.c_str());
}
inline void run_cast_test2(volk_gnsssdr_fn_2arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
inline void run_cast_test2(volk_gnsssdr_fn_2arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch)
{
while (iter--) func(buffs[0], buffs[1], vlen, arch.c_str());
}
inline void run_cast_test3(volk_gnsssdr_fn_3arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
inline void run_cast_test3(volk_gnsssdr_fn_3arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch)
{
while (iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str());
}
inline void run_cast_test4(volk_gnsssdr_fn_4arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch) {
inline void run_cast_test4(volk_gnsssdr_fn_4arg func, std::vector<void *> &buffs, unsigned int vlen, unsigned int iter, std::string arch)
{
while (iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str());
}
inline void run_cast_test1_s32f(volk_gnsssdr_fn_1arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
inline void run_cast_test1_s32f(volk_gnsssdr_fn_1arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch)
{
while (iter--) func(buffs[0], scalar, vlen, arch.c_str());
}
inline void run_cast_test2_s32f(volk_gnsssdr_fn_2arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
inline void run_cast_test2_s32f(volk_gnsssdr_fn_2arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch)
{
while (iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
}
inline void run_cast_test3_s32f(volk_gnsssdr_fn_3arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
inline void run_cast_test3_s32f(volk_gnsssdr_fn_3arg_s32f func, std::vector<void *> &buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch)
{
while (iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
}
inline void run_cast_test1_s32fc(volk_gnsssdr_fn_1arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
inline void run_cast_test1_s32fc(volk_gnsssdr_fn_1arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch)
{
while (iter--) func(buffs[0], scalar, vlen, arch.c_str());
}
inline void run_cast_test2_s32fc(volk_gnsssdr_fn_2arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
inline void run_cast_test2_s32fc(volk_gnsssdr_fn_2arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch)
{
while (iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
}
inline void run_cast_test3_s32fc(volk_gnsssdr_fn_3arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch) {
inline void run_cast_test3_s32fc(volk_gnsssdr_fn_3arg_s32fc func, std::vector<void *> &buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string arch)
{
while (iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str());
}
@ -299,26 +347,32 @@ inline void run_cast_test3_s16ic(volk_gnsssdr_fn_3arg_s16ic func, std::vector<vo
// *************** ADDED BY GNSS-SDR. END
template <class t>
bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
bool fcompare(t *in1, t *in2, unsigned int vlen, float tol)
{
bool fail = false;
int print_max_errs = 10;
for(unsigned int i=0; i<vlen; i++) {
for (unsigned int i = 0; i < vlen; i++)
{
// for very small numbers we'll see round off errors due to limited
// precision. So a special test case...
if(fabs(((t *)(in1))[i]) < 1e-30) {
if (fabs(((t *)(in1))[i]) < 1e-30)
{
if (fabs(((t *)(in2))[i]) > tol)
{
fail = true;
if(print_max_errs-- > 0) {
if (print_max_errs-- > 0)
{
std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]);
std::cout << " tolerance was: " << tol << std::endl;
}
}
}
// the primary test: is the relative difference greater than the given tol?
else if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/fabs(((t *)in1)[i]) > tol) {
else if (fabs(((t *)(in1))[i] - ((t *)(in2))[i]) / fabs(((t *)in1)[i]) > tol)
{
fail = true;
if(print_max_errs-- > 0) {
if (print_max_errs-- > 0)
{
std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) << " in2: " << t(((t *)(in2))[i]);
std::cout << " tolerance was: " << tol << std::endl;
}
@ -329,30 +383,36 @@ bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
}
template <class t>
bool ccompare(t *in1, t *in2, unsigned int vlen, float tol) {
bool ccompare(t *in1, t *in2, unsigned int vlen, float tol)
{
bool fail = false;
int print_max_errs = 10;
for(unsigned int i=0; i<2*vlen; i+=2) {
for (unsigned int i = 0; i < 2 * vlen; i += 2)
{
t diff[2] = {in1[i] - in2[i], in1[i + 1] - in2[i + 1]};
t err = std::sqrt(diff[0] * diff[0] + diff[1] * diff[1]);
t norm = std::sqrt(in1[i] * in1[i] + in1[i + 1] * in1[i + 1]);
// for very small numbers we'll see round off errors due to limited
// precision. So a special test case...
if (norm < 1e-30) {
if (norm < 1e-30)
{
if (err > tol)
{
fail = true;
if(print_max_errs-- > 0) {
if (print_max_errs-- > 0)
{
std::cout << "offset " << i / 2 << " in1: " << in1[i] << " + " << in1[i + 1] << "j in2: " << in2[i] << " + " << in2[i + 1] << "j";
std::cout << " tolerance was: " << tol << std::endl;
}
}
}
// the primary test: is the relative difference greater than the given tol?
else if((err / norm) > tol) {
else if ((err / norm) > tol)
{
fail = true;
if(print_max_errs-- > 0) {
if (print_max_errs-- > 0)
{
std::cout << "offset " << i / 2 << " in1: " << in1[i] << " + " << in1[i + 1] << "j in2: " << in2[i] << " + " << in2[i + 1] << "j";
std::cout << " tolerance was: " << tol << std::endl;
}
@ -363,13 +423,17 @@ bool ccompare(t *in1, t *in2, unsigned int vlen, float tol) {
}
template <class t>
bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol)
{
bool fail = false;
int print_max_errs = 10;
for(unsigned int i=0; i<vlen; i++) {
if(((unsigned int)abs(int(((t *)(in1))[i]) - int(((t *)(in2))[i]))) > tol) {
for (unsigned int i = 0; i < vlen; i++)
{
if (((unsigned int)abs(int(((t *)(in1))[i]) - int(((t *)(in2))[i]))) > tol)
{
fail = true;
if(print_max_errs-- > 0) {
if (print_max_errs-- > 0)
{
std::cout << "offset " << i << " in1: " << static_cast<int>(t(((t *)(in1))[i])) << " in2: " << static_cast<int>(t(((t *)(in2))[i]));
std::cout << " tolerance was: " << tol << std::endl;
}
@ -379,21 +443,27 @@ bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
return fail;
}
class volk_gnsssdr_qa_aligned_mem_pool{
class volk_gnsssdr_qa_aligned_mem_pool
{
public:
void *get_new(size_t size){
void *get_new(size_t size)
{
size_t alignment = volk_gnsssdr_get_alignment();
void *ptr = volk_gnsssdr_malloc(size, alignment);
memset(ptr, 0x00, size);
_mems.push_back(ptr);
return ptr;
}
~volk_gnsssdr_qa_aligned_mem_pool() {
for(unsigned int ii = 0; ii < _mems.size(); ++ii) {
~volk_gnsssdr_qa_aligned_mem_pool()
{
for (unsigned int ii = 0; ii < _mems.size(); ++ii)
{
volk_gnsssdr_free(_mems[ii]);
}
}
private: std::vector<void * > _mems;
private:
std::vector<void *> _mems;
};
bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
@ -401,8 +471,7 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
std::string name,
volk_gnsssdr_test_params_t test_params,
std::vector<volk_gnsssdr_test_results_t> *results,
std::string puppet_master_name
)
std::string puppet_master_name)
{
return run_volk_gnsssdr_tests(desc, manual_func, name, test_params.tol(), test_params.scalar(),
test_params.vlen(), test_params.iter(), results, puppet_master_name,
@ -439,7 +508,8 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
//first let's get a list of available architectures for the test
std::vector<std::string> arch_list = get_arch_list(desc);
if((!benchmark_mode) && (arch_list.size() < 2)) {
if ((!benchmark_mode) && (arch_list.size() < 2))
{
std::cout << "no architectures to test" << std::endl;
return false;
}
@ -449,10 +519,12 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
//now we have to get a function signature by parsing the name
std::vector<volk_gnsssdr_type_t> inputsig, outputsig;
try {
try
{
get_signatures_from_name(inputsig, outputsig, name);
}
catch (boost::bad_lexical_cast& error) {
catch (boost::bad_lexical_cast &error)
{
std::cerr << "Error: unable to get function signature from kernel name" << std::endl;
std::cerr << " - " << name << std::endl;
return false;
@ -460,30 +532,37 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
//pull the input scalars into their own vector
std::vector<volk_gnsssdr_type_t> inputsc;
for(size_t i=0; i<inputsig.size(); i++) {
if(inputsig[i].is_scalar) {
for (size_t i = 0; i < inputsig.size(); i++)
{
if (inputsig[i].is_scalar)
{
inputsc.push_back(inputsig[i]);
inputsig.erase(inputsig.begin() + i);
i -= 1;
}
}
std::vector<void *> inbuffs;
BOOST_FOREACH(volk_gnsssdr_type_t sig, inputsig) {
BOOST_FOREACH (volk_gnsssdr_type_t sig, inputsig)
{
if (!sig.is_scalar) //we don't make buffers for scalars
inbuffs.push_back(mem_pool.get_new(vlen * sig.size * (sig.is_complex ? 2 : 1)));
}
for(size_t i=0; i<inbuffs.size(); i++) {
for (size_t i = 0; i < inbuffs.size(); i++)
{
load_random_data(inbuffs[i], inputsig[i], vlen);
}
//ok let's make a vector of vector of void buffers, which holds the input/output vectors for each arch
std::vector<std::vector<void *> > test_data;
for(size_t i=0; i<arch_list.size(); i++) {
for (size_t i = 0; i < arch_list.size(); i++)
{
std::vector<void *> arch_buffs;
for(size_t j=0; j<outputsig.size(); j++) {
for (size_t j = 0; j < outputsig.size(); j++)
{
arch_buffs.push_back(mem_pool.get_new(vlen * outputsig[j].size * (outputsig[j].is_complex ? 2 : 1)));
}
for(size_t j=0; j<inputsig.size(); j++) {
for (size_t j = 0; j < inputsig.size(); j++)
{
void *arch_inbuff = mem_pool.get_new(vlen * inputsig[j].size * (inputsig[j].is_complex ? 2 : 1));
memcpy(arch_inbuff, inbuffs[j], vlen * inputsig[j].size * (inputsig[j].is_complex ? 2 : 1));
arch_buffs.push_back(arch_inbuff);
@ -499,7 +578,8 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
vlen = vlen - vlen_twiddle;
std::chrono::time_point<std::chrono::system_clock> start, end;
std::vector<double> profile_times;
for(size_t i = 0; i < arch_list.size(); i++) {
for (size_t i = 0; i < arch_list.size(); i++)
{
start = std::chrono::system_clock::now();
switch (both_sigs.size())
@ -540,7 +620,8 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
}
}
//ADDED BY GNSS-SDR. END
else throw "unsupported 1 arg function >1 scalars";
else
throw "unsupported 1 arg function >1 scalars";
break;
case 2:
if (inputsc.size() == 0)
@ -578,7 +659,8 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
}
}
//ADDED BY GNSS-SDR. END
else throw "unsupported 2 arg function >1 scalars";
else
throw "unsupported 2 arg function >1 scalars";
break;
case 3:
if (inputsc.size() == 0)
@ -618,7 +700,8 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
}
}
//ADDED BY GNSS-SDR. END
else throw "unsupported 3 arg function >1 scalars";
else
throw "unsupported 3 arg function >1 scalars";
break;
default:
throw "no function handler for this signature";
@ -642,8 +725,10 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
//and now compare each output to the generic output
//first we have to know which output is the generic one, they aren't in order...
size_t generic_offset = 0;
for(size_t i=0; i<arch_list.size(); i++) {
if (arch_list[i] == "generic") {
for (size_t i = 0; i < arch_list.size(); i++)
{
if (arch_list[i] == "generic")
{
generic_offset = i;
}
}
@ -795,9 +880,12 @@ bool run_volk_gnsssdr_tests(volk_gnsssdr_func_desc_t desc,
std::cout << "Best aligned arch: " << best_arch_a << std::endl;
std::cout << "Best unaligned arch: " << best_arch_u << std::endl;
if(puppet_master_name == "NULL") {
if (puppet_master_name == "NULL")
{
results->back().config_name = name;
} else {
}
else
{
results->back().config_name = puppet_master_name;
}
results->back().best_arch_a = best_arch_a;

View File

@ -35,7 +35,8 @@
/************************************************
* VOLK QA type definitions *
************************************************/
struct volk_gnsssdr_type_t {
struct volk_gnsssdr_type_t
{
bool is_float;
bool is_scalar;
bool is_signed;
@ -44,7 +45,8 @@ struct volk_gnsssdr_type_t {
std::string str;
};
class volk_gnsssdr_test_time_t {
class volk_gnsssdr_test_time_t
{
public:
std::string name;
double time;
@ -52,7 +54,8 @@ class volk_gnsssdr_test_time_t {
bool pass;
};
class volk_gnsssdr_test_results_t {
class volk_gnsssdr_test_results_t
{
public:
std::string name;
std::string config_name;
@ -63,7 +66,8 @@ class volk_gnsssdr_test_results_t {
std::string best_arch_u;
};
class volk_gnsssdr_test_params_t {
class volk_gnsssdr_test_params_t
{
private:
float _tol;
lv_32fc_t _scalar;
@ -71,12 +75,11 @@ class volk_gnsssdr_test_params_t {
unsigned int _iter;
bool _benchmark_mode;
std::string _kernel_regex;
public:
// ctor
volk_gnsssdr_test_params_t(float tol, lv_32fc_t scalar, unsigned int vlen, unsigned int iter,
bool benchmark_mode, std::string kernel_regex) :
_tol(tol), _scalar(scalar), _vlen(vlen), _iter(iter),
_benchmark_mode(benchmark_mode), _kernel_regex(kernel_regex) {};
bool benchmark_mode, std::string kernel_regex) : _tol(tol), _scalar(scalar), _vlen(vlen), _iter(iter), _benchmark_mode(benchmark_mode), _kernel_regex(kernel_regex){};
// setters
void set_tol(float tol) { _tol = tol; };
void set_scalar(lv_32fc_t scalar) { _scalar = scalar; };
@ -93,13 +96,15 @@ class volk_gnsssdr_test_params_t {
std::string kernel_regex() { return _kernel_regex; };
};
class volk_gnsssdr_test_case_t {
class volk_gnsssdr_test_case_t
{
private:
volk_gnsssdr_func_desc_t _desc;
void (*_kernel_ptr)();
std::string _name;
volk_gnsssdr_test_params_t _test_parameters;
std::string _puppet_master_name;
public:
volk_gnsssdr_func_desc_t desc() { return _desc; };
void (*kernel_ptr())() { return _kernel_ptr; };
@ -108,16 +113,10 @@ class volk_gnsssdr_test_case_t {
volk_gnsssdr_test_params_t test_parameters() { return _test_parameters; };
// normal ctor
volk_gnsssdr_test_case_t(volk_gnsssdr_func_desc_t desc, void (*kernel_ptr)(), std::string name,
volk_gnsssdr_test_params_t test_parameters) :
_desc(desc), _kernel_ptr(kernel_ptr), _name(name), _test_parameters(test_parameters),
_puppet_master_name("NULL")
{};
volk_gnsssdr_test_params_t test_parameters) : _desc(desc), _kernel_ptr(kernel_ptr), _name(name), _test_parameters(test_parameters), _puppet_master_name("NULL"){};
// ctor for puppets
volk_gnsssdr_test_case_t(volk_gnsssdr_func_desc_t desc, void (*kernel_ptr)(), std::string name,
std::string puppet_master_name, volk_gnsssdr_test_params_t test_parameters) :
_desc(desc), _kernel_ptr(kernel_ptr), _name(name), _test_parameters(test_parameters),
_puppet_master_name(puppet_master_name)
{};
std::string puppet_master_name, volk_gnsssdr_test_params_t test_parameters) : _desc(desc), _kernel_ptr(kernel_ptr), _name(name), _test_parameters(test_parameters), _puppet_master_name(puppet_master_name){};
};
/************************************************
@ -134,8 +133,7 @@ bool run_volk_gnsssdr_tests(
std::string,
volk_gnsssdr_test_params_t,
std::vector<volk_gnsssdr_test_results_t> *results = NULL,
std::string puppet_master_name = "NULL"
);
std::string puppet_master_name = "NULL");
bool run_volk_gnsssdr_tests(
volk_gnsssdr_func_desc_t,
@ -147,12 +145,12 @@ bool run_volk_gnsssdr_tests(
unsigned int,
std::vector<volk_gnsssdr_test_results_t> *results = NULL,
std::string puppet_master_name = "NULL",
bool benchmark_mode = false
);
bool benchmark_mode = false);
#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) \
BOOST_AUTO_TEST_CASE(func##_test) { \
BOOST_AUTO_TEST_CASE(func##_test) \
{ \
BOOST_CHECK_EQUAL(run_volk_gnsssdr_tests( \
func##_get_func_desc(), (void (*)())func##_manual, \
std::string(#func), tol, scalar, len, iter, 0, "NULL"), \

View File

@ -49,20 +49,24 @@ int main()
std::vector<std::string> qa_failures;
std::vector<volk_gnsssdr_test_results_t> results;
// Test every kernel reporting failures when they occur
for(unsigned int ii = 0; ii < test_cases.size(); ++ii) {
for (unsigned int ii = 0; ii < test_cases.size(); ++ii)
{
bool qa_result = false;
volk_gnsssdr_test_case_t test_case = test_cases[ii];
try {
try
{
qa_result = run_volk_gnsssdr_tests(test_case.desc(), test_case.kernel_ptr(), test_case.name(),
test_case.test_parameters(), &results, test_case.puppet_master_name());
}
catch(...) {
catch (...)
{
// TODO: what exceptions might we need to catch and how do we handle them?
std::cerr << "Exception found on kernel: " << test_case.name() << std::endl;
qa_result = false;
}
if(qa_result) {
if (qa_result)
{
std::cerr << "Failure on " << test_case.name() << std::endl;
qa_failures.push_back(test_case.name());
}
@ -74,9 +78,11 @@ int main()
// Summarize QA results
std::cerr << "Kernel QA finished: " << qa_failures.size() << " failures out of "
<< test_cases.size() << " tests." << std::endl;
if(qa_failures.size() > 0) {
if (qa_failures.size() > 0)
{
std::cerr << "The following kernels failed QA:" << std::endl;
for(unsigned int ii = 0; ii < qa_failures.size(); ++ii) {
for (unsigned int ii = 0; ii < qa_failures.size(); ++ii)
{
std::cerr << " " << qa_failures[ii] << std::endl;
}
qa_ret_val = 1;
@ -95,26 +101,28 @@ void print_qa_xml(std::vector<volk_gnsssdr_test_results_t> results, unsigned int
qa_file.open(".unittest/kernels.xml");
qa_file << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << std::endl;
qa_file << "<testsuites name=\"kernels\" " <<
"tests=\"" << results.size() << "\" " <<
"failures=\"" << nfails << "\" id=\"1\">" << std::endl;
qa_file << "<testsuites name=\"kernels\" "
<< "tests=\"" << results.size() << "\" "
<< "failures=\"" << nfails << "\" id=\"1\">" << std::endl;
// Results are in a vector by kernel. Each element has a result
// map containing time and arch name with test result
for(unsigned int ii=0; ii < results.size(); ++ii) {
for (unsigned int ii = 0; ii < results.size(); ++ii)
{
volk_gnsssdr_test_results_t result = results[ii];
qa_file << " <testsuite name=\"" << result.name << "\">" << std::endl;
std::map<std::string, volk_gnsssdr_test_time_t>::iterator kernel_time_pair;
for(kernel_time_pair = result.results.begin(); kernel_time_pair != result.results.end(); ++kernel_time_pair) {
for (kernel_time_pair = result.results.begin(); kernel_time_pair != result.results.end(); ++kernel_time_pair)
{
volk_gnsssdr_test_time_t test_time = kernel_time_pair->second;
qa_file << " <testcase name=\"" << test_time.name << "\" " <<
"classname=\"" << result.name << "\" " <<
"time=\"" << test_time.time << "\">" << std::endl;
qa_file << " <testcase name=\"" << test_time.name << "\" "
<< "classname=\"" << result.name << "\" "
<< "time=\"" << test_time.time << "\">" << std::endl;
if (!test_time.pass)
qa_file << " <failure " <<
"message=\"fail on arch " << test_time.name << "\">" <<
"</failure>" << std::endl;
qa_file << " <failure "
<< "message=\"fail on arch " << test_time.name << "\">"
<< "</failure>" << std::endl;
qa_file << " </testcase>" << std::endl;
}
qa_file << " </testsuite>" << std::endl;
@ -123,6 +131,4 @@ void print_qa_xml(std::vector<volk_gnsssdr_test_results_t> results, unsigned int
qa_file << "</testsuites>" << std::endl;
qa_file.close();
}

View File

@ -51,7 +51,8 @@ void *volk_gnsssdr_malloc(size_t size, size_t alignment)
{
fprintf(stderr,
"VOLK_GNSSSDR: Error allocating memory "
"(posix_memalign: error %d: %s)\n", err, strerror(err));
"(posix_memalign: error %d: %s)\n",
err, strerror(err));
return NULL;
}
}
@ -112,8 +113,7 @@ volk_gnsssdr_malloc(size_t size, size_t alignment)
return user;
}
void
volk_gnsssdr_free(void *ptr)
void volk_gnsssdr_free(void *ptr)
{
struct block_info *info;

View File

@ -31,7 +31,8 @@ void volk_gnsssdr_get_config_path(char *path)
//allows config redirection via env variable
home = getenv("VOLK_CONFIGPATH");
if(home!=NULL){
if (home != NULL)
{
strncpy(path, home, 512);
strcat(path, suffix2);
return;

View File

@ -23,7 +23,8 @@
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
int volk_gnsssdr_get_index(

View File

@ -37,13 +37,17 @@ struct volk_gnsssdr_machine *get_machine(void)
if (machine != NULL)
return machine;
else {
else
{
unsigned int max_score = 0;
unsigned int i;
struct volk_gnsssdr_machine *max_machine = NULL;
for(i=0; i<n_volk_gnsssdr_machines; i++) {
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
if(volk_gnsssdr_machines[i]->caps > max_score) {
for (i = 0; i < n_volk_gnsssdr_machines; i++)
{
if (!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch())))
{
if (volk_gnsssdr_machines[i]->caps > max_score)
{
max_score = volk_gnsssdr_machines[i]->caps;
max_machine = volk_gnsssdr_machines[i];
}
@ -63,8 +67,10 @@ void volk_gnsssdr_list_machines(void)
extern unsigned int n_volk_gnsssdr_machines;
unsigned int i;
for(i=0; i<n_volk_gnsssdr_machines; i++) {
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
for (i = 0; i < n_volk_gnsssdr_machines; i++)
{
if (!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch())))
{
printf("%s;", volk_gnsssdr_machines[i]->name);
}
}
@ -79,13 +85,17 @@ const char* volk_gnsssdr_get_machine(void)
if (machine != NULL)
return machine->name;
else {
else
{
unsigned int max_score = 0;
unsigned int i;
struct volk_gnsssdr_machine *max_machine = NULL;
for(i=0; i<n_volk_gnsssdr_machines; i++) {
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
if(volk_gnsssdr_machines[i]->caps > max_score) {
for (i = 0; i < n_volk_gnsssdr_machines; i++)
{
if (!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch())))
{
if (volk_gnsssdr_machines[i]->caps > max_score)
{
max_score = volk_gnsssdr_machines[i]->caps;
max_machine = volk_gnsssdr_machines[i];
}
@ -118,8 +128,7 @@ bool volk_gnsssdr_is_aligned(const void *ptr)
static inline void __${kern.name}_d(${kern.arglist_full})
{
%if kern.has_dispatcher:
${kern.name}_dispatcher(${kern.arglist_names});
% if kern.has_dispatcher : ${kern.name} _dispatcher(${kern.arglist_names});
return;
%endif
@ -183,14 +192,13 @@ void ${kern.name}_manual(${kern.arglist_full}, const char* impl_name)
const int index = volk_gnsssdr_get_index(
get_machine()->${kern.name} _impl_names,
get_machine()->${kern.name} _n_impls,
impl_name
);
impl_name);
get_machine()->${kern.name} _impls[index](
${kern.arglist_names}
);
${kern.arglist_names});
}
volk_gnsssdr_func_desc_t ${kern.name}_get_func_desc(void) {
volk_gnsssdr_func_desc_t ${kern.name} _get_func_desc(void)
{
const char **impl_names = get_machine()->${kern.name} _impl_names;
const int *impl_deps = get_machine()->${kern.name} _impl_deps;
const bool *alignment = get_machine()->${kern.name} _impl_alignment;
@ -199,8 +207,7 @@ volk_gnsssdr_func_desc_t ${kern.name}_get_func_desc(void) {
impl_names,
impl_deps,
alignment,
n_impls
};
n_impls};
return desc;
}

View File

@ -21,7 +21,8 @@
%for i, arch in enumerate(archs):
//#ifndef LV_${arch.name.upper()}
## Keep "$" and "{" adjacent and the whole macro on one line: once they are
## split apart Mako no longer sees an expression to substitute.
#define LV_${arch.name.upper()} ${i}
//#endif
%endfor

View File

@ -40,9 +40,12 @@ struct VOLK_CPU volk_gnsssdr_cpu;
* check for AVX capability before executing.
*/
#if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3)) && defined(HAVE_XGETBV)
/*
 * Execute XGETBV with `index` loaded in ECX and return the EDX:EAX pair
 * packed into one 64-bit value (EDX in the upper 32 bits).
 */
static inline unsigned long long _xgetbv(unsigned int index)
{
    unsigned int low_word, high_word;
    unsigned long long packed;

    __VOLK_ASM __VOLK_VOLATILE("xgetbv"
                               : "=a"(low_word), "=d"(high_word)
                               : "c"(index));

    /* widen before shifting so the upper half is not lost */
    packed = (unsigned long long)high_word;
    packed <<= 32;
    packed |= low_word;
    return packed;
}
#define __xgetbv() _xgetbv(0)
@ -67,7 +70,8 @@ struct VOLK_CPU volk_gnsssdr_cpu;
#endif //defined(VOLK_CPU_x86)
static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int count, unsigned int reg, unsigned int bit) {
static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int count, unsigned int reg, unsigned int bit)
{
#if defined(VOLK_CPU_x86)
unsigned int regs[4] = {0};
cpuid_x86_count(level, count, regs);
@ -77,7 +81,8 @@ static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int
#endif
}
static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit) {
static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit)
{
#if defined(VOLK_CPU_x86)
unsigned int regs[4];
memset(regs, 0, sizeof(unsigned int) * 4);
@ -88,7 +93,8 @@ static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsi
#endif
}
static inline unsigned int check_extended_cpuid(unsigned int val) {
static inline unsigned int check_extended_cpuid(unsigned int val)
{
#if defined(VOLK_CPU_x86)
unsigned int regs[4];
memset(regs, 0, sizeof(unsigned int) * 4);
@ -99,7 +105,8 @@ static inline unsigned int check_extended_cpuid(unsigned int val) {
#endif
}
static inline unsigned int get_avx_enabled(void) {
static inline unsigned int get_avx_enabled(void)
{
#if defined(VOLK_CPU_x86)
return __xgetbv() & 0x6;
#else
@ -107,7 +114,8 @@ static inline unsigned int get_avx_enabled(void) {
#endif
}
static inline unsigned int get_avx2_enabled(void) {
static inline unsigned int get_avx2_enabled(void)
{
#if defined(VOLK_CPU_x86)
return __xgetbv() & 0x6;
#else
@ -123,7 +131,8 @@ static inline unsigned int get_avx2_enabled(void) {
#define VOLK_CPU_ARM
#endif
static int has_neon(void){
static int has_neon(void)
{
#if defined(VOLK_CPU_ARM)
FILE *auxvec_f;
unsigned long auxvec[2];
@ -134,7 +143,8 @@ static int has_neon(void){
size_t r = 1;
//so auxv is basically 32b of ID and 32b of value
//so it goes like this
while(!found_neon && r) {
while (!found_neon && r)
{
r = fread(auxvec, sizeof(unsigned long), 2, auxvec_f);
if ((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_NEON))
found_neon = 1;
@ -148,50 +158,59 @@ static int has_neon(void){
}
%for arch in archs:
// Generated once per architecture: returns 0 as soon as one of the checks
// listed for the architecture evaluates to 0, and 1 when all of them pass.
static int i_can_has_${arch.name} (void)
{
    %for check, params in arch.checks:
    if (${check}(<% joined_params = ', '.join(params)%>${joined_params}) == 0) return 0;
    %endfor
    // The return stays on its own line so Mako does not parse it as part of the %endfor control line.
    return 1;
}
% endfor
#if defined(HAVE_FENV_H)
#if defined(FE_TONEAREST)
#include <fenv.h>
static inline void set_float_rounding(void){
static inline void
set_float_rounding(void)
{
fesetround(FE_TONEAREST);
}
#else
static inline void set_float_rounding(void){
static inline void
set_float_rounding(void)
{
//do nothing
}
#endif
#elif defined(_MSC_VER)
#include <float.h>
static inline void set_float_rounding(void){
static inline void
set_float_rounding(void)
{
unsigned int cwrd;
_controlfp_s(&cwrd, 0, 0);
_controlfp_s(&cwrd, _RC_NEAR, _MCW_RC);
}
#else
static inline void set_float_rounding(void){
static inline void
set_float_rounding(void)
{
//do nothing
}
#endif
// Point each volk_gnsssdr_cpu.has_<arch> member at the matching
// i_can_has_<arch> probe, then call set_float_rounding().
// Declared with (void) so the definition is a real prototype, like the other
// parameterless functions in this file.
void volk_gnsssdr_cpu_init(void)
{
    %for arch in archs:
    volk_gnsssdr_cpu.has_${arch.name} = &i_can_has_${arch.name};
    %endfor
    set_float_rounding();
}
// Build the architecture bitmask for the running CPU: for every architecture
// the result of its has_<arch>() probe is shifted to bit LV_<ARCH> and
// accumulated. volk_gnsssdr_cpu_init() is called first so the probe pointers
// are set on every call.
unsigned int volk_gnsssdr_get_lvarch(void)
{
    unsigned int retval = 0;
    volk_gnsssdr_cpu_init();
    %for arch in archs:
    retval += volk_gnsssdr_cpu.has_${arch.name}() << LV_${arch.name.upper()};
    %endfor
    // The return stays on its own line so Mako does not parse it as part of the %endfor control line.
    return retval;
}

View File

@ -23,7 +23,8 @@
__VOLK_DECL_BEGIN
struct VOLK_CPU {
struct VOLK_CPU
{
%for arch in archs:
int (*has_${arch.name}) ();
% endfor

View File

@ -20,7 +20,11 @@
<% arch_names = this_machine.arch_names %>
%for arch in this_machine.archs:
## Keep "$" and "{" adjacent and the whole macro on one line: once they are
## split apart Mako no longer sees an expression to substitute.
#define LV_HAVE_${arch.name.upper()} 1
%endfor
#include <volk_gnsssdr/volk_gnsssdr_common.h>
@ -35,7 +39,9 @@
#include <volk_gnsssdr/${kern.name}.h>
%endfor
struct volk_gnsssdr_machine volk_gnsssdr_machine_${this_machine.name} = {
struct volk_gnsssdr_machine volk_gnsssdr_machine_$
{
this_machine.name} = {
<% make_arch_have_list = (' | '.join(['(1 << LV_%s)'%a.name.upper() for a in this_machine.archs])) %> ${make_arch_have_list},
<% this_machine_name = "\""+this_machine.name+"\"" %> ${this_machine_name},
${this_machine.alignment},

View File

@ -27,7 +27,8 @@
__VOLK_DECL_BEGIN
struct volk_gnsssdr_machine {
struct volk_gnsssdr_machine
{
const unsigned int caps; //capabilities (i.e., archs compiled into this machine, in the volk_gnsssdr_get_lvarch format)
const char *name;
const size_t alignment; //the maximum byte alignment required for functions in this library
@ -43,7 +44,10 @@ struct volk_gnsssdr_machine {
%for machine in machines:
#ifdef LV_MACHINE_${machine.name.upper() }
## One declaration per machine; the ${...} expression must stay attached to the
## identifier prefix so the generated name is volk_gnsssdr_machine_<name>.
extern struct volk_gnsssdr_machine volk_gnsssdr_machine_${machine.name};
#endif
% endfor