mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2024-12-15 12:40:35 +00:00
Merge branch 'next' of https://github.com/gnss-sdr/gnss-sdr into next
This commit is contained in:
commit
f6b7e1812b
@ -128,8 +128,7 @@ $ git pull --rebase upstream next
|
|||||||
|
|
||||||
### How to submit a pull request
|
### How to submit a pull request
|
||||||
|
|
||||||
Before submitting your code, please be sure to apply clang-format
|
Before submitting your code, please be sure to [apply clang-format](http://gnss-sdr.org/coding-style/#use-tools-for-automated-code-formatting).
|
||||||
(see http://gnss-sdr.org/coding-style/#use-tools-for-automated-code-formatting).
|
|
||||||
|
|
||||||
When the contribution is ready, you can [submit a pull
|
When the contribution is ready, you can [submit a pull
|
||||||
request](https://github.com/gnss-sdr/gnss-sdr/compare/). Head to your
|
request](https://github.com/gnss-sdr/gnss-sdr/compare/). Head to your
|
||||||
|
@ -31,90 +31,80 @@ static intptr_t __alignment_mask = 0;
|
|||||||
|
|
||||||
struct volk_gnsssdr_machine *get_machine(void)
|
struct volk_gnsssdr_machine *get_machine(void)
|
||||||
{
|
{
|
||||||
extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[];
|
extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[];
|
||||||
extern unsigned int n_volk_gnsssdr_machines;
|
extern unsigned int n_volk_gnsssdr_machines;
|
||||||
static struct volk_gnsssdr_machine *machine = NULL;
|
static struct volk_gnsssdr_machine *machine = NULL;
|
||||||
|
|
||||||
if (machine != NULL)
|
if(machine != NULL)
|
||||||
return machine;
|
return machine;
|
||||||
else
|
else {
|
||||||
{
|
unsigned int max_score = 0;
|
||||||
unsigned int max_score = 0;
|
unsigned int i;
|
||||||
unsigned int i;
|
struct volk_gnsssdr_machine *max_machine = NULL;
|
||||||
struct volk_gnsssdr_machine *max_machine = NULL;
|
for(i=0; i<n_volk_gnsssdr_machines; i++) {
|
||||||
for (i = 0; i < n_volk_gnsssdr_machines; i++)
|
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
|
||||||
{
|
if(volk_gnsssdr_machines[i]->caps > max_score) {
|
||||||
if (!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch())))
|
max_score = volk_gnsssdr_machines[i]->caps;
|
||||||
{
|
max_machine = volk_gnsssdr_machines[i];
|
||||||
if (volk_gnsssdr_machines[i]->caps > max_score)
|
|
||||||
{
|
|
||||||
max_score = volk_gnsssdr_machines[i]->caps;
|
|
||||||
max_machine = volk_gnsssdr_machines[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
machine = max_machine;
|
|
||||||
//printf("Using Volk machine: %s\n", machine->name);
|
|
||||||
__alignment = machine->alignment;
|
|
||||||
__alignment_mask = (intptr_t)(__alignment - 1);
|
|
||||||
return machine;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
machine = max_machine;
|
||||||
|
//printf("Using Volk machine: %s\n", machine->name);
|
||||||
|
__alignment = machine->alignment;
|
||||||
|
__alignment_mask = (intptr_t)(__alignment-1);
|
||||||
|
return machine;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void volk_gnsssdr_list_machines(void)
|
void volk_gnsssdr_list_machines(void)
|
||||||
{
|
{
|
||||||
extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[];
|
extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[];
|
||||||
extern unsigned int n_volk_gnsssdr_machines;
|
extern unsigned int n_volk_gnsssdr_machines;
|
||||||
|
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
for (i = 0; i < n_volk_gnsssdr_machines; i++)
|
for(i=0; i<n_volk_gnsssdr_machines; i++) {
|
||||||
{
|
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
|
||||||
if (!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch())))
|
printf("%s;", volk_gnsssdr_machines[i]->name);
|
||||||
{
|
}
|
||||||
printf("%s;", volk_gnsssdr_machines[i]->name);
|
}
|
||||||
}
|
printf("\n");
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *volk_gnsssdr_get_machine(void)
|
const char* volk_gnsssdr_get_machine(void)
|
||||||
{
|
{
|
||||||
extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[];
|
extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[];
|
||||||
extern unsigned int n_volk_gnsssdr_machines;
|
extern unsigned int n_volk_gnsssdr_machines;
|
||||||
static struct volk_gnsssdr_machine *machine = NULL;
|
static struct volk_gnsssdr_machine *machine = NULL;
|
||||||
|
|
||||||
if (machine != NULL)
|
if(machine != NULL)
|
||||||
return machine->name;
|
return machine->name;
|
||||||
else
|
else {
|
||||||
{
|
unsigned int max_score = 0;
|
||||||
unsigned int max_score = 0;
|
unsigned int i;
|
||||||
unsigned int i;
|
struct volk_gnsssdr_machine *max_machine = NULL;
|
||||||
struct volk_gnsssdr_machine *max_machine = NULL;
|
for(i=0; i<n_volk_gnsssdr_machines; i++) {
|
||||||
for (i = 0; i < n_volk_gnsssdr_machines; i++)
|
if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) {
|
||||||
{
|
if(volk_gnsssdr_machines[i]->caps > max_score) {
|
||||||
if (!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch())))
|
max_score = volk_gnsssdr_machines[i]->caps;
|
||||||
{
|
max_machine = volk_gnsssdr_machines[i];
|
||||||
if (volk_gnsssdr_machines[i]->caps > max_score)
|
|
||||||
{
|
|
||||||
max_score = volk_gnsssdr_machines[i]->caps;
|
|
||||||
max_machine = volk_gnsssdr_machines[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
machine = max_machine;
|
|
||||||
return machine->name;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
machine = max_machine;
|
||||||
|
return machine->name;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t volk_gnsssdr_get_alignment(void)
|
size_t volk_gnsssdr_get_alignment(void)
|
||||||
{
|
{
|
||||||
get_machine(); //ensures alignment is set
|
get_machine(); //ensures alignment is set
|
||||||
return __alignment;
|
return __alignment;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool volk_gnsssdr_is_aligned(const void *ptr)
|
bool volk_gnsssdr_is_aligned(const void *ptr)
|
||||||
{
|
{
|
||||||
return ((intptr_t)(ptr)&__alignment_mask) == 0;
|
return ((intptr_t)(ptr) & __alignment_mask) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define LV_HAVE_GENERIC
|
#define LV_HAVE_GENERIC
|
||||||
@ -123,12 +113,13 @@ bool volk_gnsssdr_is_aligned(const void *ptr)
|
|||||||
%for kern in kernels:
|
%for kern in kernels:
|
||||||
|
|
||||||
%if kern.has_dispatcher:
|
%if kern.has_dispatcher:
|
||||||
#include <volk_gnsssdr/${kern.name}.h> //pulls in the dispatcher
|
#include <volk_gnsssdr/${kern.name}.h> //pulls in the dispatcher
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
static inline void __${kern.name}_d(${kern.arglist_full})
|
static inline void __${kern.name}_d(${kern.arglist_full})
|
||||||
{
|
{
|
||||||
% if kern.has_dispatcher : ${kern.name} _dispatcher(${kern.arglist_names});
|
%if kern.has_dispatcher:
|
||||||
|
${kern.name}_dispatcher(${kern.arglist_names});
|
||||||
return;
|
return;
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
@ -140,41 +131,41 @@ static inline void __${kern.name}_d(${kern.arglist_full})
|
|||||||
%endfor
|
%endfor
|
||||||
0<% end_open_parens = ')'*num_open_parens %>${end_open_parens}
|
0<% end_open_parens = ')'*num_open_parens %>${end_open_parens}
|
||||||
)){
|
)){
|
||||||
${kern.name} _a(${kern.arglist_names});
|
${kern.name}_a(${kern.arglist_names});
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
${kern.name} _u(${kern.arglist_names});
|
${kern.name}_u(${kern.arglist_names});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __init_${kern.name}(void)
|
static inline void __init_${kern.name}(void)
|
||||||
{
|
{
|
||||||
const char *name = get_machine()->${kern.name} _name;
|
const char *name = get_machine()->${kern.name}_name;
|
||||||
const char **impl_names = get_machine()->${kern.name} _impl_names;
|
const char **impl_names = get_machine()->${kern.name}_impl_names;
|
||||||
const int *impl_deps = get_machine()->${kern.name} _impl_deps;
|
const int *impl_deps = get_machine()->${kern.name}_impl_deps;
|
||||||
const bool *alignment = get_machine()->${kern.name} _impl_alignment;
|
const bool *alignment = get_machine()->${kern.name}_impl_alignment;
|
||||||
const size_t n_impls = get_machine()->${kern.name} _n_impls;
|
const size_t n_impls = get_machine()->${kern.name}_n_impls;
|
||||||
const size_t index_a = volk_gnsssdr_rank_archs(name, impl_names, impl_deps, alignment, n_impls, true /*aligned*/);
|
const size_t index_a = volk_gnsssdr_rank_archs(name, impl_names, impl_deps, alignment, n_impls, true/*aligned*/);
|
||||||
const size_t index_u = volk_gnsssdr_rank_archs(name, impl_names, impl_deps, alignment, n_impls, false /*unaligned*/);
|
const size_t index_u = volk_gnsssdr_rank_archs(name, impl_names, impl_deps, alignment, n_impls, false/*unaligned*/);
|
||||||
${kern.name} _a = get_machine()->${kern.name} _impls[index_a];
|
${kern.name}_a = get_machine()->${kern.name}_impls[index_a];
|
||||||
${kern.name} _u = get_machine()->${kern.name} _impls[index_u];
|
${kern.name}_u = get_machine()->${kern.name}_impls[index_u];
|
||||||
|
|
||||||
assert(${kern.name} _a);
|
assert(${kern.name}_a);
|
||||||
assert(${kern.name} _u);
|
assert(${kern.name}_u);
|
||||||
|
|
||||||
${kern.name} = &__${kern.name} _d;
|
${kern.name} = &__${kern.name}_d;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __${kern.name} _a(${kern.arglist_full})
|
static inline void __${kern.name}_a(${kern.arglist_full})
|
||||||
{
|
{
|
||||||
__init_${kern.name}();
|
__init_${kern.name}();
|
||||||
${kern.name} _a(${kern.arglist_names});
|
${kern.name}_a(${kern.arglist_names});
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __${kern.name} _u(${kern.arglist_full})
|
static inline void __${kern.name}_u(${kern.arglist_full})
|
||||||
{
|
{
|
||||||
__init_${kern.name}();
|
__init_${kern.name}();
|
||||||
${kern.name} _u(${kern.arglist_names});
|
${kern.name}_u(${kern.arglist_names});
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __${kern.name}(${kern.arglist_full})
|
static inline void __${kern.name}(${kern.arglist_full})
|
||||||
@ -183,32 +174,34 @@ static inline void __${kern.name}(${kern.arglist_full})
|
|||||||
${kern.name}(${kern.arglist_names});
|
${kern.name}(${kern.arglist_names});
|
||||||
}
|
}
|
||||||
|
|
||||||
${kern.pname} ${kern.name} _a = &__${kern.name} _a;
|
${kern.pname} ${kern.name}_a = &__${kern.name}_a;
|
||||||
${kern.pname} ${kern.name} _u = &__${kern.name} _u;
|
${kern.pname} ${kern.name}_u = &__${kern.name}_u;
|
||||||
${kern.pname} ${kern.name} = &__${kern.name};
|
${kern.pname} ${kern.name} = &__${kern.name};
|
||||||
|
|
||||||
void ${kern.name} _manual(${kern.arglist_full}, const char *impl_name)
|
void ${kern.name}_manual(${kern.arglist_full}, const char* impl_name)
|
||||||
{
|
{
|
||||||
const int index = volk_gnsssdr_get_index(
|
const int index = volk_gnsssdr_get_index(
|
||||||
get_machine()->${kern.name} _impl_names,
|
get_machine()->${kern.name}_impl_names,
|
||||||
get_machine()->${kern.name} _n_impls,
|
get_machine()->${kern.name}_n_impls,
|
||||||
impl_name);
|
impl_name
|
||||||
get_machine()->${kern.name} _impls[index](
|
);
|
||||||
${kern.arglist_names});
|
get_machine()->${kern.name}_impls[index](
|
||||||
|
${kern.arglist_names}
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
volk_gnsssdr_func_desc_t ${kern.name} _get_func_desc(void)
|
volk_gnsssdr_func_desc_t ${kern.name}_get_func_desc(void) {
|
||||||
{
|
const char **impl_names = get_machine()->${kern.name}_impl_names;
|
||||||
const char **impl_names = get_machine()->${kern.name} _impl_names;
|
const int *impl_deps = get_machine()->${kern.name}_impl_deps;
|
||||||
const int *impl_deps = get_machine()->${kern.name} _impl_deps;
|
const bool *alignment = get_machine()->${kern.name}_impl_alignment;
|
||||||
const bool *alignment = get_machine()->${kern.name} _impl_alignment;
|
const size_t n_impls = get_machine()->${kern.name}_n_impls;
|
||||||
const size_t n_impls = get_machine()->${kern.name} _n_impls;
|
|
||||||
volk_gnsssdr_func_desc_t desc = {
|
volk_gnsssdr_func_desc_t desc = {
|
||||||
impl_names,
|
impl_names,
|
||||||
impl_deps,
|
impl_deps,
|
||||||
alignment,
|
alignment,
|
||||||
n_impls};
|
n_impls
|
||||||
|
};
|
||||||
return desc;
|
return desc;
|
||||||
}
|
}
|
||||||
|
|
||||||
% endfor
|
%endfor
|
||||||
|
@ -42,7 +42,7 @@ typedef struct volk_gnsssdr_func_desc
|
|||||||
VOLK_API void volk_gnsssdr_list_machines(void);
|
VOLK_API void volk_gnsssdr_list_machines(void);
|
||||||
|
|
||||||
//! Returns the name of the machine this instance will use
|
//! Returns the name of the machine this instance will use
|
||||||
VOLK_API const char *volk_gnsssdr_get_machine(void);
|
VOLK_API const char* volk_gnsssdr_get_machine(void);
|
||||||
|
|
||||||
//! Get the machine alignment in bytes
|
//! Get the machine alignment in bytes
|
||||||
VOLK_API size_t volk_gnsssdr_get_alignment(void);
|
VOLK_API size_t volk_gnsssdr_get_alignment(void);
|
||||||
@ -74,19 +74,19 @@ VOLK_API bool volk_gnsssdr_is_aligned(const void *ptr);
|
|||||||
extern VOLK_API ${kern.pname} ${kern.name};
|
extern VOLK_API ${kern.pname} ${kern.name};
|
||||||
|
|
||||||
//! A function pointer to the fastest aligned implementation
|
//! A function pointer to the fastest aligned implementation
|
||||||
extern VOLK_API ${kern.pname} ${kern.name} _a;
|
extern VOLK_API ${kern.pname} ${kern.name}_a;
|
||||||
|
|
||||||
//! A function pointer to the fastest unaligned implementation
|
//! A function pointer to the fastest unaligned implementation
|
||||||
extern VOLK_API ${kern.pname} ${kern.name} _u;
|
extern VOLK_API ${kern.pname} ${kern.name}_u;
|
||||||
|
|
||||||
//! Call into a specific implementation given by name
|
//! Call into a specific implementation given by name
|
||||||
extern VOLK_API void ${kern.name} _manual(${kern.arglist_full}, const char *impl_name);
|
extern VOLK_API void ${kern.name}_manual(${kern.arglist_full}, const char* impl_name);
|
||||||
|
|
||||||
//! Get description parameters for this kernel
|
//! Get description parameters for this kernel
|
||||||
extern VOLK_API volk_gnsssdr_func_desc_t ${kern.name} _get_func_desc(void);
|
extern VOLK_API volk_gnsssdr_func_desc_t ${kern.name}_get_func_desc(void);
|
||||||
% endfor
|
%endfor
|
||||||
|
|
||||||
__VOLK_DECL_END
|
__VOLK_DECL_END
|
||||||
|
|
||||||
|
|
||||||
#endif /*INCLUDED_VOLK_GNSSSDR_RUNTIME*/
|
#endif /*INCLUDED_VOLK_GNSSSDR_RUNTIME*/
|
||||||
|
@ -21,8 +21,7 @@
|
|||||||
|
|
||||||
%for i, arch in enumerate(archs):
|
%for i, arch in enumerate(archs):
|
||||||
//#ifndef LV_${arch.name.upper()}
|
//#ifndef LV_${arch.name.upper()}
|
||||||
#define LV_$ \
|
#define LV_${arch.name.upper()} ${i}
|
||||||
{arch.name.upper()} $ { i }
|
|
||||||
//#endif
|
//#endif
|
||||||
%endfor
|
%endfor
|
||||||
|
|
||||||
|
@ -24,54 +24,50 @@
|
|||||||
struct VOLK_CPU volk_gnsssdr_cpu;
|
struct VOLK_CPU volk_gnsssdr_cpu;
|
||||||
|
|
||||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
|
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
|
||||||
#define VOLK_CPU_x86
|
#define VOLK_CPU_x86
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(VOLK_CPU_x86)
|
#if defined(VOLK_CPU_x86)
|
||||||
|
|
||||||
//implement get cpuid for gcc compilers using a system or local copy of cpuid.h
|
//implement get cpuid for gcc compilers using a system or local copy of cpuid.h
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
#include <cpuid.h>
|
#include <cpuid.h>
|
||||||
#define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r + 0, (unsigned int *)r + 1, (unsigned int *)r + 2, (unsigned int *)r + 3)
|
#define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3)
|
||||||
#define cpuid_x86_count(op, count, regs) __cpuid_count(op, count, *((unsigned int *)regs), *((unsigned int *)regs + 1), *((unsigned int *)regs + 2), *((unsigned int *)regs + 3))
|
#define cpuid_x86_count(op, count, regs) __cpuid_count(op, count, *((unsigned int*)regs), *((unsigned int*)regs+1), *((unsigned int*)regs+2), *((unsigned int*)regs+3))
|
||||||
|
|
||||||
/* Return Intel AVX extended CPU capabilities register.
|
/* Return Intel AVX extended CPU capabilities register.
|
||||||
* This function will bomb on non-AVX-capable machines, so
|
* This function will bomb on non-AVX-capable machines, so
|
||||||
* check for AVX capability before executing.
|
* check for AVX capability before executing.
|
||||||
*/
|
*/
|
||||||
#if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3)) && defined(HAVE_XGETBV)
|
#if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3)) && defined(HAVE_XGETBV)
|
||||||
static inline unsigned long long _xgetbv(unsigned int index)
|
static inline unsigned long long _xgetbv(unsigned int index){
|
||||||
{
|
unsigned int eax, edx;
|
||||||
unsigned int eax, edx;
|
__VOLK_ASM __VOLK_VOLATILE ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
|
||||||
__VOLK_ASM __VOLK_VOLATILE("xgetbv"
|
return ((unsigned long long)edx << 32) | eax;
|
||||||
: "=a"(eax), "=d"(edx)
|
}
|
||||||
: "c"(index));
|
#define __xgetbv() _xgetbv(0)
|
||||||
return ((unsigned long long)edx << 32) | eax;
|
#else
|
||||||
}
|
#define __xgetbv() 0
|
||||||
#define __xgetbv() _xgetbv(0)
|
#endif
|
||||||
#else
|
|
||||||
#define __xgetbv() 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//implement get cpuid for MSVC compilers using __cpuid intrinsic
|
//implement get cpuid for MSVC compilers using __cpuid intrinsic
|
||||||
#elif defined(_MSC_VER) && defined(HAVE_INTRIN_H)
|
#elif defined(_MSC_VER) && defined(HAVE_INTRIN_H)
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
#define cpuid_x86(op, r) __cpuid(((int *)r), op)
|
#define cpuid_x86(op, r) __cpuid(((int*)r), op)
|
||||||
|
|
||||||
#if defined(_XCR_XFEATURE_ENABLED_MASK)
|
#if defined(_XCR_XFEATURE_ENABLED_MASK)
|
||||||
#define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
|
#define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
|
||||||
#else
|
#else
|
||||||
#define __xgetbv() 0
|
#define __xgetbv() 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error "A get cpuid for volk_gnsssdr is not available on this compiler..."
|
#error "A get cpuid for volk_gnsssdr is not available on this compiler..."
|
||||||
#endif //defined(__GNUC__)
|
#endif //defined(__GNUC__)
|
||||||
|
|
||||||
#endif //defined(VOLK_CPU_x86)
|
#endif //defined(VOLK_CPU_x86)
|
||||||
|
|
||||||
static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int count, unsigned int reg, unsigned int bit)
|
static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int count, unsigned int reg, unsigned int bit) {
|
||||||
{
|
|
||||||
#if defined(VOLK_CPU_x86)
|
#if defined(VOLK_CPU_x86)
|
||||||
unsigned int regs[4] = {0};
|
unsigned int regs[4] = {0};
|
||||||
cpuid_x86_count(level, count, regs);
|
cpuid_x86_count(level, count, regs);
|
||||||
@ -81,11 +77,10 @@ static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit)
|
static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit) {
|
||||||
{
|
|
||||||
#if defined(VOLK_CPU_x86)
|
#if defined(VOLK_CPU_x86)
|
||||||
unsigned int regs[4];
|
unsigned int regs[4];
|
||||||
memset(regs, 0, sizeof(unsigned int) * 4);
|
memset(regs, 0, sizeof(unsigned int)*4);
|
||||||
cpuid_x86(op, regs);
|
cpuid_x86(op, regs);
|
||||||
return regs[reg] >> bit & 0x01;
|
return regs[reg] >> bit & 0x01;
|
||||||
#else
|
#else
|
||||||
@ -93,11 +88,10 @@ static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsi
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned int check_extended_cpuid(unsigned int val)
|
static inline unsigned int check_extended_cpuid(unsigned int val) {
|
||||||
{
|
|
||||||
#if defined(VOLK_CPU_x86)
|
#if defined(VOLK_CPU_x86)
|
||||||
unsigned int regs[4];
|
unsigned int regs[4];
|
||||||
memset(regs, 0, sizeof(unsigned int) * 4);
|
memset(regs, 0, sizeof(unsigned int)*4);
|
||||||
cpuid_x86(0x80000000, regs);
|
cpuid_x86(0x80000000, regs);
|
||||||
return regs[0] >= val;
|
return regs[0] >= val;
|
||||||
#else
|
#else
|
||||||
@ -105,8 +99,7 @@ static inline unsigned int check_extended_cpuid(unsigned int val)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned int get_avx_enabled(void)
|
static inline unsigned int get_avx_enabled(void) {
|
||||||
{
|
|
||||||
#if defined(VOLK_CPU_x86)
|
#if defined(VOLK_CPU_x86)
|
||||||
return __xgetbv() & 0x6;
|
return __xgetbv() & 0x6;
|
||||||
#else
|
#else
|
||||||
@ -114,8 +107,7 @@ static inline unsigned int get_avx_enabled(void)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned int get_avx2_enabled(void)
|
static inline unsigned int get_avx2_enabled(void) {
|
||||||
{
|
|
||||||
#if defined(VOLK_CPU_x86)
|
#if defined(VOLK_CPU_x86)
|
||||||
return __xgetbv() & 0x6;
|
return __xgetbv() & 0x6;
|
||||||
#else
|
#else
|
||||||
@ -125,30 +117,28 @@ static inline unsigned int get_avx2_enabled(void)
|
|||||||
|
|
||||||
//neon detection is linux specific
|
//neon detection is linux specific
|
||||||
#if defined(__arm__) && defined(__linux__)
|
#if defined(__arm__) && defined(__linux__)
|
||||||
#include <asm/hwcap.h>
|
#include <asm/hwcap.h>
|
||||||
#include <linux/auxvec.h>
|
#include <linux/auxvec.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#define VOLK_CPU_ARM
|
#define VOLK_CPU_ARM
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static int has_neon(void)
|
static int has_neon(void){
|
||||||
{
|
|
||||||
#if defined(VOLK_CPU_ARM)
|
#if defined(VOLK_CPU_ARM)
|
||||||
FILE *auxvec_f;
|
FILE *auxvec_f;
|
||||||
unsigned long auxvec[2];
|
unsigned long auxvec[2];
|
||||||
unsigned int found_neon = 0;
|
unsigned int found_neon = 0;
|
||||||
auxvec_f = fopen("/proc/self/auxv", "rb");
|
auxvec_f = fopen("/proc/self/auxv", "rb");
|
||||||
if (!auxvec_f) return 0;
|
if(!auxvec_f) return 0;
|
||||||
|
|
||||||
size_t r = 1;
|
size_t r = 1;
|
||||||
//so auxv is basically 32b of ID and 32b of value
|
//so auxv is basically 32b of ID and 32b of value
|
||||||
//so it goes like this
|
//so it goes like this
|
||||||
while (!found_neon && r)
|
while(!found_neon && r) {
|
||||||
{
|
r = fread(auxvec, sizeof(unsigned long), 2, auxvec_f);
|
||||||
r = fread(auxvec, sizeof(unsigned long), 2, auxvec_f);
|
if((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_NEON))
|
||||||
if ((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_NEON))
|
found_neon = 1;
|
||||||
found_neon = 1;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
fclose(auxvec_f);
|
fclose(auxvec_f);
|
||||||
return found_neon;
|
return found_neon;
|
||||||
@ -158,59 +148,50 @@ static int has_neon(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
%for arch in archs:
|
%for arch in archs:
|
||||||
static int i_can_has_${arch.name} (void)
|
static int i_can_has_${arch.name} (void) {
|
||||||
{
|
|
||||||
%for check, params in arch.checks:
|
%for check, params in arch.checks:
|
||||||
if (${check}(<% joined_params = ', '.join(params)%>${joined_params}) == 0) return 0;
|
if (${check}(<% joined_params = ', '.join(params)%>${joined_params}) == 0) return 0;
|
||||||
% endfor return 1;
|
%endfor
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
% endfor
|
%endfor
|
||||||
|
|
||||||
#if defined(HAVE_FENV_H)
|
#if defined(HAVE_FENV_H)
|
||||||
#if defined(FE_TONEAREST)
|
#if defined(FE_TONEAREST)
|
||||||
#include <fenv.h>
|
#include <fenv.h>
|
||||||
static inline void
|
static inline void set_float_rounding(void){
|
||||||
set_float_rounding(void)
|
fesetround(FE_TONEAREST);
|
||||||
{
|
}
|
||||||
fesetround(FE_TONEAREST);
|
#else
|
||||||
}
|
static inline void set_float_rounding(void){
|
||||||
#else
|
//do nothing
|
||||||
static inline void
|
}
|
||||||
set_float_rounding(void)
|
#endif
|
||||||
{
|
|
||||||
//do nothing
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#elif defined(_MSC_VER)
|
#elif defined(_MSC_VER)
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
static inline void
|
static inline void set_float_rounding(void){
|
||||||
set_float_rounding(void)
|
unsigned int cwrd;
|
||||||
{
|
_controlfp_s(&cwrd, 0, 0);
|
||||||
unsigned int cwrd;
|
_controlfp_s(&cwrd, _RC_NEAR, _MCW_RC);
|
||||||
_controlfp_s(&cwrd, 0, 0);
|
}
|
||||||
_controlfp_s(&cwrd, _RC_NEAR, _MCW_RC);
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
static inline void
|
static inline void set_float_rounding(void){
|
||||||
set_float_rounding(void)
|
//do nothing
|
||||||
{
|
}
|
||||||
//do nothing
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void volk_gnsssdr_cpu_init()
|
void volk_gnsssdr_cpu_init() {
|
||||||
{
|
|
||||||
%for arch in archs:
|
%for arch in archs:
|
||||||
volk_gnsssdr_cpu.has_${arch.name} = &i_can_has_${arch.name};
|
volk_gnsssdr_cpu.has_${arch.name} = &i_can_has_${arch.name};
|
||||||
% endfor
|
%endfor
|
||||||
set_float_rounding();
|
set_float_rounding();
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int volk_gnsssdr_get_lvarch()
|
unsigned int volk_gnsssdr_get_lvarch() {
|
||||||
{
|
|
||||||
unsigned int retval = 0;
|
unsigned int retval = 0;
|
||||||
volk_gnsssdr_cpu_init();
|
volk_gnsssdr_cpu_init();
|
||||||
%for arch in archs:
|
%for arch in archs:
|
||||||
retval += volk_gnsssdr_cpu.has_${arch.name}() << LV_${arch.name.upper()};
|
retval += volk_gnsssdr_cpu.has_${arch.name}() << LV_${arch.name.upper()};
|
||||||
% endfor return retval;
|
%endfor
|
||||||
|
return retval;
|
||||||
}
|
}
|
||||||
|
@ -23,17 +23,16 @@
|
|||||||
|
|
||||||
__VOLK_DECL_BEGIN
|
__VOLK_DECL_BEGIN
|
||||||
|
|
||||||
struct VOLK_CPU
|
struct VOLK_CPU {
|
||||||
{
|
|
||||||
%for arch in archs:
|
%for arch in archs:
|
||||||
int (*has_${arch.name}) ();
|
int (*has_${arch.name}) ();
|
||||||
% endfor
|
%endfor
|
||||||
};
|
};
|
||||||
|
|
||||||
extern struct VOLK_CPU volk_gnsssdr_cpu;
|
extern struct VOLK_CPU volk_gnsssdr_cpu;
|
||||||
|
|
||||||
void volk_gnsssdr_cpu_init();
|
void volk_gnsssdr_cpu_init ();
|
||||||
unsigned int volk_gnsssdr_get_lvarch();
|
unsigned int volk_gnsssdr_get_lvarch ();
|
||||||
|
|
||||||
__VOLK_DECL_END
|
__VOLK_DECL_END
|
||||||
|
|
||||||
|
@ -20,11 +20,7 @@
|
|||||||
<% arch_names = this_machine.arch_names %>
|
<% arch_names = this_machine.arch_names %>
|
||||||
|
|
||||||
%for arch in this_machine.archs:
|
%for arch in this_machine.archs:
|
||||||
#define LV_HAVE_$ \
|
#define LV_HAVE_${arch.name.upper()} 1
|
||||||
{ \
|
|
||||||
arch.name.upper() \
|
|
||||||
} \
|
|
||||||
1
|
|
||||||
%endfor
|
%endfor
|
||||||
|
|
||||||
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||||
@ -39,9 +35,7 @@
|
|||||||
#include <volk_gnsssdr/${kern.name}.h>
|
#include <volk_gnsssdr/${kern.name}.h>
|
||||||
%endfor
|
%endfor
|
||||||
|
|
||||||
struct volk_gnsssdr_machine volk_gnsssdr_machine_$
|
struct volk_gnsssdr_machine volk_gnsssdr_machine_${this_machine.name} = {
|
||||||
{
|
|
||||||
this_machine.name} = {
|
|
||||||
<% make_arch_have_list = (' | '.join(['(1 << LV_%s)'%a.name.upper() for a in this_machine.archs])) %> ${make_arch_have_list},
|
<% make_arch_have_list = (' | '.join(['(1 << LV_%s)'%a.name.upper() for a in this_machine.archs])) %> ${make_arch_have_list},
|
||||||
<% this_machine_name = "\""+this_machine.name+"\"" %> ${this_machine_name},
|
<% this_machine_name = "\""+this_machine.name+"\"" %> ${this_machine_name},
|
||||||
${this_machine.alignment},
|
${this_machine.alignment},
|
||||||
|
@ -22,10 +22,10 @@
|
|||||||
|
|
||||||
struct volk_gnsssdr_machine *volk_gnsssdr_machines[] = {
|
struct volk_gnsssdr_machine *volk_gnsssdr_machines[] = {
|
||||||
%for machine in machines:
|
%for machine in machines:
|
||||||
#ifdef LV_MACHINE_${machine.name.upper() }
|
#ifdef LV_MACHINE_${machine.name.upper()}
|
||||||
&volk_gnsssdr_machine_${machine.name},
|
&volk_gnsssdr_machine_${machine.name},
|
||||||
#endif
|
#endif
|
||||||
%endfor
|
%endfor
|
||||||
};
|
};
|
||||||
|
|
||||||
unsigned int n_volk_gnsssdr_machines = sizeof(volk_gnsssdr_machines) / sizeof(*volk_gnsssdr_machines);
|
unsigned int n_volk_gnsssdr_machines = sizeof(volk_gnsssdr_machines)/sizeof(*volk_gnsssdr_machines);
|
||||||
|
@ -27,30 +27,26 @@
|
|||||||
|
|
||||||
__VOLK_DECL_BEGIN
|
__VOLK_DECL_BEGIN
|
||||||
|
|
||||||
struct volk_gnsssdr_machine
|
struct volk_gnsssdr_machine {
|
||||||
{
|
const unsigned int caps; //capabilities (i.e., archs compiled into this machine, in the volk_gnsssdr_get_lvarch format)
|
||||||
const unsigned int caps; //capabilities (i.e., archs compiled into this machine, in the volk_gnsssdr_get_lvarch format)
|
|
||||||
const char *name;
|
const char *name;
|
||||||
const size_t alignment; //the maximum byte alignment required for functions in this library
|
const size_t alignment; //the maximum byte alignment required for functions in this library
|
||||||
%for kern in kernels:
|
%for kern in kernels:
|
||||||
const char *${kern.name}_name;
|
const char *${kern.name}_name;
|
||||||
const char *${kern.name} _impl_names[<% len_archs = len(archs) %> ${len_archs}];
|
const char *${kern.name}_impl_names[<%len_archs=len(archs)%>${len_archs}];
|
||||||
const int ${kern.name} _impl_deps[${len_archs}];
|
const int ${kern.name}_impl_deps[${len_archs}];
|
||||||
const bool ${kern.name} _impl_alignment[${len_archs}];
|
const bool ${kern.name}_impl_alignment[${len_archs}];
|
||||||
const ${kern.pname} ${kern.name} _impls[${len_archs}];
|
const ${kern.pname} ${kern.name}_impls[${len_archs}];
|
||||||
const size_t ${kern.name} _n_impls;
|
const size_t ${kern.name}_n_impls;
|
||||||
% endfor
|
%endfor
|
||||||
};
|
};
|
||||||
|
|
||||||
%for machine in machines:
|
%for machine in machines:
|
||||||
#ifdef LV_MACHINE_${machine.name.upper() }
|
#ifdef LV_MACHINE_${machine.name.upper()}
|
||||||
extern struct volk_gnsssdr_machine volk_gnsssdr_machine_$
|
extern struct volk_gnsssdr_machine volk_gnsssdr_machine_${machine.name};
|
||||||
{
|
|
||||||
machine.name
|
|
||||||
};
|
|
||||||
#endif
|
#endif
|
||||||
% endfor
|
%endfor
|
||||||
|
|
||||||
__VOLK_DECL_END
|
__VOLK_DECL_END
|
||||||
|
|
||||||
#endif //INCLUDED_LIBVOLK_GNSSSDR_MACHINES_H
|
#endif //INCLUDED_LIBVOLK_GNSSSDR_MACHINES_H
|
||||||
|
@ -24,6 +24,6 @@
|
|||||||
|
|
||||||
%for kern in kernels:
|
%for kern in kernels:
|
||||||
typedef void (*${kern.pname})(${kern.arglist_types});
|
typedef void (*${kern.pname})(${kern.arglist_types});
|
||||||
% endfor
|
%endfor
|
||||||
|
|
||||||
#endif /*INCLUDED_VOLK_GNSSSDR_TYPEDEFS*/
|
#endif /*INCLUDED_VOLK_GNSSSDR_TYPEDEFS*/
|
||||||
|
Loading…
Reference in New Issue
Block a user