mirror of
				https://github.com/gnss-sdr/gnss-sdr
				synced 2025-10-29 06:27:44 +00:00 
			
		
		
		
	Merge branch 'next' of https://github.com/gnss-sdr/gnss-sdr into next
This commit is contained in:
		| @@ -128,8 +128,7 @@ $ git pull --rebase upstream next | |||||||
|  |  | ||||||
| ### How to submit a pull request | ### How to submit a pull request | ||||||
|  |  | ||||||
| Before submitting your code, please be sure to apply clang-format | Before submitting your code, please be sure to [apply clang-format](http://gnss-sdr.org/coding-style/#use-tools-for-automated-code-formatting). | ||||||
| (see http://gnss-sdr.org/coding-style/#use-tools-for-automated-code-formatting). |  | ||||||
|  |  | ||||||
| When the contribution is ready, you can [submit a pull | When the contribution is ready, you can [submit a pull | ||||||
| request](https://github.com/gnss-sdr/gnss-sdr/compare/). Head to your | request](https://github.com/gnss-sdr/gnss-sdr/compare/). Head to your | ||||||
|   | |||||||
| @@ -31,90 +31,80 @@ static intptr_t __alignment_mask = 0; | |||||||
|  |  | ||||||
| struct volk_gnsssdr_machine *get_machine(void) | struct volk_gnsssdr_machine *get_machine(void) | ||||||
| { | { | ||||||
|     extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[]; |   extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[]; | ||||||
|     extern unsigned int n_volk_gnsssdr_machines; |   extern unsigned int n_volk_gnsssdr_machines; | ||||||
|     static struct volk_gnsssdr_machine *machine = NULL; |   static struct volk_gnsssdr_machine *machine = NULL; | ||||||
|  |  | ||||||
|     if (machine != NULL) |   if(machine != NULL) | ||||||
|         return machine; |     return machine; | ||||||
|     else |   else { | ||||||
|         { |     unsigned int max_score = 0; | ||||||
|             unsigned int max_score = 0; |     unsigned int i; | ||||||
|             unsigned int i; |     struct volk_gnsssdr_machine *max_machine = NULL; | ||||||
|             struct volk_gnsssdr_machine *max_machine = NULL; |     for(i=0; i<n_volk_gnsssdr_machines; i++) { | ||||||
|             for (i = 0; i < n_volk_gnsssdr_machines; i++) |       if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) { | ||||||
|                 { |         if(volk_gnsssdr_machines[i]->caps > max_score) { | ||||||
|                     if (!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) |           max_score = volk_gnsssdr_machines[i]->caps; | ||||||
|                         { |           max_machine = volk_gnsssdr_machines[i]; | ||||||
|                             if (volk_gnsssdr_machines[i]->caps > max_score) |  | ||||||
|                                 { |  | ||||||
|                                     max_score = volk_gnsssdr_machines[i]->caps; |  | ||||||
|                                     max_machine = volk_gnsssdr_machines[i]; |  | ||||||
|                                 } |  | ||||||
|                         } |  | ||||||
|                 } |  | ||||||
|             machine = max_machine; |  | ||||||
|             //printf("Using Volk machine: %s\n", machine->name); |  | ||||||
|             __alignment = machine->alignment; |  | ||||||
|             __alignment_mask = (intptr_t)(__alignment - 1); |  | ||||||
|             return machine; |  | ||||||
|         } |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     machine = max_machine; | ||||||
|  |     //printf("Using Volk machine: %s\n", machine->name); | ||||||
|  |     __alignment = machine->alignment; | ||||||
|  |     __alignment_mask = (intptr_t)(__alignment-1); | ||||||
|  |     return machine; | ||||||
|  |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| void volk_gnsssdr_list_machines(void) | void volk_gnsssdr_list_machines(void) | ||||||
| { | { | ||||||
|     extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[]; |   extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[]; | ||||||
|     extern unsigned int n_volk_gnsssdr_machines; |   extern unsigned int n_volk_gnsssdr_machines; | ||||||
|  |  | ||||||
|     unsigned int i; |   unsigned int i; | ||||||
|     for (i = 0; i < n_volk_gnsssdr_machines; i++) |   for(i=0; i<n_volk_gnsssdr_machines; i++) { | ||||||
|         { |     if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) { | ||||||
|             if (!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) |         printf("%s;", volk_gnsssdr_machines[i]->name); | ||||||
|                 { |     } | ||||||
|                     printf("%s;", volk_gnsssdr_machines[i]->name); |   } | ||||||
|                 } |   printf("\n"); | ||||||
|         } |  | ||||||
|     printf("\n"); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| const char *volk_gnsssdr_get_machine(void) | const char* volk_gnsssdr_get_machine(void) | ||||||
| { | { | ||||||
|     extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[]; |   extern struct volk_gnsssdr_machine *volk_gnsssdr_machines[]; | ||||||
|     extern unsigned int n_volk_gnsssdr_machines; |   extern unsigned int n_volk_gnsssdr_machines; | ||||||
|     static struct volk_gnsssdr_machine *machine = NULL; |   static struct volk_gnsssdr_machine *machine = NULL; | ||||||
|  |  | ||||||
|     if (machine != NULL) |   if(machine != NULL) | ||||||
|         return machine->name; |     return machine->name; | ||||||
|     else |   else { | ||||||
|         { |     unsigned int max_score = 0; | ||||||
|             unsigned int max_score = 0; |     unsigned int i; | ||||||
|             unsigned int i; |     struct volk_gnsssdr_machine *max_machine = NULL; | ||||||
|             struct volk_gnsssdr_machine *max_machine = NULL; |     for(i=0; i<n_volk_gnsssdr_machines; i++) { | ||||||
|             for (i = 0; i < n_volk_gnsssdr_machines; i++) |       if(!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) { | ||||||
|                 { |         if(volk_gnsssdr_machines[i]->caps > max_score) { | ||||||
|                     if (!(volk_gnsssdr_machines[i]->caps & (~volk_gnsssdr_get_lvarch()))) |           max_score = volk_gnsssdr_machines[i]->caps; | ||||||
|                         { |           max_machine = volk_gnsssdr_machines[i]; | ||||||
|                             if (volk_gnsssdr_machines[i]->caps > max_score) |  | ||||||
|                                 { |  | ||||||
|                                     max_score = volk_gnsssdr_machines[i]->caps; |  | ||||||
|                                     max_machine = volk_gnsssdr_machines[i]; |  | ||||||
|                                 } |  | ||||||
|                         } |  | ||||||
|                 } |  | ||||||
|             machine = max_machine; |  | ||||||
|             return machine->name; |  | ||||||
|         } |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |     machine = max_machine; | ||||||
|  |     return machine->name; | ||||||
|  |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| size_t volk_gnsssdr_get_alignment(void) | size_t volk_gnsssdr_get_alignment(void) | ||||||
| { | { | ||||||
|     get_machine();  //ensures alignment is set |     get_machine(); //ensures alignment is set | ||||||
|     return __alignment; |     return __alignment; | ||||||
| } | } | ||||||
|  |  | ||||||
| bool volk_gnsssdr_is_aligned(const void *ptr) | bool volk_gnsssdr_is_aligned(const void *ptr) | ||||||
| { | { | ||||||
|     return ((intptr_t)(ptr)&__alignment_mask) == 0; |     return ((intptr_t)(ptr) & __alignment_mask) == 0; | ||||||
| } | } | ||||||
|  |  | ||||||
| #define LV_HAVE_GENERIC | #define LV_HAVE_GENERIC | ||||||
| @@ -123,12 +113,13 @@ bool volk_gnsssdr_is_aligned(const void *ptr) | |||||||
| %for kern in kernels: | %for kern in kernels: | ||||||
|  |  | ||||||
| %if kern.has_dispatcher: | %if kern.has_dispatcher: | ||||||
| #include <volk_gnsssdr/${kern.name}.h>  //pulls in the dispatcher | #include <volk_gnsssdr/${kern.name}.h> //pulls in the dispatcher | ||||||
| %endif | %endif | ||||||
|  |  | ||||||
| static inline void __${kern.name}_d(${kern.arglist_full}) | static inline void __${kern.name}_d(${kern.arglist_full}) | ||||||
| { | { | ||||||
|     % if kern.has_dispatcher : ${kern.name} _dispatcher(${kern.arglist_names}); |     %if kern.has_dispatcher: | ||||||
|  |     ${kern.name}_dispatcher(${kern.arglist_names}); | ||||||
|     return; |     return; | ||||||
|     %endif |     %endif | ||||||
|  |  | ||||||
| @@ -140,41 +131,41 @@ static inline void __${kern.name}_d(${kern.arglist_full}) | |||||||
|     %endfor |     %endfor | ||||||
|         0<% end_open_parens = ')'*num_open_parens %>${end_open_parens} |         0<% end_open_parens = ')'*num_open_parens %>${end_open_parens} | ||||||
|     )){ |     )){ | ||||||
|         ${kern.name} _a(${kern.arglist_names}); |         ${kern.name}_a(${kern.arglist_names}); | ||||||
|     } |     } | ||||||
|     else{ |     else{ | ||||||
|         ${kern.name} _u(${kern.arglist_names}); |         ${kern.name}_u(${kern.arglist_names}); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| static inline void __init_${kern.name}(void) | static inline void __init_${kern.name}(void) | ||||||
| { | { | ||||||
|     const char *name = get_machine()->${kern.name} _name; |     const char *name = get_machine()->${kern.name}_name; | ||||||
|     const char **impl_names = get_machine()->${kern.name} _impl_names; |     const char **impl_names = get_machine()->${kern.name}_impl_names; | ||||||
|     const int *impl_deps = get_machine()->${kern.name} _impl_deps; |     const int *impl_deps = get_machine()->${kern.name}_impl_deps; | ||||||
|     const bool *alignment = get_machine()->${kern.name} _impl_alignment; |     const bool *alignment = get_machine()->${kern.name}_impl_alignment; | ||||||
|     const size_t n_impls = get_machine()->${kern.name} _n_impls; |     const size_t n_impls = get_machine()->${kern.name}_n_impls; | ||||||
|     const size_t index_a = volk_gnsssdr_rank_archs(name, impl_names, impl_deps, alignment, n_impls, true /*aligned*/); |     const size_t index_a = volk_gnsssdr_rank_archs(name, impl_names, impl_deps, alignment, n_impls, true/*aligned*/); | ||||||
|     const size_t index_u = volk_gnsssdr_rank_archs(name, impl_names, impl_deps, alignment, n_impls, false /*unaligned*/); |     const size_t index_u = volk_gnsssdr_rank_archs(name, impl_names, impl_deps, alignment, n_impls, false/*unaligned*/); | ||||||
|     ${kern.name} _a = get_machine()->${kern.name} _impls[index_a]; |     ${kern.name}_a = get_machine()->${kern.name}_impls[index_a]; | ||||||
|     ${kern.name} _u = get_machine()->${kern.name} _impls[index_u]; |     ${kern.name}_u = get_machine()->${kern.name}_impls[index_u]; | ||||||
|  |  | ||||||
|     assert(${kern.name} _a); |     assert(${kern.name}_a); | ||||||
|     assert(${kern.name} _u); |     assert(${kern.name}_u); | ||||||
|  |  | ||||||
|     ${kern.name} = &__${kern.name} _d; |     ${kern.name} = &__${kern.name}_d; | ||||||
| } | } | ||||||
|  |  | ||||||
| static inline void __${kern.name} _a(${kern.arglist_full}) | static inline void __${kern.name}_a(${kern.arglist_full}) | ||||||
| { | { | ||||||
|     __init_${kern.name}(); |     __init_${kern.name}(); | ||||||
|     ${kern.name} _a(${kern.arglist_names}); |     ${kern.name}_a(${kern.arglist_names}); | ||||||
| } | } | ||||||
|  |  | ||||||
| static inline void __${kern.name} _u(${kern.arglist_full}) | static inline void __${kern.name}_u(${kern.arglist_full}) | ||||||
| { | { | ||||||
|     __init_${kern.name}(); |     __init_${kern.name}(); | ||||||
|     ${kern.name} _u(${kern.arglist_names}); |     ${kern.name}_u(${kern.arglist_names}); | ||||||
| } | } | ||||||
|  |  | ||||||
| static inline void __${kern.name}(${kern.arglist_full}) | static inline void __${kern.name}(${kern.arglist_full}) | ||||||
| @@ -183,32 +174,34 @@ static inline void __${kern.name}(${kern.arglist_full}) | |||||||
|     ${kern.name}(${kern.arglist_names}); |     ${kern.name}(${kern.arglist_names}); | ||||||
| } | } | ||||||
|  |  | ||||||
| ${kern.pname} ${kern.name} _a = &__${kern.name} _a; | ${kern.pname} ${kern.name}_a = &__${kern.name}_a; | ||||||
| ${kern.pname} ${kern.name} _u = &__${kern.name} _u; | ${kern.pname} ${kern.name}_u = &__${kern.name}_u; | ||||||
| ${kern.pname} ${kern.name} = &__${kern.name}; | ${kern.pname} ${kern.name}   = &__${kern.name}; | ||||||
|  |  | ||||||
| void ${kern.name} _manual(${kern.arglist_full}, const char *impl_name) | void ${kern.name}_manual(${kern.arglist_full}, const char* impl_name) | ||||||
| { | { | ||||||
|     const int index = volk_gnsssdr_get_index( |     const int index = volk_gnsssdr_get_index( | ||||||
|         get_machine()->${kern.name} _impl_names, |         get_machine()->${kern.name}_impl_names, | ||||||
|         get_machine()->${kern.name} _n_impls, |         get_machine()->${kern.name}_n_impls, | ||||||
|         impl_name); |         impl_name | ||||||
|     get_machine()->${kern.name} _impls[index]( |     ); | ||||||
|         ${kern.arglist_names}); |     get_machine()->${kern.name}_impls[index]( | ||||||
|  |         ${kern.arglist_names} | ||||||
|  |     ); | ||||||
| } | } | ||||||
|  |  | ||||||
| volk_gnsssdr_func_desc_t ${kern.name} _get_func_desc(void) | volk_gnsssdr_func_desc_t ${kern.name}_get_func_desc(void) { | ||||||
| { |     const char **impl_names = get_machine()->${kern.name}_impl_names; | ||||||
|     const char **impl_names = get_machine()->${kern.name} _impl_names; |     const int *impl_deps = get_machine()->${kern.name}_impl_deps; | ||||||
|     const int *impl_deps = get_machine()->${kern.name} _impl_deps; |     const bool *alignment = get_machine()->${kern.name}_impl_alignment; | ||||||
|     const bool *alignment = get_machine()->${kern.name} _impl_alignment; |     const size_t n_impls = get_machine()->${kern.name}_n_impls; | ||||||
|     const size_t n_impls = get_machine()->${kern.name} _n_impls; |  | ||||||
|     volk_gnsssdr_func_desc_t desc = { |     volk_gnsssdr_func_desc_t desc = { | ||||||
|         impl_names, |         impl_names, | ||||||
|         impl_deps, |         impl_deps, | ||||||
|         alignment, |         alignment, | ||||||
|         n_impls}; |         n_impls | ||||||
|  |     }; | ||||||
|     return desc; |     return desc; | ||||||
| } | } | ||||||
|  |  | ||||||
| % endfor | %endfor | ||||||
|   | |||||||
| @@ -42,7 +42,7 @@ typedef struct volk_gnsssdr_func_desc | |||||||
| VOLK_API void volk_gnsssdr_list_machines(void); | VOLK_API void volk_gnsssdr_list_machines(void); | ||||||
|  |  | ||||||
| //! Returns the name of the machine this instance will use | //! Returns the name of the machine this instance will use | ||||||
| VOLK_API const char *volk_gnsssdr_get_machine(void); | VOLK_API const char* volk_gnsssdr_get_machine(void); | ||||||
|  |  | ||||||
| //! Get the machine alignment in bytes | //! Get the machine alignment in bytes | ||||||
| VOLK_API size_t volk_gnsssdr_get_alignment(void); | VOLK_API size_t volk_gnsssdr_get_alignment(void); | ||||||
| @@ -74,19 +74,19 @@ VOLK_API bool volk_gnsssdr_is_aligned(const void *ptr); | |||||||
| extern VOLK_API ${kern.pname} ${kern.name}; | extern VOLK_API ${kern.pname} ${kern.name}; | ||||||
|  |  | ||||||
| //! A function pointer to the fastest aligned implementation | //! A function pointer to the fastest aligned implementation | ||||||
| extern VOLK_API ${kern.pname} ${kern.name} _a; | extern VOLK_API ${kern.pname} ${kern.name}_a; | ||||||
|  |  | ||||||
| //! A function pointer to the fastest unaligned implementation | //! A function pointer to the fastest unaligned implementation | ||||||
| extern VOLK_API ${kern.pname} ${kern.name} _u; | extern VOLK_API ${kern.pname} ${kern.name}_u; | ||||||
|  |  | ||||||
| //! Call into a specific implementation given by name | //! Call into a specific implementation given by name | ||||||
| extern VOLK_API void ${kern.name} _manual(${kern.arglist_full}, const char *impl_name); | extern VOLK_API void ${kern.name}_manual(${kern.arglist_full}, const char* impl_name); | ||||||
|  |  | ||||||
| //! Get description parameters for this kernel | //! Get description parameters for this kernel | ||||||
| extern VOLK_API volk_gnsssdr_func_desc_t ${kern.name} _get_func_desc(void); | extern VOLK_API volk_gnsssdr_func_desc_t ${kern.name}_get_func_desc(void); | ||||||
| % endfor | %endfor | ||||||
|  |  | ||||||
|         __VOLK_DECL_END | __VOLK_DECL_END | ||||||
|  |  | ||||||
|  |  | ||||||
| #endif /*INCLUDED_VOLK_GNSSSDR_RUNTIME*/ | #endif /*INCLUDED_VOLK_GNSSSDR_RUNTIME*/ | ||||||
|   | |||||||
| @@ -21,8 +21,7 @@ | |||||||
|  |  | ||||||
| %for i, arch in enumerate(archs): | %for i, arch in enumerate(archs): | ||||||
| //#ifndef LV_${arch.name.upper()} | //#ifndef LV_${arch.name.upper()} | ||||||
| #define LV_$ \ | #define LV_${arch.name.upper()} ${i} | ||||||
|     {arch.name.upper()} $ { i } |  | ||||||
| //#endif | //#endif | ||||||
| %endfor | %endfor | ||||||
|  |  | ||||||
|   | |||||||
| @@ -24,54 +24,50 @@ | |||||||
| struct VOLK_CPU volk_gnsssdr_cpu; | struct VOLK_CPU volk_gnsssdr_cpu; | ||||||
|  |  | ||||||
| #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) | #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) | ||||||
| #define VOLK_CPU_x86 |     #define VOLK_CPU_x86 | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| #if defined(VOLK_CPU_x86) | #if defined(VOLK_CPU_x86) | ||||||
|  |  | ||||||
| //implement get cpuid for gcc compilers using a system or local copy of cpuid.h | //implement get cpuid for gcc compilers using a system or local copy of cpuid.h | ||||||
| #if defined(__GNUC__) | #if defined(__GNUC__) | ||||||
| #include <cpuid.h> |     #include <cpuid.h> | ||||||
| #define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r + 0, (unsigned int *)r + 1, (unsigned int *)r + 2, (unsigned int *)r + 3) |     #define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3) | ||||||
| #define cpuid_x86_count(op, count, regs) __cpuid_count(op, count, *((unsigned int *)regs), *((unsigned int *)regs + 1), *((unsigned int *)regs + 2), *((unsigned int *)regs + 3)) |     #define cpuid_x86_count(op, count, regs) __cpuid_count(op, count, *((unsigned int*)regs), *((unsigned int*)regs+1), *((unsigned int*)regs+2), *((unsigned int*)regs+3)) | ||||||
|  |  | ||||||
| /* Return Intel AVX extended CPU capabilities register. |     /* Return Intel AVX extended CPU capabilities register. | ||||||
|      * This function will bomb on non-AVX-capable machines, so |      * This function will bomb on non-AVX-capable machines, so | ||||||
|      * check for AVX capability before executing. |      * check for AVX capability before executing. | ||||||
|      */ |      */ | ||||||
| #if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3)) && defined(HAVE_XGETBV) |     #if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3)) && defined(HAVE_XGETBV) | ||||||
| static inline unsigned long long _xgetbv(unsigned int index) |     static inline unsigned long long _xgetbv(unsigned int index){ | ||||||
| { |         unsigned int eax, edx; | ||||||
|     unsigned int eax, edx; |         __VOLK_ASM __VOLK_VOLATILE ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); | ||||||
|     __VOLK_ASM __VOLK_VOLATILE("xgetbv" |         return ((unsigned long long)edx << 32) | eax; | ||||||
|                                : "=a"(eax), "=d"(edx) |     } | ||||||
|                                : "c"(index)); |     #define __xgetbv() _xgetbv(0) | ||||||
|     return ((unsigned long long)edx << 32) | eax; |     #else | ||||||
| } |     #define __xgetbv() 0 | ||||||
| #define __xgetbv() _xgetbv(0) |     #endif | ||||||
| #else |  | ||||||
| #define __xgetbv() 0 |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
| //implement get cpuid for MSVC compilers using __cpuid intrinsic | //implement get cpuid for MSVC compilers using __cpuid intrinsic | ||||||
| #elif defined(_MSC_VER) && defined(HAVE_INTRIN_H) | #elif defined(_MSC_VER) && defined(HAVE_INTRIN_H) | ||||||
| #include <intrin.h> |     #include <intrin.h> | ||||||
| #define cpuid_x86(op, r) __cpuid(((int *)r), op) |     #define cpuid_x86(op, r) __cpuid(((int*)r), op) | ||||||
|  |  | ||||||
| #if defined(_XCR_XFEATURE_ENABLED_MASK) |     #if defined(_XCR_XFEATURE_ENABLED_MASK) | ||||||
| #define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) |     #define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) | ||||||
| #else |     #else | ||||||
| #define __xgetbv() 0 |     #define __xgetbv() 0 | ||||||
| #endif |     #endif | ||||||
|  |  | ||||||
| #else | #else | ||||||
| #error "A get cpuid for volk_gnsssdr is not available on this compiler..." |     #error "A get cpuid for volk_gnsssdr is not available on this compiler..." | ||||||
| #endif  //defined(__GNUC__) | #endif //defined(__GNUC__) | ||||||
|  |  | ||||||
| #endif  //defined(VOLK_CPU_x86) | #endif //defined(VOLK_CPU_x86) | ||||||
|  |  | ||||||
| static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int count, unsigned int reg, unsigned int bit) | static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int count, unsigned int reg, unsigned int bit) { | ||||||
| { |  | ||||||
| #if defined(VOLK_CPU_x86) | #if defined(VOLK_CPU_x86) | ||||||
|     unsigned int regs[4] = {0}; |     unsigned int regs[4] = {0}; | ||||||
|     cpuid_x86_count(level, count, regs); |     cpuid_x86_count(level, count, regs); | ||||||
| @@ -81,11 +77,10 @@ static inline unsigned int cpuid_count_x86_bit(unsigned int level, unsigned int | |||||||
| #endif | #endif | ||||||
| } | } | ||||||
|  |  | ||||||
| static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit) | static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsigned int bit) { | ||||||
| { |  | ||||||
| #if defined(VOLK_CPU_x86) | #if defined(VOLK_CPU_x86) | ||||||
|     unsigned int regs[4]; |     unsigned int regs[4]; | ||||||
|     memset(regs, 0, sizeof(unsigned int) * 4); |     memset(regs, 0, sizeof(unsigned int)*4); | ||||||
|     cpuid_x86(op, regs); |     cpuid_x86(op, regs); | ||||||
|     return regs[reg] >> bit & 0x01; |     return regs[reg] >> bit & 0x01; | ||||||
| #else | #else | ||||||
| @@ -93,11 +88,10 @@ static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, unsi | |||||||
| #endif | #endif | ||||||
| } | } | ||||||
|  |  | ||||||
| static inline unsigned int check_extended_cpuid(unsigned int val) | static inline unsigned int check_extended_cpuid(unsigned int val) { | ||||||
| { |  | ||||||
| #if defined(VOLK_CPU_x86) | #if defined(VOLK_CPU_x86) | ||||||
|     unsigned int regs[4]; |     unsigned int regs[4]; | ||||||
|     memset(regs, 0, sizeof(unsigned int) * 4); |     memset(regs, 0, sizeof(unsigned int)*4); | ||||||
|     cpuid_x86(0x80000000, regs); |     cpuid_x86(0x80000000, regs); | ||||||
|     return regs[0] >= val; |     return regs[0] >= val; | ||||||
| #else | #else | ||||||
| @@ -105,8 +99,7 @@ static inline unsigned int check_extended_cpuid(unsigned int val) | |||||||
| #endif | #endif | ||||||
| } | } | ||||||
|  |  | ||||||
| static inline unsigned int get_avx_enabled(void) | static inline unsigned int get_avx_enabled(void) { | ||||||
| { |  | ||||||
| #if defined(VOLK_CPU_x86) | #if defined(VOLK_CPU_x86) | ||||||
|     return __xgetbv() & 0x6; |     return __xgetbv() & 0x6; | ||||||
| #else | #else | ||||||
| @@ -114,8 +107,7 @@ static inline unsigned int get_avx_enabled(void) | |||||||
| #endif | #endif | ||||||
| } | } | ||||||
|  |  | ||||||
| static inline unsigned int get_avx2_enabled(void) | static inline unsigned int get_avx2_enabled(void) { | ||||||
| { |  | ||||||
| #if defined(VOLK_CPU_x86) | #if defined(VOLK_CPU_x86) | ||||||
|     return __xgetbv() & 0x6; |     return __xgetbv() & 0x6; | ||||||
| #else | #else | ||||||
| @@ -125,30 +117,28 @@ static inline unsigned int get_avx2_enabled(void) | |||||||
|  |  | ||||||
| //neon detection is linux specific | //neon detection is linux specific | ||||||
| #if defined(__arm__) && defined(__linux__) | #if defined(__arm__) && defined(__linux__) | ||||||
| #include <asm/hwcap.h> |     #include <asm/hwcap.h> | ||||||
| #include <linux/auxvec.h> |     #include <linux/auxvec.h> | ||||||
| #include <stdio.h> |     #include <stdio.h> | ||||||
| #define VOLK_CPU_ARM |     #define VOLK_CPU_ARM | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| static int has_neon(void) | static int has_neon(void){ | ||||||
| { |  | ||||||
| #if defined(VOLK_CPU_ARM) | #if defined(VOLK_CPU_ARM) | ||||||
|     FILE *auxvec_f; |     FILE *auxvec_f; | ||||||
|     unsigned long auxvec[2]; |     unsigned long auxvec[2]; | ||||||
|     unsigned int found_neon = 0; |     unsigned int found_neon = 0; | ||||||
|     auxvec_f = fopen("/proc/self/auxv", "rb"); |     auxvec_f = fopen("/proc/self/auxv", "rb"); | ||||||
|     if (!auxvec_f) return 0; |     if(!auxvec_f) return 0; | ||||||
|  |  | ||||||
|     size_t r = 1; |     size_t r = 1; | ||||||
|     //so auxv is basically 32b of ID and 32b of value |     //so auxv is basically 32b of ID and 32b of value | ||||||
|     //so it goes like this |     //so it goes like this | ||||||
|     while (!found_neon && r) |     while(!found_neon && r) { | ||||||
|         { |       r = fread(auxvec, sizeof(unsigned long), 2, auxvec_f); | ||||||
|             r = fread(auxvec, sizeof(unsigned long), 2, auxvec_f); |       if((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_NEON)) | ||||||
|             if ((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_NEON)) |         found_neon = 1; | ||||||
|                 found_neon = 1; |     } | ||||||
|         } |  | ||||||
|  |  | ||||||
|     fclose(auxvec_f); |     fclose(auxvec_f); | ||||||
|     return found_neon; |     return found_neon; | ||||||
| @@ -158,59 +148,50 @@ static int has_neon(void) | |||||||
| } | } | ||||||
|  |  | ||||||
| %for arch in archs: | %for arch in archs: | ||||||
| static int i_can_has_${arch.name} (void) | static int i_can_has_${arch.name} (void) { | ||||||
| { |  | ||||||
|     %for check, params in arch.checks: |     %for check, params in arch.checks: | ||||||
|     if (${check}(<% joined_params = ', '.join(params)%>${joined_params}) == 0) return 0; |     if (${check}(<% joined_params = ', '.join(params)%>${joined_params}) == 0) return 0; | ||||||
|     % endfor return 1; |     %endfor | ||||||
|  |     return 1; | ||||||
| } | } | ||||||
| % endfor | %endfor | ||||||
|  |  | ||||||
| #if defined(HAVE_FENV_H) | #if defined(HAVE_FENV_H) | ||||||
| #if defined(FE_TONEAREST) |     #if defined(FE_TONEAREST) | ||||||
| #include <fenv.h> |         #include <fenv.h> | ||||||
|     static inline void |         static inline void set_float_rounding(void){ | ||||||
|     set_float_rounding(void) |             fesetround(FE_TONEAREST); | ||||||
| { |         } | ||||||
|     fesetround(FE_TONEAREST); |     #else | ||||||
| } |         static inline void set_float_rounding(void){ | ||||||
| #else |             //do nothing | ||||||
|     static inline void |         } | ||||||
|     set_float_rounding(void) |     #endif | ||||||
| { |  | ||||||
|     //do nothing |  | ||||||
| } |  | ||||||
| #endif |  | ||||||
| #elif defined(_MSC_VER) | #elif defined(_MSC_VER) | ||||||
| #include <float.h> |     #include <float.h> | ||||||
|     static inline void |     static inline void set_float_rounding(void){ | ||||||
|     set_float_rounding(void) |         unsigned int cwrd; | ||||||
| { |         _controlfp_s(&cwrd, 0, 0); | ||||||
|     unsigned int cwrd; |         _controlfp_s(&cwrd, _RC_NEAR, _MCW_RC); | ||||||
|     _controlfp_s(&cwrd, 0, 0); |     } | ||||||
|     _controlfp_s(&cwrd, _RC_NEAR, _MCW_RC); |  | ||||||
| } |  | ||||||
| #else | #else | ||||||
|     static inline void |     static inline void set_float_rounding(void){ | ||||||
|     set_float_rounding(void) |         //do nothing | ||||||
| { |     } | ||||||
|     //do nothing |  | ||||||
| } |  | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| void volk_gnsssdr_cpu_init() | void volk_gnsssdr_cpu_init() { | ||||||
| { |  | ||||||
|     %for arch in archs: |     %for arch in archs: | ||||||
|     volk_gnsssdr_cpu.has_${arch.name} = &i_can_has_${arch.name}; |     volk_gnsssdr_cpu.has_${arch.name} = &i_can_has_${arch.name}; | ||||||
|     % endfor |     %endfor | ||||||
|         set_float_rounding(); |     set_float_rounding(); | ||||||
| } | } | ||||||
|  |  | ||||||
| unsigned int volk_gnsssdr_get_lvarch() | unsigned int volk_gnsssdr_get_lvarch() { | ||||||
| { |  | ||||||
|     unsigned int retval = 0; |     unsigned int retval = 0; | ||||||
|     volk_gnsssdr_cpu_init(); |     volk_gnsssdr_cpu_init(); | ||||||
|     %for arch in archs: |     %for arch in archs: | ||||||
|     retval += volk_gnsssdr_cpu.has_${arch.name}() << LV_${arch.name.upper()}; |     retval += volk_gnsssdr_cpu.has_${arch.name}() << LV_${arch.name.upper()}; | ||||||
|     % endfor return retval; |     %endfor | ||||||
|  |     return retval; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -23,17 +23,16 @@ | |||||||
|  |  | ||||||
| __VOLK_DECL_BEGIN | __VOLK_DECL_BEGIN | ||||||
|  |  | ||||||
| struct VOLK_CPU | struct VOLK_CPU { | ||||||
| { |  | ||||||
|     %for arch in archs: |     %for arch in archs: | ||||||
|     int (*has_${arch.name}) (); |     int (*has_${arch.name}) (); | ||||||
|     % endfor |     %endfor | ||||||
| }; | }; | ||||||
|  |  | ||||||
| extern struct VOLK_CPU volk_gnsssdr_cpu; | extern struct VOLK_CPU volk_gnsssdr_cpu; | ||||||
|  |  | ||||||
| void volk_gnsssdr_cpu_init(); | void volk_gnsssdr_cpu_init (); | ||||||
| unsigned int volk_gnsssdr_get_lvarch(); | unsigned int volk_gnsssdr_get_lvarch (); | ||||||
|  |  | ||||||
| __VOLK_DECL_END | __VOLK_DECL_END | ||||||
|  |  | ||||||
|   | |||||||
| @@ -20,11 +20,7 @@ | |||||||
| <% arch_names = this_machine.arch_names %> | <% arch_names = this_machine.arch_names %> | ||||||
|  |  | ||||||
| %for arch in this_machine.archs: | %for arch in this_machine.archs: | ||||||
| #define LV_HAVE_$         \ | #define LV_HAVE_${arch.name.upper()} 1 | ||||||
|     {                     \ |  | ||||||
|         arch.name.upper() \ |  | ||||||
|     }                     \ |  | ||||||
|     1 |  | ||||||
| %endfor | %endfor | ||||||
|  |  | ||||||
| #include <volk_gnsssdr/volk_gnsssdr_common.h> | #include <volk_gnsssdr/volk_gnsssdr_common.h> | ||||||
| @@ -39,9 +35,7 @@ | |||||||
| #include <volk_gnsssdr/${kern.name}.h> | #include <volk_gnsssdr/${kern.name}.h> | ||||||
| %endfor | %endfor | ||||||
|  |  | ||||||
| struct volk_gnsssdr_machine volk_gnsssdr_machine_$ | struct volk_gnsssdr_machine volk_gnsssdr_machine_${this_machine.name} = { | ||||||
| { |  | ||||||
|     this_machine.name} = { |  | ||||||
| <% make_arch_have_list = (' | '.join(['(1 << LV_%s)'%a.name.upper() for a in this_machine.archs])) %>    ${make_arch_have_list}, | <% make_arch_have_list = (' | '.join(['(1 << LV_%s)'%a.name.upper() for a in this_machine.archs])) %>    ${make_arch_have_list}, | ||||||
| <% this_machine_name = "\""+this_machine.name+"\"" %>    ${this_machine_name}, | <% this_machine_name = "\""+this_machine.name+"\"" %>    ${this_machine_name}, | ||||||
|     ${this_machine.alignment}, |     ${this_machine.alignment}, | ||||||
|   | |||||||
| @@ -22,10 +22,10 @@ | |||||||
|  |  | ||||||
| struct volk_gnsssdr_machine *volk_gnsssdr_machines[] = { | struct volk_gnsssdr_machine *volk_gnsssdr_machines[] = { | ||||||
| %for machine in machines: | %for machine in machines: | ||||||
| #ifdef LV_MACHINE_${machine.name.upper() } | #ifdef LV_MACHINE_${machine.name.upper()} | ||||||
| &volk_gnsssdr_machine_${machine.name}, | &volk_gnsssdr_machine_${machine.name}, | ||||||
| #endif | #endif | ||||||
| %endfor | %endfor | ||||||
| }; | }; | ||||||
|  |  | ||||||
| unsigned int n_volk_gnsssdr_machines = sizeof(volk_gnsssdr_machines) / sizeof(*volk_gnsssdr_machines); | unsigned int n_volk_gnsssdr_machines = sizeof(volk_gnsssdr_machines)/sizeof(*volk_gnsssdr_machines); | ||||||
|   | |||||||
| @@ -27,30 +27,26 @@ | |||||||
|  |  | ||||||
| __VOLK_DECL_BEGIN | __VOLK_DECL_BEGIN | ||||||
|  |  | ||||||
| struct volk_gnsssdr_machine | struct volk_gnsssdr_machine { | ||||||
| { |     const unsigned int caps; //capabilities (i.e., archs compiled into this machine, in the volk_gnsssdr_get_lvarch format) | ||||||
|     const unsigned int caps;  //capabilities (i.e., archs compiled into this machine, in the volk_gnsssdr_get_lvarch format) |  | ||||||
|     const char *name; |     const char *name; | ||||||
|     const size_t alignment;  //the maximum byte alignment required for functions in this library |     const size_t alignment; //the maximum byte alignment required for functions in this library | ||||||
|     %for kern in kernels: |     %for kern in kernels: | ||||||
|     const char *${kern.name}_name; |     const char *${kern.name}_name; | ||||||
|     const char *${kern.name} _impl_names[<% len_archs = len(archs) %> ${len_archs}]; |     const char *${kern.name}_impl_names[<%len_archs=len(archs)%>${len_archs}]; | ||||||
|     const int ${kern.name} _impl_deps[${len_archs}]; |     const int ${kern.name}_impl_deps[${len_archs}]; | ||||||
|     const bool ${kern.name} _impl_alignment[${len_archs}]; |     const bool ${kern.name}_impl_alignment[${len_archs}]; | ||||||
|     const ${kern.pname} ${kern.name} _impls[${len_archs}]; |     const ${kern.pname} ${kern.name}_impls[${len_archs}]; | ||||||
|     const size_t ${kern.name} _n_impls; |     const size_t ${kern.name}_n_impls; | ||||||
|     % endfor |     %endfor | ||||||
| }; | }; | ||||||
|  |  | ||||||
| %for machine in machines: | %for machine in machines: | ||||||
| #ifdef LV_MACHINE_${machine.name.upper() } | #ifdef LV_MACHINE_${machine.name.upper()} | ||||||
| extern struct volk_gnsssdr_machine volk_gnsssdr_machine_$ | extern struct volk_gnsssdr_machine volk_gnsssdr_machine_${machine.name}; | ||||||
| { |  | ||||||
|     machine.name |  | ||||||
| }; |  | ||||||
| #endif | #endif | ||||||
| % endfor | %endfor | ||||||
|  |  | ||||||
|         __VOLK_DECL_END | __VOLK_DECL_END | ||||||
|  |  | ||||||
| #endif  //INCLUDED_LIBVOLK_GNSSSDR_MACHINES_H | #endif //INCLUDED_LIBVOLK_GNSSSDR_MACHINES_H | ||||||
|   | |||||||
| @@ -24,6 +24,6 @@ | |||||||
|  |  | ||||||
| %for kern in kernels: | %for kern in kernels: | ||||||
| typedef void (*${kern.pname})(${kern.arglist_types}); | typedef void (*${kern.pname})(${kern.arglist_types}); | ||||||
| % endfor | %endfor | ||||||
|  |  | ||||||
| #endif /*INCLUDED_VOLK_GNSSSDR_TYPEDEFS*/ | #endif /*INCLUDED_VOLK_GNSSSDR_TYPEDEFS*/ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Carles Fernandez
					Carles Fernandez