mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2024-12-13 19:50:34 +00:00
Make SIMD instructions work on MS Windows
This commit is contained in:
parent
94337c159f
commit
7e1f0176f4
@ -57,6 +57,8 @@ SPDX-FileCopyrightText: 2011-2020 Carles Fernandez-Prades <carles.fernandez@cttc
|
||||
GNSS-SDR, the old method is still used in old development environments. No
|
||||
extra dependency is needed. This change is transparent to the user, since
|
||||
everything is managed by the CMake scripts.
|
||||
- The `volk_gnsssdr` library can be built on Microsoft Windows and can execute
|
||||
SIMD instructions on that OS.
|
||||
- Fix building with `-DENABLE_CUDA=ON` for blocks implemented with CUDA.
|
||||
|
||||
### Improvements in Usability:
|
||||
|
@ -247,7 +247,10 @@ if(CMAKE_VERSION VERSION_GREATER 3.0 AND SUPPORTED_CPU_FEATURES_ARCH)
|
||||
FORCE
|
||||
)
|
||||
set(USE_CPU_FEATURES ON)
|
||||
set(BUILD_SHARED_LIBS_SAVED "${BUILD_SHARED_LIBS}")
|
||||
set(BUILD_SHARED_LIBS OFF)
|
||||
add_subdirectory(cpu_features)
|
||||
set(BUILD_SHARED_LIBS "${BUILD_SHARED_LIBS_SAVED}")
|
||||
endif()
|
||||
|
||||
# Python
|
||||
|
@ -139,6 +139,24 @@ $ volk_gnsssdr_profile
|
||||
$ sudo ldconfig
|
||||
```
|
||||
|
||||
### Building on Microsoft Windows
|
||||
|
||||
With Visual Studio 2019, from a PowerShell session run as administrator:
|
||||
|
||||
```
|
||||
$ git clone https://github.com/gnss-sdr/gnss-sdr
|
||||
$ cd gnss-sdr
|
||||
$ git checkout next
|
||||
$ cd build
|
||||
$ cmake -G "Visual Studio 16 2019" ..\src\algorithms\libs\volk_gnsssdr_module\volk_gnsssdr
|
||||
$ cd ..
|
||||
$ cmake --build build --config Release
|
||||
$ cd build
|
||||
$ ctest -C Release
|
||||
$ cd ..
|
||||
$ cmake --install build
|
||||
```
|
||||
|
||||
## References
|
||||
|
||||
If you use VOLK_GNSSSDR in your research and/or software, please cite the
|
||||
|
@ -68,7 +68,6 @@
|
||||
<check name="mmx"></check>
|
||||
<flag compiler="gnu">-mmmx</flag>
|
||||
<flag compiler="clang">-mmmx</flag>
|
||||
<flag compiler="msvc">/arch:SSE</flag>
|
||||
<alignment>8</alignment>
|
||||
</arch>
|
||||
|
||||
@ -84,7 +83,6 @@
|
||||
<check name="sse"></check>
|
||||
<flag compiler="gnu">-msse</flag>
|
||||
<flag compiler="clang">-msse</flag>
|
||||
<flag compiler="msvc">/arch:SSE</flag>
|
||||
<environment>_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);</environment>
|
||||
<include>xmmintrin.h</include>
|
||||
<alignment>16</alignment>
|
||||
@ -94,7 +92,6 @@
|
||||
<check name="sse2"></check>
|
||||
<flag compiler="gnu">-msse2</flag>
|
||||
<flag compiler="clang">-msse2</flag>
|
||||
<flag compiler="msvc">/arch:SSE2</flag>
|
||||
<alignment>16</alignment>
|
||||
</arch>
|
||||
|
||||
|
@ -63,7 +63,6 @@
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_malloc.h>
|
||||
#include <math.h>
|
||||
// #include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
@ -74,7 +73,7 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic(lv_32f
|
||||
unsigned int n;
|
||||
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
result[n_vec] = lv_cmake(0, 0);
|
||||
result[n_vec] = lv_cmake(0.0f, 0.0f);
|
||||
}
|
||||
for (n = 0; n < num_points; n++)
|
||||
{
|
||||
@ -115,7 +114,7 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload
|
||||
unsigned int j;
|
||||
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
result[n_vec] = lv_cmake(0, 0);
|
||||
result[n_vec] = lv_cmake(0.0f, 0.0f);
|
||||
}
|
||||
|
||||
for (n = 0; n < num_points / ROTATOR_RELOAD; n++)
|
||||
@ -158,6 +157,7 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload
|
||||
#include <immintrin.h>
|
||||
static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const float** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
#ifndef WIN32
|
||||
unsigned int number = 0;
|
||||
int vec_ind = 0;
|
||||
unsigned int i = 0;
|
||||
@ -287,7 +287,7 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_
|
||||
|
||||
_mm256_store_ps((float*)dotProductVector, dotProdVal0[vec_ind]); // Store the results back into the dot product vector
|
||||
|
||||
result[vec_ind] = lv_cmake(0, 0);
|
||||
result[vec_ind] = lv_cmake(0.0f, 0.0f);
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
result[vec_ind] += dotProductVector[i];
|
||||
@ -312,6 +312,9 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_
|
||||
}
|
||||
|
||||
*phase = _phase;
|
||||
#else
|
||||
volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload(result, in_common, phase_inc, phase, in_a, num_a_vectors, num_points);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_AVX */
|
||||
@ -322,6 +325,7 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_
|
||||
#include <immintrin.h>
|
||||
static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const float** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
#ifndef WIN32
|
||||
unsigned int number = 0;
|
||||
int vec_ind = 0;
|
||||
unsigned int i = 0;
|
||||
@ -451,7 +455,7 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_
|
||||
|
||||
_mm256_store_ps((float*)dotProductVector, dotProdVal0[vec_ind]); // Store the results back into the dot product vector
|
||||
|
||||
result[vec_ind] = lv_cmake(0, 0);
|
||||
result[vec_ind] = lv_cmake(0.0f, 0.0f);
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
result[vec_ind] += dotProductVector[i];
|
||||
@ -476,6 +480,9 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_
|
||||
}
|
||||
|
||||
*phase = _phase;
|
||||
#else
|
||||
volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload(result, in_common, phase_inc, phase, in_a, num_a_vectors, num_points);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -106,8 +106,11 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_u_avx(lv_3
|
||||
in_a[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
|
||||
memcpy((float*)in_a[n], (float*)in, sizeof(float) * num_points);
|
||||
}
|
||||
#ifndef WIN32
|
||||
volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_u_avx(result, local_code, phase_inc[0], phase, (const float**)in_a, num_a_vectors, num_points);
|
||||
|
||||
#else
|
||||
volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload(result, local_code, phase_inc[0], phase, (const float**)in_a, num_a_vectors, num_points);
|
||||
#endif
|
||||
for (n = 0; n < num_a_vectors; n++)
|
||||
{
|
||||
volk_gnsssdr_free(in_a[n]);
|
||||
@ -136,8 +139,11 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dotprodxnpuppet_32fc_a_avx(lv_3
|
||||
in_a[n] = (float*)volk_gnsssdr_malloc(sizeof(float) * num_points, volk_gnsssdr_get_alignment());
|
||||
memcpy((float*)in_a[n], (float*)in, sizeof(float) * num_points);
|
||||
}
|
||||
#ifndef WIN32
|
||||
volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_a_avx(result, local_code, phase_inc[0], phase, (const float**)in_a, num_a_vectors, num_points);
|
||||
|
||||
#else
|
||||
volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_generic_reload(result, local_code, phase_inc[0], phase, (const float**)in_a, num_a_vectors, num_points);
|
||||
#endif
|
||||
for (n = 0; n < num_a_vectors; n++)
|
||||
{
|
||||
volk_gnsssdr_free(in_a[n]);
|
||||
|
@ -74,7 +74,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_generic(lv_32fc
|
||||
unsigned int n;
|
||||
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
result[n_vec] = lv_cmake(0, 0);
|
||||
result[n_vec] = lv_cmake(0.0f, 0.0f);
|
||||
}
|
||||
for (n = 0; n < num_points; n++)
|
||||
{
|
||||
@ -115,7 +115,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_generic_reload(
|
||||
unsigned int j;
|
||||
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
result[n_vec] = lv_cmake(0, 0);
|
||||
result[n_vec] = lv_cmake(0.0f, 0.0f);
|
||||
}
|
||||
|
||||
for (n = 0; n < num_points / ROTATOR_RELOAD; n++)
|
||||
@ -158,7 +158,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_generic_reload(
|
||||
#include <pmmintrin.h>
|
||||
static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_32fc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_32fc_t dotProduct = lv_cmake(0, 0);
|
||||
lv_32fc_t dotProduct = lv_cmake(0.0f, 0.0f);
|
||||
lv_32fc_t tmp32_1, tmp32_2;
|
||||
const unsigned int sse_iters = num_points / 2;
|
||||
int n_vec;
|
||||
@ -240,7 +240,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_
|
||||
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
_mm_store_ps((float*)dotProductVector, acc[n_vec]); // Store the results back into the dot product vector
|
||||
dotProduct = lv_cmake(0, 0);
|
||||
dotProduct = lv_cmake(0.0f, 0.0f);
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
dotProduct = dotProduct + dotProductVector[i];
|
||||
@ -270,7 +270,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_sse3(lv_32fc_
|
||||
#include <pmmintrin.h>
|
||||
static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_32fc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_32fc_t dotProduct = lv_cmake(0, 0);
|
||||
lv_32fc_t dotProduct = lv_cmake(0.0f, 0.0f);
|
||||
lv_32fc_t tmp32_1, tmp32_2;
|
||||
const unsigned int sse_iters = num_points / 2;
|
||||
int n_vec;
|
||||
@ -352,7 +352,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_
|
||||
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
_mm_store_ps((float*)dotProductVector, acc[n_vec]); // Store the results back into the dot product vector
|
||||
dotProduct = lv_cmake(0, 0);
|
||||
dotProduct = lv_cmake(0.0f, 0.0f);
|
||||
for (i = 0; i < 2; ++i)
|
||||
{
|
||||
dotProduct = dotProduct + dotProductVector[i];
|
||||
@ -382,7 +382,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_sse3(lv_32fc_
|
||||
#include <immintrin.h>
|
||||
static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_32fc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_32fc_t dotProduct = lv_cmake(0, 0);
|
||||
lv_32fc_t dotProduct = lv_cmake(0.0f, 0.0f);
|
||||
lv_32fc_t tmp32_1, tmp32_2;
|
||||
const unsigned int avx_iters = num_points / 4;
|
||||
int n_vec;
|
||||
@ -401,13 +401,14 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t
|
||||
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
acc[n_vec] = _mm256_setzero_ps();
|
||||
result[n_vec] = lv_cmake(0, 0);
|
||||
result[n_vec] = lv_cmake(0.0f, 0.0f);
|
||||
}
|
||||
|
||||
// phase rotation registers
|
||||
__m256 a, four_phase_acc_reg, yl, yh, tmp1, tmp1p, tmp2, tmp2p, z;
|
||||
|
||||
__attribute__((aligned(32))) lv_32fc_t four_phase_inc[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t four_phase_inc[4];
|
||||
const lv_32fc_t phase_inc2 = phase_inc * phase_inc;
|
||||
const lv_32fc_t phase_inc3 = phase_inc2 * phase_inc;
|
||||
const lv_32fc_t phase_inc4 = phase_inc3 * phase_inc;
|
||||
@ -417,7 +418,8 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t
|
||||
four_phase_inc[3] = phase_inc4;
|
||||
const __m256 four_phase_inc_reg = _mm256_load_ps((float*)four_phase_inc);
|
||||
|
||||
__attribute__((aligned(32))) lv_32fc_t four_phase_acc[4];
|
||||
__VOLK_ATTR_ALIGNED(32)
|
||||
lv_32fc_t four_phase_acc[4];
|
||||
four_phase_acc[0] = _phase;
|
||||
four_phase_acc[1] = _phase * phase_inc;
|
||||
four_phase_acc[2] = _phase * phase_inc2;
|
||||
@ -472,7 +474,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t
|
||||
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
_mm256_store_ps((float*)dotProductVector, acc[n_vec]); // Store the results back into the dot product vector
|
||||
dotProduct = lv_cmake(0, 0);
|
||||
dotProduct = lv_cmake(0.0f, 0.0f);
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
dotProduct = dotProduct + dotProductVector[i];
|
||||
@ -510,7 +512,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_u_avx(lv_32fc_t
|
||||
#include <immintrin.h>
|
||||
static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_32fc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_32fc_t dotProduct = lv_cmake(0, 0);
|
||||
lv_32fc_t dotProduct = lv_cmake(0.0f, 0.0f);
|
||||
lv_32fc_t tmp32_1, tmp32_2;
|
||||
const unsigned int avx_iters = num_points / 4;
|
||||
int n_vec;
|
||||
@ -529,7 +531,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t
|
||||
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
acc[n_vec] = _mm256_setzero_ps();
|
||||
result[n_vec] = lv_cmake(0, 0);
|
||||
result[n_vec] = lv_cmake(0.0f, 0.0f);
|
||||
}
|
||||
|
||||
// phase rotation registers
|
||||
@ -602,7 +604,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t
|
||||
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
_mm256_store_ps((float*)dotProductVector, acc[n_vec]); // Store the results back into the dot product vector
|
||||
dotProduct = lv_cmake(0, 0);
|
||||
dotProduct = lv_cmake(0.0f, 0.0f);
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
dotProduct = dotProduct + dotProductVector[i];
|
||||
@ -655,7 +657,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t*
|
||||
|
||||
if (neon_iters > 0)
|
||||
{
|
||||
lv_32fc_t dotProduct = lv_cmake(0, 0);
|
||||
lv_32fc_t dotProduct = lv_cmake(0.0f, 0.0f);
|
||||
float32_t arg_phase0 = cargf(_phase);
|
||||
float32_t arg_phase_inc = cargf(phase_inc);
|
||||
float32_t phase_est;
|
||||
@ -760,7 +762,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t*
|
||||
for (n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
vst2q_f32((float32_t*)dotProductVector, accumulator1[n_vec]); // Store the results back into the dot product vector
|
||||
dotProduct = lv_cmake(0, 0);
|
||||
dotProduct = lv_cmake(0.0f, 0.0f);
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
dotProduct = dotProduct + dotProductVector[i];
|
||||
|
@ -145,6 +145,11 @@ macro(OVERRULE_ARCH arch reason)
|
||||
list(REMOVE_ITEM available_archs ${arch})
|
||||
endmacro()
|
||||
|
||||
# FORCE_ARCH(arch reason): log "${reason}, Forced arch ${arch}" as a STATUS
# message, then append ${arch} to the available_archs list.
# The append is unconditional; this macro does not check whether ${arch} is
# already present in the list.
macro(FORCE_ARCH arch reason)
|
||||
message(STATUS "${reason}, Forced arch ${arch}")
|
||||
list(APPEND available_archs ${arch})
|
||||
endmacro()
|
||||
|
||||
########################################################################
|
||||
# eliminate AVX on if not on x86, or if the compiler does not accept
|
||||
# the xgetbv instruction, or {if not cross-compiling and the xgetbv
|
||||
@ -264,13 +269,13 @@ if(NOT CROSSCOMPILE_MULTILIB AND CPU_IS_x86)
|
||||
endif()
|
||||
|
||||
# MSVC 64 bit does not have MMX, overrule it
|
||||
if(${SIZEOF_CPU} EQUAL 64 AND MSVC)
|
||||
overrule_arch(mmx "No MMX for Win64")
|
||||
if(MSVC_VERSION GREATER 1700)
|
||||
overrule_arch(sse "No SSE for Win64 Visual Studio 2013")
|
||||
if(MSVC)
|
||||
if(${SIZEOF_CPU} EQUAL 64)
|
||||
overrule_arch(mmx "No MMX for Win64")
|
||||
endif()
|
||||
force_arch(sse "Built-in for MSVC > 2013")
|
||||
force_arch(sse2 "Built-in for MSVC > 2013")
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
||||
########################################################################
|
||||
|
@ -15,25 +15,10 @@
|
||||
#include <string.h>
|
||||
// clang-format on
|
||||
|
||||
#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4
|
||||
#define __popcnt __builtin_popcount
|
||||
#else
|
||||
inline unsigned __popcnt(unsigned num)
|
||||
{
|
||||
unsigned pop = 0;
|
||||
while (num)
|
||||
{
|
||||
if (num & 0x1) pop++;
|
||||
num >>= 1;
|
||||
}
|
||||
return pop;
|
||||
}
|
||||
#endif
|
||||
|
||||
int volk_gnsssdr_get_index(
|
||||
const char *impl_names[], //list of implementations by name
|
||||
const size_t n_impls, //number of implementations available
|
||||
const char *impl_name //the implementation name to find
|
||||
const char *impl_names[], // list of implementations by name
|
||||
const size_t n_impls, // number of implementations available
|
||||
const char *impl_name // the implementation name to find
|
||||
)
|
||||
{
|
||||
unsigned int i;
|
||||
@ -44,20 +29,20 @@ int volk_gnsssdr_get_index(
|
||||
return i;
|
||||
}
|
||||
}
|
||||
//TODO return -1;
|
||||
//something terrible should happen here
|
||||
// TODO return -1;
|
||||
// something terrible should happen here
|
||||
fprintf(stderr, "VOLK_GNSSSDR warning: no arch found, returning generic impl\n");
|
||||
return volk_gnsssdr_get_index(impl_names, n_impls, "generic"); //but we'll fake it for now
|
||||
}
|
||||
|
||||
|
||||
int volk_gnsssdr_rank_archs(
|
||||
const char *kern_name, //name of the kernel to rank
|
||||
const char *impl_names[], //list of implementations by name
|
||||
const int *impl_deps, //requirement mask per implementation
|
||||
const bool *alignment, //alignment status of each implementation
|
||||
size_t n_impls, //number of implementations available
|
||||
const bool align //if false, filter aligned implementations
|
||||
const char *kern_name, // name of the kernel to rank
|
||||
const char *impl_names[], // list of implementations by name
|
||||
const int *impl_deps, // requirement mask per implementation
|
||||
const bool *alignment, // alignment status of each implementation
|
||||
size_t n_impls, // number of implementations available
|
||||
const bool align // if false, filter aligned implementations
|
||||
)
|
||||
{
|
||||
size_t i;
|
||||
@ -78,7 +63,7 @@ int volk_gnsssdr_rank_archs(
|
||||
return volk_gnsssdr_get_index(impl_names, n_impls, "generic");
|
||||
}
|
||||
|
||||
//now look for the function name in the prefs list
|
||||
// now look for the function name in the prefs list
|
||||
for (i = 0; i < n_arch_prefs; i++)
|
||||
{
|
||||
if (!strncmp(kern_name, volk_gnsssdr_arch_prefs[i].name, sizeof(volk_gnsssdr_arch_prefs[i].name))) //found it
|
||||
@ -88,14 +73,14 @@ int volk_gnsssdr_rank_archs(
|
||||
}
|
||||
}
|
||||
|
||||
//return the best index with the largest deps
|
||||
// return the best index with the largest deps
|
||||
size_t best_index_a = 0;
|
||||
size_t best_index_u = 0;
|
||||
int best_value_a = -1;
|
||||
int best_value_u = -1;
|
||||
for (i = 0; i < n_impls; i++)
|
||||
{
|
||||
const signed val = __popcnt(impl_deps[i]);
|
||||
const signed val = impl_deps[i];
|
||||
if (alignment[i] && val > best_value_a)
|
||||
{
|
||||
best_index_a = i;
|
||||
@ -108,9 +93,9 @@ int volk_gnsssdr_rank_archs(
|
||||
}
|
||||
}
|
||||
|
||||
//when align and we found a best aligned, use it
|
||||
// when align and we found a best aligned, use it
|
||||
if (align && best_value_a != -1) return best_index_a;
|
||||
|
||||
//otherwise return the best unaligned
|
||||
// otherwise return the best unaligned
|
||||
return best_index_u;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user