Add support for aarch64

This commit is contained in:
Carles Fernandez 2018-04-29 01:19:48 +02:00
parent 1573c40938
commit a92e6de3d9
No known key found for this signature in database
GPG Key ID: 4C583C52B0C3877D
42 changed files with 289 additions and 101 deletions

View File

@ -176,20 +176,19 @@ function(VOLK_ADD_TEST test_name executable_name)
#set them in the PATH to run tests. The following appends the
#path of a target dependency.
#
#NOTE: get_target_property LOCATION is being deprecated as of
#CMake 3.2.0, which just prints a warning & notes that this
#functionality will be removed in the future. Leave it here for
#now until someone can figure out how to do this in Windows.
foreach(target ${test_name} ${VOLK_TEST_TARGET_DEPS})
get_target_property(location "${target}" LOCATION)
if(location)
get_filename_component(path ${location} PATH)
string(REGEX REPLACE "\\$\\(.*\\)" ${CMAKE_BUILD_TYPE} path ${path})
list(APPEND libpath ${path})
endif(location)
endforeach(target)
#create a list of target directories to be determined by the
#"add_test" command, via the $<FOO:BAR> operator; make sure the
#test's directory is first, since it ($1) is prepended to PATH.
unset(TARGET_DIR_LIST)
foreach(target ${executable_name} ${VOLK_TEST_TARGET_DEPS})
list(APPEND TARGET_DIR_LIST "$<TARGET_FILE_DIR:${target}>")
endforeach()
#replace list separator with the path separator (escaped)
string(REPLACE ";" "\\\\;" TARGET_DIR_LIST "${TARGET_DIR_LIST}")
list(APPEND libpath ${DLL_PATHS} "%PATH%")
#add command line argument (TARGET_DIR_LIST) to path and append current path
list(INSERT libpath 0 "%1")
list(APPEND libpath "%PATH%")
#replace list separator with the path separator (escaped)
string(REPLACE ";" "\\;" libpath "${libpath}")
@ -204,14 +203,18 @@ function(VOLK_ADD_TEST test_name executable_name)
file(APPEND ${bat_file} "SET ${environ}\n")
endforeach(environ)
set(VOLK_TEST_ARGS "${test_name}")
#redo the test args to have a space between each
string(REPLACE ";" " " VOLK_TEST_ARGS "${VOLK_TEST_ARGS}")
#finally: append the test name to execute
file(APPEND ${bat_file} ${test_name} " " ${VOLK_TEST_ARGS} "\n")
file(APPEND ${bat_file} "${executable_name} ${VOLK_TEST_ARGS}\n")
file(APPEND ${bat_file} "\n")
add_test(${test_name} ${bat_file})
add_test(NAME qa_${test_name}
COMMAND ${bat_file} ${TARGET_DIR_LIST}
)
endif(WIN32)
endfunction(VOLK_ADD_TEST)

View File

@ -0,0 +1,8 @@
########################################################################
# Toolchain file for building native on an ARM Cortex-A15 w/ NEON
# Usage: cmake -DCMAKE_TOOLCHAIN_FILE=<this file> <source directory>
########################################################################
# Native build: use the GNU C and C++ compiler drivers by name.
set(CMAKE_C_COMPILER gcc)
set(CMAKE_CXX_COMPILER g++)
# Force the ARMv7-A / NEON / hard-float flags into the cache, then reuse
# exactly the same flag string for C sources.
set(CMAKE_CXX_FLAGS "-march=armv7-a -mtune=cortex-a15 -mfpu=neon -mfloat-abi=hard" CACHE STRING "" FORCE)
set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "" FORCE)

View File

@ -0,0 +1,8 @@
########################################################################
# Toolchain file for building native on an ARM Cortex-A9 w/ NEON
# Usage: cmake -DCMAKE_TOOLCHAIN_FILE=<this file> <source directory>
########################################################################
# Native build: use the GNU C and C++ compiler drivers by name.
set(CMAKE_C_COMPILER gcc)
set(CMAKE_CXX_COMPILER g++)
# Force the ARMv7-A / NEON / hard-float flags into the cache, then reuse
# exactly the same flag string for C sources.
set(CMAKE_CXX_FLAGS "-march=armv7-a -mtune=cortex-a9 -mfpu=neon -mfloat-abi=hard" CACHE STRING "" FORCE)
set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "" FORCE)

View File

@ -0,0 +1,72 @@
#ifndef _MSC_VER // [
#error "Use this header only with Microsoft Visual C++ compilers!"
#endif // _MSC_VER ]
#ifndef _MSC_SYS_TIME_H_
#define _MSC_SYS_TIME_H_
#ifndef NOMINMAX
#define NOMINMAX
#endif
//http://social.msdn.microsoft.com/Forums/en/vcgeneral/thread/430449b3-f6dd-4e18-84de-eebd26a8d668
#include < time.h >
#include <windows.h> //I've omitted this line.
/* Offset, in microseconds, subtracted from the file time in gettimeofday()
 * to convert it to the Unix epoch. The two branches differ only in the
 * 64-bit unsigned literal suffix (Ui64 vs. ULL). */
#if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64
#else
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/* Fallback definition of struct timespec, compiled only when _MSC_VER is
 * below 1900 (presumably newer MSVC toolchains already declare it --
 * TODO confirm). */
#if _MSC_VER < 1900
struct timespec
{
time_t tv_sec; /* Seconds since 00:00:00 GMT, */
/* 1 January 1970 */
long tv_nsec; /* Additional nanoseconds since */
/* tv_sec */
};
#endif
/* Time-zone record that gettimeofday() fills in when its tz argument is
 * non-NULL. */
struct timezone
{
int tz_minuteswest; /* minutes W of Greenwich */
int tz_dsttime; /* type of dst correction */
};
/*
 * Minimal gettimeofday() replacement for MSVC builds.
 *
 * tv: if non-NULL, receives the current system time split into whole
 *     seconds (tv_sec) and remaining microseconds (tv_usec) relative to
 *     the Unix epoch.
 * tz: if non-NULL, receives the CRT time-zone settings (_timezone converted
 *     to minutes, and _daylight); _tzset() is called once, on the first
 *     such request.
 *
 * Returns 0 unconditionally.
 */
static inline int gettimeofday(struct timeval *tv, struct timezone *tz)
{
    FILETIME ft;
    unsigned __int64 tmpres = 0;
    static int tzflag;

    if (NULL != tv)
        {
            GetSystemTimeAsFileTime(&ft);

            /* assemble the 64-bit file time from its two 32-bit halves */
            tmpres |= ft.dwHighDateTime;
            tmpres <<= 32;
            tmpres |= ft.dwLowDateTime;

            /* FILETIME counts 100-nanosecond intervals: convert to
               microseconds before applying the microsecond-based offset */
            tmpres /= 10;

            /*converting file time to unix epoch*/
            tmpres -= DELTA_EPOCH_IN_MICROSECS;
            tv->tv_sec = (long)(tmpres / 1000000UL);
            tv->tv_usec = (long)(tmpres % 1000000UL);
        }

    if (NULL != tz)
        {
            /* initialise the CRT time-zone globals only once */
            if (!tzflag)
                {
                    _tzset();
                    tzflag++;
                }
            tz->tz_minuteswest = _timezone / 60; /* _timezone is in seconds */
            tz->tz_dsttime = _daylight;
        }

    return 0;
}

View File

@ -13,12 +13,24 @@
</arch>
<arch name="neon">
<flag compiler="gnu">-mfpu=neon</flag>
<flag compiler="gnu">-funsafe-math-optimizations</flag>
<alignment>16</alignment>
<check name="has_neon"></check>
</arch>
<arch name="neonv7">
<flag compiler="gnu">-mfpu=neon</flag>
<flag compiler="gnu">-funsafe-math-optimizations</flag>
<alignment>16</alignment>
<check name="has_neonv7"></check>
</arch>
<arch name="neonv8">
<flag compiler="gnu">-funsafe-math-optimizations</flag>
<alignment>16</alignment>
<check name="has_neonv8"></check>
</arch>
<arch name="32">
<flag compiler="gnu">-m32</flag>
</arch>

View File

@ -5,7 +5,15 @@
</machine>
<machine name="neon">
<archs>generic neon softfp|hardfp orc|</archs>
<archs>generic neon orc|</archs>
</machine>
<machine name="neonv7">
<archs>generic neon neonv7 softfp|hardfp orc|</archs>
</machine>
<machine name="neonv8">
<archs>generic neon neonv8</archs>
</machine>
<!-- trailing | bar means generate without either for MSVC -->

View File

@ -249,7 +249,7 @@ static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_a_avx(int16_t* result,
#endif
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_16i_resamplerxnpuppet_16i_neon(int16_t* result, const int16_t* local_code, unsigned int num_points)
{
int code_length_chips = 2046;

View File

@ -526,7 +526,7 @@ static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_u_avx(int16_t** result,
#endif
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16i_xn_resampler_16i_xn_neon(int16_t** result, const int16_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
{

View File

@ -1049,7 +1049,7 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2(lv_16sc
}
#endif /* LV_HAVE_AVX2 */
//#ifdef LV_HAVE_NEON
//#ifdef LV_HAVE_NEONV7
//#include <arm_neon.h>
//static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const int16_t** in_a, int num_a_vectors, unsigned int num_points)
@ -1228,10 +1228,10 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2(lv_16sc
//}
//}
//#endif [> LV_HAVE_NEON <]
//#endif [> LV_HAVE_NEONV7 <]
//#ifdef LV_HAVE_NEON
//#ifdef LV_HAVE_NEONV7
//#include <arm_neon.h>
//#include <volk_gnsssdr/volk_gnsssdr_neon_intrinsics.h>
@ -1419,10 +1419,10 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2(lv_16sc
//}
//}
//#endif [> LV_HAVE_NEON <]
//#endif [> LV_HAVE_NEONV7 <]
//#ifdef LV_HAVE_NEON
//#ifdef LV_HAVE_NEONV7
//#include <arm_neon.h>
//#include <volk_gnsssdr/volk_gnsssdr_neon_intrinsics.h>
@ -1601,6 +1601,6 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_u_avx2(lv_16sc
//}
//}
//#endif [> LV_HAVE_NEON <]
//#endif [> LV_HAVE_NEONV7 <]
#endif /*INCLUDED_volk_gnsssdr_16ic_16i_dot_prod_16ic_xn_H*/

View File

@ -317,7 +317,7 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_u_avx2(lv_
//#endif // AVX2
//#ifdef LV_HAVE_NEON
//#ifdef LV_HAVE_NEONV7
//static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
//{
//// phases must be normalized. Phase rotator expects a complex exponential input!
@ -348,7 +348,7 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_u_avx2(lv_
//#endif // NEON
//#ifdef LV_HAVE_NEON
//#ifdef LV_HAVE_NEONV7
//static inline void volk_gnsssdr_16ic_16i_rotator_dotprodxnpuppet_16ic_neon_vma(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
//{
//// phases must be normalized. Phase rotator expects a complex exponential input!

View File

@ -202,7 +202,7 @@ static inline void volk_gnsssdr_16ic_conjugate_16ic_u_avx2(lv_16sc_t* cVector, c
//
//
//#ifdef LV_HAVE_NEON
//#ifdef LV_HAVE_NEONV7
//#include <arm_neon.h>
//
//static inline void volk_gnsssdr_16ic_conjugate_16ic_neon(lv_16sc_t* cVector, const lv_16sc_t* aVector, unsigned int num_points)
@ -228,6 +228,6 @@ static inline void volk_gnsssdr_16ic_conjugate_16ic_u_avx2(lv_16sc_t* cVector, c
// *c++ = lv_conj(*a++);
// }
//}
//#endif /* LV_HAVE_NEON */
//#endif /* LV_HAVE_NEONV7 */
#endif /* INCLUDED_volk_gnsssdr_16ic_conjugate_16ic_H */

View File

@ -180,7 +180,7 @@ static inline void volk_gnsssdr_16ic_convert_32fc_a_axv(lv_32fc_t* outputVector,
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_convert_32fc_neon(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
@ -210,6 +210,6 @@ static inline void volk_gnsssdr_16ic_convert_32fc_neon(lv_32fc_t* outputVector,
_in++;
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_H */

View File

@ -256,7 +256,7 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_u_sse2(lv_16sc_t* resul
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_resampler_fast_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples) //, int* scratch_buffer, float* scratch_buffer_float)
@ -342,6 +342,6 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_neon(lv_16sc_t* result,
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /*INCLUDED_volk_gnsssdr_16ic_resampler_fast_16ic_H*/

View File

@ -72,7 +72,7 @@ static inline void volk_gnsssdr_16ic_resamplerfastpuppet_16ic_u_sse2(lv_16sc_t*
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_16ic_resamplerfastpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
{
@ -82,6 +82,6 @@ static inline void volk_gnsssdr_16ic_resamplerfastpuppet_16ic_neon(lv_16sc_t* re
volk_gnsssdr_16ic_resampler_fast_16ic_neon(result, local_code, rem_code_phase_chips, code_phase_step_chips, code_length_chips, num_points);
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif // INCLUDED_volk_gnsssdr_16ic_resamplerfastpuppet_16ic_H

View File

@ -128,7 +128,7 @@ static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_u_sse2(lv_16sc_t
#endif
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_16ic_resamplerfastxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
{
float code_phase_step_chips = 0.1;

View File

@ -250,7 +250,7 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_avx(lv_16sc_t* res
#endif
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, unsigned int num_points)
{
int code_length_chips = 2046;

View File

@ -137,7 +137,7 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_u_sse3_reload(lv_16sc_t*
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
@ -150,10 +150,10 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon(lv_16sc_t* outVecto
volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(outVector, inVector, phase_inc[0], phase, num_points);
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
@ -166,7 +166,7 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon_reload(lv_16sc_t* o
volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(outVector, inVector, phase_inc[0], phase, num_points);
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /* INCLUDED_volk_gnsssdr_16ic_rotatorpuppet_16ic_H */

View File

@ -645,7 +645,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(lv_16sc
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
@ -778,10 +778,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
@ -972,6 +972,6 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(lv_16sc_t
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /* INCLUDED_volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_H */

View File

@ -393,7 +393,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_axv2(lv_16sc_t* out, con
#endif /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
@ -462,10 +462,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon_vma(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
@ -515,10 +515,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon_vma(lv_16sc_t* out, c
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon_optvma(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
@ -569,6 +569,6 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon_optvma(lv_16sc_t* out
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /*INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_H*/

View File

@ -489,7 +489,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_avx2(lv_16sc_t* resul
#endif /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
@ -575,10 +575,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result,
}
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
@ -653,10 +653,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(lv_16sc_t* res
}
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_optvma(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
@ -736,6 +736,6 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_optvma(lv_16sc_t*
}
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /*INCLUDED_volk_gnsssdr_16ic_xn_dot_prod_16ic_xn_H*/

View File

@ -188,7 +188,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_u_avx2(lv_16sc_t* r
#endif /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
@ -213,7 +213,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon(lv_16sc_t* res
#endif // NEON
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon_vma(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
@ -237,7 +237,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon_vma(lv_16sc_t*
#endif // NEON
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon_optvma(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{

View File

@ -292,7 +292,7 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_avx2(lv_16sc_t* out, con
#endif /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
@ -338,6 +338,6 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_neon(lv_16sc_t* out, const
*out++ = (*a_ptr++) * (*b_ptr++);
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7*/
#endif /*INCLUDED_volk_gnsssdr_16ic_x2_multiply_16ic_H*/

View File

@ -1300,7 +1300,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_avx2_reload(l
#endif /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
@ -1486,10 +1486,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
#include <volk_gnsssdr/volk_gnsssdr_neon_intrinsics.h>
@ -1683,10 +1683,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
#include <volk_gnsssdr/volk_gnsssdr_neon_intrinsics.h>
@ -1872,6 +1872,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /*INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_H*/

View File

@ -317,7 +317,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_avx2_relo
#endif // AVX2
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
@ -348,7 +348,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon(lv_16s
#endif // NEON
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_neon_vma(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!

View File

@ -525,7 +525,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_avx(lv_16sc_t** resu
#endif
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)
{

View File

@ -285,7 +285,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_u_sse2(lv_16sc_t*
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_neon(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips, float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
@ -384,6 +384,6 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_neon(lv_16sc_t**
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /*INCLUDED_volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_H*/

View File

@ -481,7 +481,7 @@ static inline void volk_gnsssdr_32f_index_max_32u_generic(uint32_t* target, cons
#endif /*LV_HAVE_GENERIC*/
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_32f_index_max_32u_neon(uint32_t* target, const float* src0, uint32_t num_points)
@ -546,6 +546,6 @@ static inline void volk_gnsssdr_32f_index_max_32u_neon(uint32_t* target, const f
}
}
#endif /*LV_HAVE_NEON*/
#endif /*LV_HAVE_NEONV7*/
#endif /*INCLUDED_volk_gnsssdr_32f_index_max_32u_H*/

View File

@ -246,7 +246,7 @@ static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_u_avx(float* result, c
}
#endif
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_32f_resamplerxnpuppet_32f_neon(float* result, const float* local_code, unsigned int num_points)
{
int code_length_chips = 2046;

View File

@ -642,7 +642,7 @@ static inline void volk_gnsssdr_32f_sincos_32fc_generic_fxpt(lv_32fc_t* out, con
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
/* Adapted from http://gruntthepeon.free.fr/ssemath/neon_mathfun.h, original code from Julien Pommier */
/* Based on algorithms from the cephes library http://www.netlib.org/cephes/ */
@ -747,7 +747,7 @@ static inline void volk_gnsssdr_32f_sincos_32fc_neon(lv_32fc_t* out, const float
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /* INCLUDED_volk_gnsssdr_32f_sincos_32fc_H */

View File

@ -527,7 +527,7 @@ static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_u_avx(float** result, co
#endif
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_32f_xn_resampler_32f_xn_neon(float** result, const float* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)

View File

@ -386,7 +386,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_avx2(lv_16sc_t* outputVector
#endif /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
@ -450,7 +450,7 @@ static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector,
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#ifdef LV_HAVE_GENERIC

View File

@ -373,7 +373,7 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector,
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
@ -464,6 +464,6 @@ static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, co
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_H */

View File

@ -306,7 +306,7 @@ static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_u_avx2(lv_32fc_t* re
#endif
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_32fc_resamplerxnpuppet_32fc_neon(lv_32fc_t* result, const lv_32fc_t* local_code, unsigned int num_points)
{
int code_length_chips = 2046;

View File

@ -647,7 +647,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_a_avx(lv_32fc_t
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t* result, const lv_32fc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_32fc_t** in_a, int num_a_vectors, unsigned int num_points)
@ -800,6 +800,6 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_neon(lv_32fc_t*
(*phase) = _phase;
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /* INCLUDED_volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_H */

View File

@ -220,7 +220,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc_a_avx(lv_32
#endif // AVX
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_32fc_x2_rotator_dotprodxnpuppet_32fc_neon(lv_32fc_t* result, const lv_32fc_t* local_code, const lv_32fc_t* in, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!

View File

@ -682,7 +682,7 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_a_avx2(lv_32fc_t** res
#endif
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_neon(lv_32fc_t** result, const lv_32fc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, float* shifts_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_points)

View File

@ -348,7 +348,7 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_orc(lv_8sc_t* cVector, const
#endif /* LV_HAVE_ORC */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_8ic_conjugate_8ic_neon(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
@ -374,6 +374,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_neon(lv_8sc_t* cVector, const
*c++ = lv_conj(*a++);
}
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_H */

View File

@ -436,7 +436,7 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, cons
#endif /* LV_HAVE_ORC */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const lv_8sc_t* in_a, const lv_8sc_t* in_b, unsigned int num_points)
@ -495,6 +495,6 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const
*result += dotProduct;
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_H*/

View File

@ -833,7 +833,7 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_u_avx2(lv_32fc_t *out, const fl
#endif /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
#include <arm_neon.h>
/* Adapted from http://gruntthepeon.free.fr/ssemath/neon_mathfun.h, original code from Julien Pommier */
/* Based on algorithms from the cephes library http://www.netlib.org/cephes/ */
@ -948,6 +948,6 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_neon(lv_32fc_t *out, const floa
(*phase) = _phase;
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /* INCLUDED_volk_gnsssdr_s32f_sincos_32fc_H */

View File

@ -103,13 +103,13 @@ static inline void volk_gnsssdr_s32f_sincospuppet_32fc_u_avx2(lv_32fc_t* out, co
#endif /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_NEON
#ifdef LV_HAVE_NEONV7
static inline void volk_gnsssdr_s32f_sincospuppet_32fc_neon(lv_32fc_t* out, const float phase_inc, unsigned int num_points)
{
    /* fixed starting phase for the puppet; passed to the kernel by pointer */
    float initial_phase[1] = {3};
    volk_gnsssdr_s32f_sincos_32fc_neon(out, phase_inc, initial_phase, num_points);
}
#endif /* LV_HAVE_NEON */
#endif /* LV_HAVE_NEONV7 */
#endif /* INCLUDED_volk_gnsssdr_s32f_sincospuppet_32fc_H */

View File

@ -256,6 +256,36 @@ if(NOT CPU_IS_x86)
OVERRULE_ARCH(avx "Architecture is not x86 or x86_64")
endif(NOT CPU_IS_x86)
########################################################################
# Select neon based on ARM ISA version
########################################################################
# Step 1: probe whether the compiler can build a trivial program that
# uses <arm_neon.h>. If it cannot, every NEON flavour is overruled.
include(CheckCSourceCompiles)
check_c_source_compiles("#include <arm_neon.h>\nint main(){ uint8_t *dest; uint8x8_t res; vst1_u8(dest, res); }"
    neon_compile_result)

if(NOT neon_compile_result)
    foreach(_volk_neon_arch neon neonv7 neonv8)
        OVERRULE_ARCH(${_volk_neon_arch} "Compiler doesn't support NEON")
    endforeach()
else()
    # Step 2: tell the two ISAs apart by assembling one instruction
    # written in each syntax.
    check_c_source_compiles("int main(){asm volatile(\"vrev32.8 q0, q0\");}"
        have_neonv7_result)
    check_c_source_compiles("int main(){asm volatile(\"sub v1.4s,v1.4s,v1.4s\");}"
        have_neonv8_result)

    # Whichever flavour assembles rules out the other one.
    if(have_neonv7_result)
        OVERRULE_ARCH(neonv8 "CPU is armv7")
    endif()
    if(have_neonv8_result)
        OVERRULE_ARCH(neonv7 "CPU is armv8")
    endif()
endif()
########################################################################
# implement overruling in the ORC case,
# since ORC always passes flag detection
@ -423,7 +453,7 @@ include_directories(
# on by default, but let users turn it off
########################################################################
if(${CMAKE_VERSION} VERSION_GREATER "2.8.9")
set(ASM_ARCHS_AVAILABLE "neon")
set(ASM_ARCHS_AVAILABLE "neonv7" "neonv8")
set(FULL_C_FLAGS "${CMAKE_C_FLAGS}" "${CMAKE_CXX_COMPILER_ARG1}")
@ -432,7 +462,7 @@ if(${CMAKE_VERSION} VERSION_GREATER "2.8.9")
# set up the assembler flags and include the source files
foreach(ARCH ${ASM_ARCHS_AVAILABLE})
string(REGEX MATCH "${ARCH}" ASM_ARCH "${available_archs}")
if( ASM_ARCH STREQUAL "neon" )
if( ASM_ARCH STREQUAL "neonv7" )
message(STATUS "---- Adding ASM files") # we always use ATT syntax
message(STATUS "-- Detected neon architecture; enabling ASM")
# setup architecture specific assembler flags

View File

@ -128,12 +128,12 @@ static inline unsigned int get_avx2_enabled(void)
#include <asm/hwcap.h>
#include <linux/auxvec.h>
#include <stdio.h>
#define VOLK_CPU_ARM
#define VOLK_CPU_ARMV7
#endif
static int has_neon(void)
static int has_neonv7(void)
{
#if defined(VOLK_CPU_ARM)
#if defined(VOLK_CPU_ARMV7)
FILE *auxvec_f;
unsigned long auxvec[2];
unsigned int found_neon = 0;
@ -156,6 +156,53 @@ static int has_neon(void)
return 0;
#endif
}
//\todo: Fix this to really check for neon on aarch64
//neon detection is linux specific
#if defined(__aarch64__) && defined(__linux__)
#include <asm/hwcap.h>
#include <linux/auxvec.h>
#include <stdio.h>
#define VOLK_CPU_ARMV8
#endif
/*
 * Runtime probe for NEON (Advanced SIMD) on aarch64 Linux.
 *
 * When VOLK_CPU_ARMV8 is defined, scans /proc/self/auxv for the AT_HWCAP
 * entry and returns 1 if its value has the HWCAP_ASIMD bit set. Returns 0
 * if the file cannot be opened, if no such entry is found, or when
 * VOLK_CPU_ARMV8 is not defined.
 */
static int has_neonv8(void)
{
#if defined(VOLK_CPU_ARMV8)
    FILE *auxvec_f;
    unsigned long auxvec[2];
    unsigned int found_neon = 0;

    auxvec_f = fopen("/proc/self/auxv", "rb");
    if (!auxvec_f) return 0;

    // Each auxv record is read as a pair of unsigned long: (type, value).
    // Only inspect a record when both words were actually read, so a short
    // read or EOF never exposes uninitialised or stale data.
    while (!found_neon && fread(auxvec, sizeof(unsigned long), 2, auxvec_f) == 2)
        {
            if ((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_ASIMD))
                found_neon = 1;
        }

    fclose(auxvec_f);
    return found_neon;
#else
    return 0;
#endif
}
/*
 * Generic NEON probe: reports 1 when either the ARMv7 or the ARMv8 probe
 * succeeds, and 0 otherwise (including builds where neither
 * VOLK_CPU_ARMV7 nor VOLK_CPU_ARMV8 is defined).
 */
static int has_neon(void)
{
#if defined(VOLK_CPU_ARMV8) || defined(VOLK_CPU_ARMV7)
    /* has_neonv8() is only consulted when the v7 probe reports no NEON */
    return (has_neonv7() || has_neonv8()) ? 1 : 0;
#else
    return 0;
#endif
}
// clang-format off
%for arch in archs: