From 9bf8b174ba35aee46bdbccfe90706bc5bda5efb8 Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Wed, 20 Jan 2016 18:16:09 +0100 Subject: [PATCH] Sort out the aligned/unaligned thing in old kernels --- .../volk_gnsssdr_32fc_convert_16ic.h | 35 +- .../volk_gnsssdr_32fc_convert_8ic.h | 35 +- .../volk_gnsssdr_64f_accumulator_64f.h | 28 +- .../volk_gnsssdr_8i_accumulator_s8i.h | 37 +- .../volk_gnsssdr_8i_index_max_16u.h | 774 +++++++++--------- .../volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h | 39 +- .../volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h | 36 +- .../volk_gnsssdr_8ic_conjugate_8ic.h | 35 +- .../volk_gnsssdr_8ic_magnitude_squared_8i.h | 36 +- .../volk_gnsssdr_8ic_s8ic_multiply_8ic.h | 57 +- .../volk_gnsssdr_8ic_x2_dot_prod_8ic.h | 70 +- .../volk_gnsssdr_8ic_x2_multiply_8ic.h | 37 +- .../volk_gnsssdr_8u_x2_multiply_8u.h | 35 +- 13 files changed, 420 insertions(+), 834 deletions(-) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h index 40babae61..1bcf9192d 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_16ic.h @@ -34,8 +34,8 @@ #include #include "volk_gnsssdr/volk_gnsssdr_complex.h" -#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H -#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H +#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_H +#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_H #ifdef LV_HAVE_SSE2 @@ -168,11 +168,6 @@ static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVecto } } #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H */ - - -#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H -#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H #ifdef
LV_HAVE_SSE2 @@ -281,28 +276,4 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, } #endif /* LV_HAVE_SSE */ -#ifdef LV_HAVE_GENERIC -/*! - \brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part) - \param inputVector The floating point input data buffer - \param outputVector The 16 bit output data buffer - \param num_points The number of data values to be converted - */ -static inline void volk_gnsssdr_32fc_convert_16ic_a_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) -{ - float* inputVectorPtr = (float*)inputVector; - int16_t* outputVectorPtr = (int16_t*)outputVector; - float min_val = -32768; - float max_val = 32767; - - for(unsigned int i = 0; i < num_points*2; i++) - { - if(inputVectorPtr[i] > max_val) - inputVectorPtr[i] = max_val; - else if(inputVectorPtr[i] < min_val) - inputVectorPtr[i] = min_val; - outputVectorPtr[i] = (int16_t)rintf(inputVectorPtr[i]); - } -} -#endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H */ +#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_H */ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h index 94cc2ecc7..da66f8d87 100755 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32fc_convert_8ic.h @@ -36,8 +36,8 @@ #include "volk_gnsssdr/volk_gnsssdr_complex.h" -#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H -#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H +#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_H +#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_H #ifdef LV_HAVE_SSE2 #include @@ -126,11 +126,6 @@ static inline void 
volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector, } } #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H */ - - -#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H -#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H #ifdef LV_HAVE_SSE2 @@ -195,28 +190,4 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, } #endif /* LV_HAVE_SSE2 */ -#ifdef LV_HAVE_GENERIC -/*! - \brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part) - \param inputVector The floating point input data buffer - \param outputVector The 16 bit output data buffer - \param num_points The number of data values to be converted - */ -static inline void volk_gnsssdr_32fc_convert_8ic_a_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points) -{ - float* inputVectorPtr = (float*)inputVector; - int8_t* outputVectorPtr = (int8_t*)outputVector; - float min_val = -128; - float max_val = 127; - - for(unsigned int i = 0; i < num_points*2; i++) - { - if(inputVectorPtr[i] > max_val) - inputVectorPtr[i] = max_val; - else if(inputVectorPtr[i] < min_val) - inputVectorPtr[i] = min_val; - outputVectorPtr[i] = (int8_t)rintf(inputVectorPtr[i]); - } -} -#endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H */ +#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_H */ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h index 4ccb5a769..a1efb949e 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_64f_accumulator_64f.h @@ -134,15 +134,6 @@ static inline void volk_gnsssdr_64f_accumulator_64f_generic(double* 
result,const } #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_gnsssdr_64f_accumulator_64f_u_H */ - - -#ifndef INCLUDED_volk_gnsssdr_64f_accumulator_64f_a_H -#define INCLUDED_volk_gnsssdr_64f_accumulator_64f_a_H - -#include -#include -#include #ifdef LV_HAVE_AVX #include @@ -222,21 +213,4 @@ static inline void volk_gnsssdr_64f_accumulator_64f_a_sse3(double* result,const } #endif /* LV_HAVE_SSE3 */ -#ifdef LV_HAVE_GENERIC -/*! - \brief Accumulates the values in the input buffer - \param result The accumulated result - \param inputBuffer The buffer of data to be accumulated - \param num_points The number of values in inputBuffer to be accumulated - */ -static inline void volk_gnsssdr_64f_accumulator_64f_a_generic(double* result,const double* inputBuffer, unsigned int num_points){ - const double* aPtr = inputBuffer; - double returnValue = 0; - - for(unsigned int number = 0;number < num_points; number++){ - returnValue += (*aPtr++); - } - *result = returnValue; -} -#endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_gnsssdr_64f_accumulator_64f_a_H */ +#endif /* INCLUDED_volk_gnsssdr_64f_accumulator_64f_H */ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h index 35c0dfa48..942dc022f 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_accumulator_s8i.h @@ -32,11 +32,12 @@ * ------------------------------------------------------------------------- */ -#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H -#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H +#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_H +#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_H #include #include +#include #ifdef LV_HAVE_SSE3 #include @@ -99,16 
+100,6 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_generic(char* result, const c } #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H */ - - -#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H -#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H - -#include -#include -#include - #ifdef LV_HAVE_SSE3 #include /*! @@ -149,26 +140,6 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const ch } #endif /* LV_HAVE_SSE3 */ -#ifdef LV_HAVE_GENERIC -/*! - \brief Accumulates the values in the input buffer - \param result The accumulated result - \param inputBuffer The buffer of data to be accumulated - \param num_points The number of values in inputBuffer to be accumulated - */ -static inline void volk_gnsssdr_8i_accumulator_s8i_a_generic(char* result, const char* inputBuffer, unsigned int num_points) -{ - const char* aPtr = inputBuffer; - char returnValue = 0; - - for(unsigned int number = 0;number < num_points; number++) - { - returnValue += (*aPtr++); - } - *result = returnValue; -} -#endif /* LV_HAVE_GENERIC */ - #ifdef LV_HAVE_ORC /*! 
\brief Accumulates the values in the input buffer @@ -190,5 +161,5 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_u_orc(char* result, const cha } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H */ +#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_H */ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h index e00be5978..4bad640dd 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_index_max_16u.h @@ -32,237 +32,8 @@ * ------------------------------------------------------------------------- */ -#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H -#define INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H - -#include -#include - -#ifdef LV_HAVE_AVX -#include -/*! 
- \brief Returns the index of the max value in src0 - \param target The index of the max value in src0 - \param src0 The buffer of data to be analysed - \param num_points The number of values in src0 to be analysed - */ -static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, const char* src0, unsigned int num_points) { - if(num_points > 0){ - const unsigned int sse_iters = num_points / 32; - - char* basePtr = (char*)src0; - char* inputPtr = (char*)src0; - char max = src0[0]; - unsigned int index = 0; - __VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32]; - __m256i ones, compareResults, currentValues; - __m128i compareResultslo, compareResultshi, maxValues, lo, hi; - - ones = _mm256_set1_epi8(0xFF); - maxValues = _mm_set1_epi8(max); - - for(unsigned int number = 0; number < sse_iters; number++) - { - currentValues = _mm256_lddqu_si256((__m256i*)inputPtr); - - lo = _mm256_castsi256_si128(currentValues); - hi = _mm256_extractf128_si256(currentValues,1); - - compareResultslo = _mm_cmpgt_epi8(maxValues, lo); - compareResultshi = _mm_cmpgt_epi8(maxValues, hi); - - //compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h - compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1); - - if (!_mm256_testc_si256(compareResults, ones)) - { - _mm256_storeu_si256((__m256i*)&currentValuesBuffer, currentValues); - - for(unsigned int i = 0; i < 32; i++) - { - if(currentValuesBuffer[i] > max) - { - index = inputPtr - basePtr + i; - max = currentValuesBuffer[i]; - } - } - maxValues = _mm_set1_epi8(max); - } - - inputPtr += 32; - } - - for(unsigned int i = 0; i<(num_points % 32); ++i) - { - if(src0[i] > max) - { - index = i; - max = src0[i]; - } - } - target[0] = index; - } -} - -#endif /*LV_HAVE_AVX*/ - -#ifdef LV_HAVE_SSE4_1 -#include -/*!
- \brief Returns the index of the max value in src0 - \param target The index of the max value in src0 - \param src0 The buffer of data to be analysed - \param num_points The number of values in src0 to be analysed - */ -static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, const char* src0, unsigned int num_points) { - if(num_points > 0){ - const unsigned int sse_iters = num_points / 16; - - char* basePtr = (char*)src0; - char* inputPtr = (char*)src0; - char max = src0[0]; - unsigned int index = 0; - __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16]; - __m128i maxValues, compareResults, currentValues; - - maxValues = _mm_set1_epi8(max); - - for(unsigned int number = 0; number < sse_iters; number++) - { - currentValues = _mm_lddqu_si128((__m128i*)inputPtr); - - compareResults = _mm_cmpgt_epi8(maxValues, currentValues); - - if (!_mm_test_all_ones(compareResults)) - { - _mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues); - - for(unsigned int i = 0; i < 16; i++) - { - if(currentValuesBuffer[i] > max) - { - index = inputPtr - basePtr + i; - max = currentValuesBuffer[i]; - } - } - maxValues = _mm_set1_epi8(max); - } - - inputPtr += 16; - } - - for(unsigned int i = 0; i<(num_points % 16); ++i) - { - if(src0[i] > max) - { - index = i; - max = src0[i]; - } - } - target[0] = index; - } -} - -#endif /*LV_HAVE_SSE4_1*/ - -#ifdef LV_HAVE_SSE2 -#include -/*!
- \brief Returns the index of the max value in src0 - \param target The index of the max value in src0 - \param src0 The buffer of data to be analysed - \param num_points The number of values in src0 to be analysed - */ -static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, const char* src0, unsigned int num_points) { - if(num_points > 0){ - const unsigned int sse_iters = num_points / 16; - - char* basePtr = (char*)src0; - char* inputPtr = (char*)src0; - char max = src0[0]; - unsigned int index = 0; - unsigned short mask; - __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16]; - __m128i maxValues, compareResults, currentValues; - - maxValues = _mm_set1_epi8(max); - - for(unsigned int number = 0; number < sse_iters; number++) - { - currentValues = _mm_loadu_si128((__m128i*)inputPtr); - compareResults = _mm_cmpgt_epi8(maxValues, currentValues); - mask = _mm_movemask_epi8(compareResults); - - if (mask != 0xFFFF) - { - _mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues); - mask = ~mask; - unsigned int i = 0; - while (mask > 0) - { - if ((mask & 1) == 1) - { - if(currentValuesBuffer[i] > max) - { - index = inputPtr - basePtr + i; - max = currentValuesBuffer[i]; - } - } - i++; - mask >>= 1; - } - maxValues = _mm_set1_epi8(max); - } - inputPtr += 16; - } - - for(unsigned int i = 0; i<(num_points % 16); ++i) - { - if(src0[i] > max) - { - index = i; - max = src0[i]; - } - } - target[0] = index; - } -} - -#endif /*LV_HAVE_SSE2*/ - -#ifdef LV_HAVE_GENERIC -/*!
- \brief Returns the index of the max value in src0 - \param target The index of the max value in src0 - \param src0 The buffer of data to be analysed - \param num_points The number of values in src0 to be analysed - */ -static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, const char* src0, unsigned int num_points) { - - if(num_points > 0) - { - char max = src0[0]; - unsigned int index = 0; - - for(unsigned int i = 1; i < num_points; ++i) - { - if(src0[i] > max) - { - index = i; - max = src0[i]; - } - } - target[0] = index; - } -} - -#endif /*LV_HAVE_GENERIC*/ - -#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H*/ - - -#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H -#define INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H +#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_H +#define INCLUDED_volk_gnsssdr_8i_index_max_16u_H #include #include @@ -276,62 +47,64 @@ static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, c \param src0 The buffer of data to be analysed \param num_points The number of values in src0 to be analysed */ -static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, const char* src0, unsigned int num_points) { - if(num_points > 0){ - const unsigned int sse_iters = num_points / 32; - - char* basePtr = (char*)src0; - char* inputPtr = (char*)src0; - char max = src0[0]; - unsigned int index = 0; - __VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32]; - __m256i ones, compareResults, currentValues; - __m128i compareResultslo, compareResultshi, maxValues, lo, hi; - - ones = _mm256_set1_epi8(0xFF); - maxValues = _mm_set1_epi8(max); - - for(unsigned int number = 0; number < sse_iters; number++) +static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, const char* src0, unsigned int num_points) +{ + if(num_points > 0) { - currentValues = _mm256_load_si256((__m256i*)inputPtr); - - lo = _mm256_castsi256_si128(currentValues); - hi = _mm256_extractf128_si256(currentValues,1); 
- - compareResultslo = _mm_cmpgt_epi8(maxValues, lo); - compareResultshi = _mm_cmpgt_epi8(maxValues, hi); - - //compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h - compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1); - - if (!_mm256_testc_si256(compareResults, ones)) - { - _mm256_store_si256((__m256i*)&currentValuesBuffer, currentValues); - - for(unsigned int i = 0; i < 32; i++) + const unsigned int sse_iters = num_points / 32; + + char* basePtr = (char*)src0; + char* inputPtr = (char*)src0; + char max = src0[0]; + unsigned int index = 0; + __VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32]; + __m256i ones, compareResults, currentValues; + __m128i compareResultslo, compareResultshi, maxValues, lo, hi; + + ones = _mm256_set1_epi8(0xFF); + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) { - if(currentValuesBuffer[i] > max) - { - index = inputPtr - basePtr + i; - max = currentValuesBuffer[i]; - } + currentValues = _mm256_lddqu_si256((__m256i*)inputPtr); + + lo = _mm256_castsi256_si128(currentValues); + hi = _mm256_extractf128_si256(currentValues,1); + + compareResultslo = _mm_cmpgt_epi8(maxValues, lo); + compareResultshi = _mm_cmpgt_epi8(maxValues, hi); + + //compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h + compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1); + + if (!_mm256_testc_si256(compareResults, ones)) + { + _mm256_storeu_si256((__m256i*)&currentValuesBuffer, currentValues); + + for(unsigned int i = 0; i < 32; i++) + { + if(currentValuesBuffer[i] > max) + { + index = inputPtr - basePtr + i; + max = currentValuesBuffer[i]; + } + } + maxValues = _mm_set1_epi8(max); + } + + inputPtr += 32; } - maxValues = _mm_set1_epi8(max); - } - - inputPtr += 32; + + for(unsigned int i = 0; i<(num_points
% 32); ++i) + { + if(src0[i] > max) + { + index = i; + max = src0[i]; + } + } + target[0] = index; } - - for(unsigned int i = 0; i<(num_points % 32); ++i) - { - if(src0[i] > max) - { - index = i; - max = src0[i]; - } - } - target[0] = index; - } } #endif /*LV_HAVE_AVX*/ @@ -344,53 +117,282 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, con \param src0 The buffer of data to be analysed \param num_points The number of values in src0 to be analysed */ -static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, const char* src0, unsigned int num_points) { - if(num_points > 0){ - const unsigned int sse_iters = num_points / 16; - - char* basePtr = (char*)src0; - char* inputPtr = (char*)src0; - char max = src0[0]; - unsigned int index = 0; - __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16]; - __m128i maxValues, compareResults, currentValues; - - maxValues = _mm_set1_epi8(max); - - for(unsigned int number = 0; number < sse_iters; number++) +static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, const char* src0, unsigned int num_points) +{ + if(num_points > 0) { - currentValues = _mm_load_si128((__m128i*)inputPtr); - - compareResults = _mm_cmpgt_epi8(maxValues, currentValues); - - if (!_mm_test_all_ones(compareResults)) - { - _mm_store_si128((__m128i*)&currentValuesBuffer, currentValues); - - for(unsigned int i = 0; i < 16; i++) + const unsigned int sse_iters = num_points / 16; + + char* basePtr = (char*)src0; + char* inputPtr = (char*)src0; + char max = src0[0]; + unsigned int index = 0; + __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16]; + __m128i maxValues, compareResults, currentValues; + + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) { - if(currentValuesBuffer[i] > max) - { - index = inputPtr - basePtr + i; - max = currentValuesBuffer[i]; - } + currentValues = _mm_lddqu_si128((__m128i*)inputPtr); + + compareResults =
_mm_cmpgt_epi8(maxValues, currentValues); + + if (!_mm_test_all_ones(compareResults)) + { + _mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues); + + for(unsigned int i = 0; i < 16; i++) + { + if(currentValuesBuffer[i] > max) + { + index = inputPtr - basePtr + i; + max = currentValuesBuffer[i]; + } + } + maxValues = _mm_set1_epi8(max); + } + + inputPtr += 16; } - maxValues = _mm_set1_epi8(max); - } - - inputPtr += 16; + + for(unsigned int i = 0; i<(num_points % 16); ++i) + { + if(src0[i] > max) + { + index = i; + max = src0[i]; + } + } + target[0] = index; } - - for(unsigned int i = 0; i<(num_points % 16); ++i) +} + +#endif /*LV_HAVE_SSE4_1*/ + +#ifdef LV_HAVE_SSE2 +#include +/*! + \brief Returns the index of the max value in src0 + \param target The index of the max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, const char* src0, unsigned int num_points) +{ + if(num_points > 0) { - if(src0[i] > max) - { - index = i; - max = src0[i]; - } + const unsigned int sse_iters = num_points / 16; + + char* basePtr = (char*)src0; + char* inputPtr = (char*)src0; + char max = src0[0]; + unsigned int index = 0; + unsigned short mask; + __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16]; + __m128i maxValues, compareResults, currentValues; + + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) + { + currentValues = _mm_loadu_si128((__m128i*)inputPtr); + compareResults = _mm_cmpgt_epi8(maxValues, currentValues); + mask = _mm_movemask_epi8(compareResults); + + if (mask != 0xFFFF) + { + _mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues); + mask = ~mask; + unsigned int i = 0; + while (mask > 0) + { + if ((mask & 1) == 1) + { + if(currentValuesBuffer[i] > max) + { + index = inputPtr - basePtr + i; + max = currentValuesBuffer[i]; + } + } + i++; + mask >>= 1; + } +
maxValues = _mm_set1_epi8(max); + } + inputPtr += 16; + } + + for(unsigned int i = 0; i<(num_points % 16); ++i) + { + if(src0[i] > max) + { + index = i; + max = src0[i]; + } + } + target[0] = index; + } +} + +#endif /*LV_HAVE_SSE2*/ + +#ifdef LV_HAVE_GENERIC +/*! + \brief Returns the index of the max value in src0 + \param target The index of the max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, const char* src0, unsigned int num_points) +{ + if(num_points > 0) + { + char max = src0[0]; + unsigned int index = 0; + + for(unsigned int i = 1; i < num_points; ++i) + { + if(src0[i] > max) + { + index = i; + max = src0[i]; + } + } + target[0] = index; + } +} + +#endif /*LV_HAVE_GENERIC*/ + + +#ifdef LV_HAVE_AVX +#include +/*! + \brief Returns the index of the max value in src0 + \param target The index of the max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, const char* src0, unsigned int num_points) +{ + if(num_points > 0) + { + const unsigned int sse_iters = num_points / 32; + + char* basePtr = (char*)src0; + char* inputPtr = (char*)src0; + char max = src0[0]; + unsigned int index = 0; + __VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32]; + __m256i ones, compareResults, currentValues; + __m128i compareResultslo, compareResultshi, maxValues, lo, hi; + + ones = _mm256_set1_epi8(0xFF); + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) + { + currentValues = _mm256_load_si256((__m256i*)inputPtr); + + lo = _mm256_castsi256_si128(currentValues); + hi = _mm256_extractf128_si256(currentValues,1); + + compareResultslo = _mm_cmpgt_epi8(maxValues, lo); + compareResultshi = _mm_cmpgt_epi8(maxValues, hi); + + 
//compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h + compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo), (compareResultshi), 1); + + if (!_mm256_testc_si256(compareResults, ones)) + { + _mm256_store_si256((__m256i*)&currentValuesBuffer, currentValues); + + for(unsigned int i = 0; i < 32; i++) + { + if(currentValuesBuffer[i] > max) + { + index = inputPtr - basePtr + i; + max = currentValuesBuffer[i]; + } + } + maxValues = _mm_set1_epi8(max); + } + + inputPtr += 32; + } + + for(unsigned int i = 0; i<(num_points % 32); ++i) + { + if(src0[i] > max) + { + index = i; + max = src0[i]; + } + } + target[0] = index; + } +} + +#endif /*LV_HAVE_AVX*/ + +#ifdef LV_HAVE_SSE4_1 +#include +/*! + \brief Returns the index of the max value in src0 + \param target The index of the max value in src0 + \param src0 The buffer of data to be analysed + \param num_points The number of values in src0 to be analysed + */ +static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, const char* src0, unsigned int num_points) +{ + if(num_points > 0) + { + const unsigned int sse_iters = num_points / 16; + + char* basePtr = (char*)src0; + char* inputPtr = (char*)src0; + char max = src0[0]; + unsigned int index = 0; + __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16]; + __m128i maxValues, compareResults, currentValues; + + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) + { + currentValues = _mm_load_si128((__m128i*)inputPtr); + + compareResults = _mm_cmpgt_epi8(maxValues, currentValues); + + if (!_mm_test_all_ones(compareResults)) + { + _mm_store_si128((__m128i*)&currentValuesBuffer, currentValues); + + for(unsigned int i = 0; i < 16; i++) + { + if(currentValuesBuffer[i] > max) + { + index = inputPtr - basePtr + i; + max = currentValuesBuffer[i]; + } + } + maxValues = _mm_set1_epi8(max); + } + + inputPtr += 16; + } + + for(unsigned int i = 0;
i<(num_points % 16); ++i) + { + if(src0[i] > max) + { + index = i; + max = src0[i]; + } + } + target[0] = index; } - target[0] = index; - } } #endif /*LV_HAVE_SSE4_1*/ @@ -403,89 +405,65 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, \param src0 The buffer of data to be analysed \param num_points The number of values in src0 to be analysed */ -static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, const char* src0, unsigned int num_points) { - if(num_points > 0){ - const unsigned int sse_iters = num_points / 16; - - char* basePtr = (char*)src0; - char* inputPtr = (char*)src0; - char max = src0[0]; - unsigned int index = 0; - unsigned short mask; - __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16]; - __m128i maxValues, compareResults, currentValues; - - maxValues = _mm_set1_epi8(max); - - for(unsigned int number = 0; number < sse_iters; number++) +static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, const char* src0, unsigned int num_points) +{ + if(num_points > 0) { - currentValues = _mm_load_si128((__m128i*)inputPtr); - compareResults = _mm_cmpgt_epi8(maxValues, currentValues); - mask = _mm_movemask_epi8(compareResults); - - if (mask != 0xFFFF) - { - _mm_store_si128((__m128i*)&currentValuesBuffer, currentValues); - mask = ~mask; - unsigned int i = 0; - while (mask > 0) + const unsigned int sse_iters = num_points / 16; + + char* basePtr = (char*)src0; + char* inputPtr = (char*)src0; + char max = src0[0]; + unsigned int index = 0; + unsigned short mask; + __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16]; + __m128i maxValues, compareResults, currentValues; + + maxValues = _mm_set1_epi8(max); + + for(unsigned int number = 0; number < sse_iters; number++) { - if ((mask & 1) == 1) - { - if(currentValuesBuffer[i] > max) + currentValues = _mm_load_si128((__m128i*)inputPtr); + compareResults = _mm_cmpgt_epi8(maxValues, currentValues); + mask = _mm_movemask_epi8(compareResults); + + if
(mask != 0xFFFF) { - index = inputPtr - basePtr + i; - max = currentValuesBuffer[i]; + _mm_store_si128((__m128i*)&currentValuesBuffer, currentValues); + mask = ~mask; + unsigned int i = 0; + while (mask > 0) + { + if ((mask & 1) == 1) + { + if(currentValuesBuffer[i] > max) + { + index = inputPtr - basePtr + i; + max = currentValuesBuffer[i]; + } + } + i++; + mask >>= 1; + } + maxValues = _mm_set1_epi8(max); } - } - i++; - mask >>= 1; + inputPtr += 16; } - maxValues = _mm_set1_epi8(max); - } - inputPtr += 16; + + for(unsigned int i = 0; i<(num_points % 16); ++i) + { + if(src0[i] > max) + { + index = i; + max = src0[i]; + } + } + target[0] = index; } - - for(unsigned int i = 0; i<(num_points % 16); ++i) - { - if(src0[i] > max) - { - index = i; - max = src0[i]; - } - } - target[0] = index; - } } #endif /*LV_HAVE_SSE2*/ -#ifdef LV_HAVE_GENERIC -/*! - \brief Returns the index of the max value in src0 - \param target The index of the max value in src0 - \param src0 The buffer of data to be analysed - \param num_points The number of values in src0 to be analysed - */ -static inline void volk_gnsssdr_8i_index_max_16u_a_generic(unsigned int* target, const char* src0, unsigned int num_points) { - - if(num_points > 0) - { - char max = src0[0]; - unsigned int index = 0; - - for(unsigned int i = 1; i < num_points; ++i) - { - if(src0[i] > max) - { - index = i; - max = src0[i]; - } - } - target[0] = index; - } -} -#endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H*/ +#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_H*/ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h index 7b5d9390c..156fe6a1a 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h +++
b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_max_s8i.h @@ -32,11 +32,13 @@ * ------------------------------------------------------------------------- */ -#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_u_H -#define INCLUDED_volk_gnsssdr_8i_max_s8i_u_H +#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_H +#define INCLUDED_volk_gnsssdr_8i_max_s8i_H + #include #include +#include #ifdef LV_HAVE_SSE4_1 #include @@ -179,15 +181,8 @@ static inline void volk_gnsssdr_8i_max_s8i_generic(char* target, const char* src #endif /*LV_HAVE_GENERIC*/ -#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_u_H*/ -#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_a_H -#define INCLUDED_volk_gnsssdr_8i_max_s8i_a_H - -#include -#include -#include #ifdef LV_HAVE_SSE4_1 #include @@ -304,29 +299,5 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse2(char* target, const char* src0 #endif /*LV_HAVE_SSE2*/ -#ifdef LV_HAVE_GENERIC -/*! - \brief Returns the max value in src0 - \param target The max value in src0 - \param src0 The buffer of data to be analysed - \param num_points The number of values in src0 to be analysed - */ -static inline void volk_gnsssdr_8i_max_s8i_a_generic(char* target, const char* src0, unsigned int num_points) -{ - if(num_points > 0) - { - char max = src0[0]; - for(unsigned int i = 1; i < num_points; ++i) - { - if(src0[i] > max) - { - max = src0[i]; - } - } - target[0] = max; - } -} -#endif /*LV_HAVE_GENERIC*/ - -#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_a_H*/ +#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_H*/ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h index bf20d5946..b23dd3a05 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h +++ 
b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8i_x2_add_8i.h @@ -32,10 +32,11 @@ * ------------------------------------------------------------------------- */ -#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H -#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H +#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_H +#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_H #include +#include #ifdef LV_HAVE_SSE2 #include @@ -99,14 +100,6 @@ static inline void volk_gnsssdr_8i_x2_add_8i_generic(char* cVector, const char* } #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H */ - - -#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H -#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H - -#include -#include #ifdef LV_HAVE_SSE2 #include @@ -148,27 +141,6 @@ static inline void volk_gnsssdr_8i_x2_add_8i_a_sse2(char* cVector, const char* a } #endif /* LV_HAVE_SSE2 */ -#ifdef LV_HAVE_GENERIC -/*! - \brief Adds the two input vectors and store their results in the third vector - \param cVector The vector where the results will be stored - \param aVector One of the vectors to be added - \param bVector One of the vectors to be added - \param num_points The number of values in aVector and bVector to be added together and stored into cVector - */ -static inline void volk_gnsssdr_8i_x2_add_8i_a_generic(char* cVector, const char* aVector, const char* bVector, unsigned int num_points) -{ - char* cPtr = cVector; - const char* aPtr = aVector; - const char* bPtr= bVector; - unsigned int number = 0; - - for(; number < num_points; number++) - { - *cPtr++ = (*aPtr++) + (*bPtr++); - } -} -#endif /* LV_HAVE_GENERIC */ #ifdef LV_HAVE_ORC /*! 
@@ -185,4 +157,4 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_orc(char* cVector, const char* aV } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H */ +#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_H */ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h index c8424e227..57faf5efe 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_conjugate_8ic.h @@ -33,10 +33,11 @@ * ------------------------------------------------------------------------- */ -#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H -#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H +#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_H +#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_H #include +#include #include #ifdef LV_HAVE_AVX @@ -174,15 +175,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_generic(lv_8sc_t* cVector, con } #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H */ - - -#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H -#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H - -#include -#include -#include #ifdef LV_HAVE_AVX #include @@ -299,25 +291,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, cons } #endif /* LV_HAVE_SSE3 */ -#ifdef LV_HAVE_GENERIC -/*! - \brief Takes the conjugate of an unsigned char vector. 
- \param cVector The vector where the results will be stored - \param aVector Vector to be conjugated - \param num_points The number of unsigned char values in aVector to be conjugated and stored into cVector - */ -static inline void volk_gnsssdr_8ic_conjugate_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points) -{ - lv_8sc_t* cPtr = cVector; - const lv_8sc_t* aPtr = aVector; - unsigned int number = 0; - - for(number = 0; number < num_points; number++) - { - *cPtr++ = lv_conj(*aPtr++); - } -} -#endif /* LV_HAVE_GENERIC */ #ifdef LV_HAVE_ORC /*! @@ -333,4 +306,4 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_orc(lv_8sc_t* cVector, const } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H */ +#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_H */ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h index c8114e8af..63b034c72 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_magnitude_squared_8i.h @@ -34,10 +34,11 @@ * ------------------------------------------------------------------------- */ -#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_u_H -#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_u_H +#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_H +#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_H #include +#include #include #ifdef LV_HAVE_SSSE3 @@ -166,15 +167,6 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_generic(char* magnitude } #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_32f_u_H */ - - -#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_a_H -#define 
INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_a_H - -#include -#include -#include #ifdef LV_HAVE_SSSE3 #include @@ -281,26 +273,6 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeV //} //#endif /* LV_HAVE_SSE */ -#ifdef LV_HAVE_GENERIC -/*! - \brief Calculates the magnitude squared of complexVector and stores the results in magnitudeVector - \param complexVector The vector containing the complex input values - \param magnitudeVector The vector containing the real output values - \param num_points The number of complex values in complexVector to be calculated and stored into cVector - */ -static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_generic(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points) -{ - const char* complexVectorPtr = (char*)complexVector; - char* magnitudeVectorPtr = magnitudeVector; - - for(unsigned int number = 0; number < num_points; number++) - { - const char real = *complexVectorPtr++; - const char imag = *complexVectorPtr++; - *magnitudeVectorPtr++ = (real*real) + (imag*imag); - } -} -#endif /* LV_HAVE_GENERIC */ #ifdef LV_HAVE_ORC /*! 
@@ -316,4 +288,4 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_orc(char* magnitudeVe } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_32f_a_H */ +#endif /* INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_H */ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h index 89e41fd14..4f38a9fcd 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_s8ic_multiply_8ic.h @@ -33,10 +33,11 @@ * ------------------------------------------------------------------------- */ -#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_u_H -#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_u_H +#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_H +#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_H #include +#include #include #include @@ -143,16 +144,6 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_generic(lv_8sc_t* cVector, } #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H */ - - -#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_a_H -#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_a_H - -#include -#include -#include -#include #ifdef LV_HAVE_SSE3 #include @@ -215,46 +206,6 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector, } #endif /* LV_HAVE_SSE3 */ -#ifdef LV_HAVE_GENERIC -/*! 
- \brief Multiplies the input vector by a scalar and stores the results in the third vector - \param cVector The vector where the results will be stored - \param aVector The vector to be multiplied - \param scalar The complex scalar to multiply aVector - \param num_points The number of complex values in aVector to be multiplied by sacalar and stored into cVector - */ -static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points) -{ - /*lv_8sc_t* cPtr = cVector; - const lv_8sc_t* aPtr = aVector; - - for (int i = 0; i= 8){ - *cPtr++ = (*aPtr++) * scalar; - *cPtr++ = (*aPtr++) * scalar; - *cPtr++ = (*aPtr++) * scalar; - *cPtr++ = (*aPtr++) * scalar; - *cPtr++ = (*aPtr++) * scalar; - *cPtr++ = (*aPtr++) * scalar; - *cPtr++ = (*aPtr++) * scalar; - *cPtr++ = (*aPtr++) * scalar; - number -= 8; - } - - // clean up any remaining - while (number-- > 0) - *cPtr++ = *aPtr++ * scalar; -} -#endif /* LV_HAVE_GENERIC */ #ifdef LV_HAVE_ORC /*! 
@@ -271,4 +222,4 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_orc(lv_8sc_t* cVector, c } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H */ +#endif /* INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_H */ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h index a753a25c3..e8d21ba1e 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_dot_prod_8ic.h @@ -33,9 +33,10 @@ * ------------------------------------------------------------------------- */ -#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H -#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H +#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_H +#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_H +#include #include #include #include @@ -251,69 +252,6 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c #endif /*LV_HAVE_SSE4_1*/ -#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H*/ - - -#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H -#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H - -#include -#include -#include -#include - - -#ifdef LV_HAVE_GENERIC -/*! 
- \brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector - \param cVector The vector where the accumulated result will be stored - \param aVector One of the vectors to be multiplied and accumulated - \param bVector One of the vectors to be multiplied and accumulated - \param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector - */ -static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points) -{ - // lv_8sc_t* cPtr = result; - // const lv_8sc_t* aPtr = input; - // const lv_8sc_t* bPtr = taps; - // - // for(int number = 0; number < num_points; number++) - // { - // *cPtr += (*aPtr++) * (*bPtr++); - // } - - char * res = (char*) result; - char * in = (char*) input; - char * tp = (char*) taps; - unsigned int n_2_ccomplex_blocks = num_points/2; - unsigned int isodd = num_points & 1; - - char sum0[2] = {0,0}; - char sum1[2] = {0,0}; - unsigned int i = 0; - - for(i = 0; i < n_2_ccomplex_blocks; ++i) - { - sum0[0] += in[0] * tp[0] - in[1] * tp[1]; - sum0[1] += in[0] * tp[1] + in[1] * tp[0]; - sum1[0] += in[2] * tp[2] - in[3] * tp[3]; - sum1[1] += in[2] * tp[3] + in[3] * tp[2]; - - in += 4; - tp += 4; - } - - res[0] = sum0[0] + sum1[0]; - res[1] = sum0[1] + sum1[1]; - - // Cleanup if we had an odd number of points - for(i = 0; i < isodd; ++i) - { - *result += input[num_points - 1] * taps[num_points - 1]; - } -} - -#endif /*LV_HAVE_GENERIC*/ #ifdef LV_HAVE_SSE2 #include @@ -500,4 +438,4 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, cons } #endif /* LV_HAVE_ORC */ -#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H*/ +#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_H*/ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h 
b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h index 4e2971d5e..a5cb3a172 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8ic_x2_multiply_8ic.h @@ -33,10 +33,11 @@ * ------------------------------------------------------------------------- */ -#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H -#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H +#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_H +#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_H #include +#include #include #ifdef LV_HAVE_SSE2 @@ -180,15 +181,6 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, c } #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H */ - - -#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H -#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H - -#include -#include -#include #ifdef LV_HAVE_SSE2 #include @@ -310,27 +302,6 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, } #endif /* LV_HAVE_SSE4_1 */ -#ifdef LV_HAVE_GENERIC -/*! 
- \brief Multiplies the two input complex vectors and stores their results in the third vector - \param cVector The vector where the results will be stored - \param aVector One of the vectors to be multiplied - \param bVector One of the vectors to be multiplied - \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector - */ -static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) -{ - lv_8sc_t* cPtr = cVector; - const lv_8sc_t* aPtr = aVector; - const lv_8sc_t* bPtr = bVector; - - for(unsigned int number = 0; number < num_points; number++) - { - *cPtr++ = (*aPtr++) * (*bPtr++); - } - -} -#endif /* LV_HAVE_GENERIC */ #ifdef LV_HAVE_ORC /*! @@ -347,4 +318,4 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_orc(lv_8sc_t* cVector, con } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H */ +#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_H */ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h index a9e7831b7..cb58d55e6 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_8u_x2_multiply_8u.h @@ -32,10 +32,11 @@ * ------------------------------------------------------------------------- */ -#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H -#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H +#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_H +#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_H #include +#include #ifdef LV_HAVE_SSE3 #include @@ -112,14 +113,6 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_generic(unsigned char* 
cChar, } #endif /* LV_HAVE_GENERIC */ -#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H */ - - -#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H -#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H - -#include -#include #ifdef LV_HAVE_SSE3 #include @@ -176,26 +169,6 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, c } #endif /* LV_HAVE_SSE */ -#ifdef LV_HAVE_GENERIC -/*! - \brief Multiplies the two input unsigned char values and stores their results in the third unisgned char - \param cChar The unsigned char where the results will be stored - \param aChar One of the unsigned char to be multiplied - \param bChar One of the unsigned char to be multiplied - \param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar - */ -static inline void volk_gnsssdr_8u_x2_multiply_8u_a_generic(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points) -{ - unsigned char* cPtr = cChar; - const unsigned char* aPtr = aChar; - const unsigned char* bPtr = bChar; - - for(unsigned int number = 0; number < num_points; number++) - { - *cPtr++ = (*aPtr++) * (*bPtr++); - } -} -#endif /* LV_HAVE_GENERIC */ #ifdef LV_HAVE_ORC /*! @@ -212,4 +185,4 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_u_orc(unsigned char* cVector, } #endif /* LV_HAVE_ORC */ -#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H */ +#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_H */