diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_index_max_32u.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_index_max_32u.h new file mode 100644 index 000000000..8ea27b9c2 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_index_max_32u.h @@ -0,0 +1,221 @@ +/*! + * \file volk_gnsssdr_32f_index_max_32u.h + * \brief VOLK_GNSSSDR kernel: Finds and returns the index which contains the maximum value in the given vector. + * + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. + * + * ------------------------------------------------------------------------- + */ + + + +/*! + * \page volk_gnsssdr_32f_index_max_32u.h + * + * \b Overview + * + * Finds and returns the index which contains the maximum value in the given vector. + * + * Dispatcher Prototype + * \code + * volk_gnsssdr_32f_index_max_32u(uint32_t* target, const float* src0, uint32_t num_points) + * \endcode + * + * \b Inputs + * \li src0: The input vector of floats. + * \li num_points: The number of data points. 
/*!
 * \b Outputs
 * \li target: The index of the maximum value in the input buffer. When the
 *     maximum occurs more than once, the lowest such index is written.
 *     Nothing is written when num_points is 0.
 */


#ifndef INCLUDED_volk_gnsssdr_32f_index_max_32u_H
#define INCLUDED_volk_gnsssdr_32f_index_max_32u_H

/* NOTE(review): the include targets of this file were lost from the reviewed
 * text; the headers named here and in the SIMD sections are reconstructed from
 * what the code uses -- confirm against the repository. */
#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/* presumably the provider of __VOLK_ATTR_ALIGNED -- confirm */
#include <volk_gnsssdr/volk_gnsssdr_common.h>

/*!
 * \brief Index of the maximum of a float vector (SSE4.1, aligned input).
 *
 * \param[out] target     Receives the index of the first occurrence of the
 *                        maximum; left untouched when num_points is 0.
 * \param[in]  src0       Input vector. It is read with _mm_load_ps, so it must
 *                        be 16-byte aligned.
 * \param[in]  num_points Number of floats in src0.
 *
 * Elements are only accepted when they compare strictly greater than the
 * running maximum, which gives the same answer as the generic kernel: ties
 * resolve to the lowest index and NaN elements after src0[0] are skipped.
 */
static inline void volk_gnsssdr_32f_index_max_32u_a_sse4_1(uint32_t* target, const float* src0, uint32_t num_points)
{
    if (num_points > 0)
        {
            /* Per-lane positions are carried in floats, which count exactly
             * only up to 2^24. Longer inputs are therefore scanned in chunks
             * of at most 2^22 groups (2^24 points); positions restart at zero
             * in every chunk and the chunk base is added back in uint32_t. */
            const uint32_t maxGroupsPerChunk = 1u << 22;
            const uint32_t quarterPoints = num_points / 4;
            const __m128 offsetIncrement = _mm_set1_ps(4);
            const float* inputPtr = src0;

            float max = src0[0];
            uint32_t index = 0;
            uint32_t groupsDone = 0;
            uint32_t number;
            uint32_t lane;

            __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
            __VOLK_ATTR_ALIGNED(16) float maxOffsetsBuffer[4];

            while (groupsDone < quarterPoints)
                {
                    const uint32_t chunkBase = groupsDone * 4;
                    uint32_t groupsInChunk = quarterPoints - groupsDone;
                    /* The first add below turns these into 0, 1, 2, 3. */
                    __m128 currentOffsets = _mm_set_ps(-1, -2, -3, -4);
                    /* -1 marks a lane that never beat the running maximum. */
                    __m128 maxOffsets = _mm_set1_ps(-1);
                    __m128 maxValues = _mm_set1_ps(max);
                    __m128 currentValues;
                    __m128 isGreater;

                    if (groupsInChunk > maxGroupsPerChunk)
                        {
                            groupsInChunk = maxGroupsPerChunk;
                        }

                    for (number = 0; number < groupsInChunk; number++)
                        {
                            currentValues = _mm_load_ps(inputPtr);
                            inputPtr += 4;
                            currentOffsets = _mm_add_ps(currentOffsets, offsetIncrement);

                            /* Strict comparison: an equal value never displaces
                             * the earlier position already stored in the lane. */
                            isGreater = _mm_cmpgt_ps(currentValues, maxValues);

                            maxOffsets = _mm_blendv_ps(maxOffsets, currentOffsets, isGreater);
                            maxValues = _mm_blendv_ps(maxValues, currentValues, isGreater);
                        }

                    /* Fold the four lanes into the scalar running maximum. */
                    _mm_store_ps(maxValuesBuffer, maxValues);
                    _mm_store_ps(maxOffsetsBuffer, maxOffsets);

                    for (lane = 0; lane < 4; lane++)
                        {
                            if (maxOffsetsBuffer[lane] >= 0)
                                {
                                    const uint32_t candidate = chunkBase + (uint32_t)maxOffsetsBuffer[lane];
                                    /* Between lanes holding the same value the lower index wins. */
                                    if ((maxValuesBuffer[lane] > max) || ((maxValuesBuffer[lane] == max) && (candidate < index)))
                                        {
                                            index = candidate;
                                            max = maxValuesBuffer[lane];
                                        }
                                }
                        }

                    groupsDone += groupsInChunk;
                }

            /* Up to three trailing points that do not fill a group of four. */
            for (number = quarterPoints * 4; number < num_points; number++)
                {
                    if (src0[number] > max)
                        {
                            index = number;
                            max = src0[number];
                        }
                }
            target[0] = index;
        }
}

#endif /*LV_HAVE_SSE4_1*/


#ifdef LV_HAVE_SSE

#include <xmmintrin.h>
/* presumably the provider of __VOLK_ATTR_ALIGNED -- confirm */
#include <volk_gnsssdr/volk_gnsssdr_common.h>

/*!
 * \brief Index of the maximum of a float vector (SSE, aligned input).
 *
 * \param[out] target     Receives the index of the first occurrence of the
 *                        maximum; left untouched when num_points is 0.
 * \param[in]  src0       Input vector. It is read with _mm_load_ps, so it must
 *                        be 16-byte aligned.
 * \param[in]  num_points Number of floats in src0.
 *
 * Same algorithm as the SSE4.1 kernel; the lane selection is built from
 * and / andnot / or because no blend instruction is used here.
 */
static inline void volk_gnsssdr_32f_index_max_32u_a_sse(uint32_t* target, const float* src0, uint32_t num_points)
{
    if (num_points > 0)
        {
            /* Per-lane positions are carried in floats, which count exactly
             * only up to 2^24. Longer inputs are therefore scanned in chunks
             * of at most 2^22 groups (2^24 points); positions restart at zero
             * in every chunk and the chunk base is added back in uint32_t. */
            const uint32_t maxGroupsPerChunk = 1u << 22;
            const uint32_t quarterPoints = num_points / 4;
            const __m128 offsetIncrement = _mm_set1_ps(4);
            const float* inputPtr = src0;

            float max = src0[0];
            uint32_t index = 0;
            uint32_t groupsDone = 0;
            uint32_t number;
            uint32_t lane;

            __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
            __VOLK_ATTR_ALIGNED(16) float maxOffsetsBuffer[4];

            while (groupsDone < quarterPoints)
                {
                    const uint32_t chunkBase = groupsDone * 4;
                    uint32_t groupsInChunk = quarterPoints - groupsDone;
                    /* The first add below turns these into 0, 1, 2, 3. */
                    __m128 currentOffsets = _mm_set_ps(-1, -2, -3, -4);
                    /* -1 marks a lane that never beat the running maximum. */
                    __m128 maxOffsets = _mm_set1_ps(-1);
                    __m128 maxValues = _mm_set1_ps(max);
                    __m128 currentValues;
                    __m128 isGreater;

                    if (groupsInChunk > maxGroupsPerChunk)
                        {
                            groupsInChunk = maxGroupsPerChunk;
                        }

                    for (number = 0; number < groupsInChunk; number++)
                        {
                            currentValues = _mm_load_ps(inputPtr);
                            inputPtr += 4;
                            currentOffsets = _mm_add_ps(currentOffsets, offsetIncrement);

                            /* Strict comparison: an equal value never displaces
                             * the earlier position already stored in the lane. */
                            isGreater = _mm_cmpgt_ps(currentValues, maxValues);

                            /* (mask & new) | (~mask & old) */
                            maxOffsets = _mm_or_ps(_mm_and_ps(isGreater, currentOffsets), _mm_andnot_ps(isGreater, maxOffsets));
                            maxValues = _mm_or_ps(_mm_and_ps(isGreater, currentValues), _mm_andnot_ps(isGreater, maxValues));
                        }

                    /* Fold the four lanes into the scalar running maximum. */
                    _mm_store_ps(maxValuesBuffer, maxValues);
                    _mm_store_ps(maxOffsetsBuffer, maxOffsets);

                    for (lane = 0; lane < 4; lane++)
                        {
                            if (maxOffsetsBuffer[lane] >= 0)
                                {
                                    const uint32_t candidate = chunkBase + (uint32_t)maxOffsetsBuffer[lane];
                                    /* Between lanes holding the same value the lower index wins. */
                                    if ((maxValuesBuffer[lane] > max) || ((maxValuesBuffer[lane] == max) && (candidate < index)))
                                        {
                                            index = candidate;
                                            max = maxValuesBuffer[lane];
                                        }
                                }
                        }

                    groupsDone += groupsInChunk;
                }

            /* Up to three trailing points that do not fill a group of four. */
            for (number = quarterPoints * 4; number < num_points; number++)
                {
                    if (src0[number] > max)
                        {
                            index = number;
                            max = src0[number];
                        }
                }
            target[0] = index;
        }
}

#endif /*LV_HAVE_SSE*/


#ifdef LV_HAVE_GENERIC

/*!
 * \brief Index of the maximum of a float vector (portable implementation).
 *
 * \param[out] target     Receives the index of the first occurrence of the
 *                        maximum; left untouched when num_points is 0.
 * \param[in]  src0       Input vector of floats.
 * \param[in]  num_points Number of floats in src0.
 */
static inline void volk_gnsssdr_32f_index_max_32u_generic(uint32_t* target, const float* src0, uint32_t num_points)
{
    if (num_points > 0)
        {
            float max = src0[0];
            uint32_t index = 0;
            uint32_t i;

            /* Strict comparison keeps the earliest position of the maximum. */
            for (i = 1; i < num_points; ++i)
                {
                    if (src0[i] > max)
                        {
                            index = i;
                            max = src0[i];
                        }
                }
            target[0] = index;
        }
}

#endif /*LV_HAVE_GENERIC*/


#endif /*INCLUDED_volk_gnsssdr_32f_index_max_32u_H*/
a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/kernel_tests.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/kernel_tests.h index 7f92979f1..de3284a06 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/kernel_tests.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/kernel_tests.h @@ -78,6 +78,7 @@ std::vector init_test_list(volk_gnsssdr_test_params_t (VOLK_INIT_TEST(volk_gnsssdr_8u_x2_multiply_8u, test_params_more_iters)) (VOLK_INIT_TEST(volk_gnsssdr_64f_accumulator_64f, test_params)) (VOLK_INIT_TEST(volk_gnsssdr_32f_sincos_32fc, test_params_inacc)) + (VOLK_INIT_TEST(volk_gnsssdr_32f_index_max_32u, test_params)) (VOLK_INIT_TEST(volk_gnsssdr_32fc_convert_8ic, test_params)) (VOLK_INIT_TEST(volk_gnsssdr_32fc_convert_16ic, test_params_more_iters)) (VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_dot_prod_16ic, test_params))