gnss-sdr/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/CommonMacros/CommonMacros.h

175 lines
6.9 KiB
C

/*!
* \file CommonMacros.h
* \brief Common macros used inside the volk protokernels.
* \authors <ul>
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_gnsssdr_CommonMacros_u_H
#define INCLUDED_gnsssdr_CommonMacros_u_H
#ifdef LV_HAVE_SSE4_1
/*!
\brief Macros for U_SSE4_1
*/
#ifndef CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1
/*!
\brief Regroups the 16-bit lanes of two __m128i values into a "real" and an "imag" vector.

With the 16-bit lanes of input1 written as [a0 .. a7] and those of input2 as
[b0 .. b7], the expansion leaves
  real = [a0 b0 a2 b2 a4 b4 a6 b6]   (even lanes of both inputs)
  imag = [a1 b1 a3 b3 a5 b5 a7 b7]   (odd lanes of both inputs)
The macro name suggests the inputs hold interleaved real/imaginary 16-bit
samples; that layout is assumed by the naming and is not checked here.

\param input1 first source vector (only read)
\param input2 second source vector (only read)
\param real   lvalue that receives the even lanes
\param imag   lvalue that receives the odd lanes

The expansion is a plain statement sequence that already ends in ';'.
The outputs are written before the inputs are read for the last time, so pass
variables distinct from input1 and input2 for real and imag.
*/
#define CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(input1, input2, real, imag) \
    /* odd lanes: shift input1 down one lane, keep input2's odd lanes */ \
    (imag) = _mm_srli_si128((input1), 2); \
    (imag) = _mm_blend_epi16((input2), (imag), 0x55); \
    /* even lanes: shift input2 up one lane, take input1's even lanes */ \
    (real) = _mm_slli_si128((input2), 2); \
    (real) = _mm_blend_epi16((real), (input1), 0x55);
#endif /* CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1 */
#ifndef CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1
/*!
\brief Widens eight 16-bit lanes to 32 bits, adds the upper four to the lower four and converts the sums to float.

With the 16-bit lanes of input written as [s0 .. s7], the expansion leaves
  output_i32 = [s0+s4, s1+s5, s2+s6, s3+s7]   (32-bit integers, sign-extended operands)
  output_ps  = the same four sums converted to single precision

\param input      source __m128i; it is overwritten (shifted right by 8 bytes)
\param input_i_1  scratch lvalue; ends up holding s0..s3 widened to 32 bits
\param input_i_2  scratch lvalue; ends up holding s4..s7 widened to 32 bits
\param output_i32 lvalue receiving the integer sums
\param output_ps  lvalue receiving the sums as floats (result of _mm_cvtepi32_ps)

The expansion is a plain statement sequence that already ends in ';'.
*/
#define CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(input, input_i_1, input_i_2, output_i32, output_ps) \
    (input_i_1) = _mm_cvtepi16_epi32((input)); \
    (input) = _mm_srli_si128((input), 8); /* bring lanes 4..7 down to 0..3 */ \
    (input_i_2) = _mm_cvtepi16_epi32((input)); \
    (output_i32) = _mm_add_epi32((input_i_1), (input_i_2)); \
    (output_ps) = _mm_cvtepi32_ps((output_i32));
#endif /* CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1 */
#ifndef CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1
/*!
\brief Widens sixteen 8-bit lanes to 32 bits, folds them into four sums and converts the sums to float.

With the bytes of input written as [b0 .. b15], the expansion leaves, for k = 0..3,
  output_i32_1[k] = b[k]   + b[k+4]
  output_i32_2[k] = b[k+8] + b[k+12]
  output_i32[k]   = b[k] + b[k+4] + b[k+8] + b[k+12]
  output_ps       = output_i32 converted to single precision
Bytes are sign-extended before being added.

\param input        source __m128i; it is overwritten: it is shifted right by
                    4 bytes four times (16 bytes in total), so it ends up zero
\param input_i_1    scratch lvalue; ends up holding b8..b11 widened to 32 bits
\param input_i_2    scratch lvalue; ends up holding b12..b15 widened to 32 bits
\param output_i32   lvalue receiving the four integer sums
\param output_i32_1 lvalue receiving the partial sums of bytes 0..7
\param output_i32_2 lvalue receiving the partial sums of bytes 8..15
\param output_ps    lvalue receiving the sums as floats

The expansion is a plain statement sequence that already ends in ';'.
*/
#define CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1(input, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps) \
    /* bytes 0..3 and 4..7 */ \
    (input_i_1) = _mm_cvtepi8_epi32((input)); \
    (input) = _mm_srli_si128((input), 4); \
    (input_i_2) = _mm_cvtepi8_epi32((input)); \
    (input) = _mm_srli_si128((input), 4); \
    (output_i32_1) = _mm_add_epi32((input_i_1), (input_i_2)); \
    /* bytes 8..11 and 12..15 */ \
    (input_i_1) = _mm_cvtepi8_epi32((input)); \
    (input) = _mm_srli_si128((input), 4); \
    (input_i_2) = _mm_cvtepi8_epi32((input)); \
    (input) = _mm_srli_si128((input), 4); \
    (output_i32_2) = _mm_add_epi32((input_i_1), (input_i_2)); \
    /* combine both halves */ \
    (output_i32) = _mm_add_epi32((output_i32_1), (output_i32_2)); \
    (output_ps) = _mm_cvtepi32_ps((output_i32));
#endif /* CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1 */
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE2
/*!
\brief Macros for U_SSE2
*/
#ifdef LV_HAVE_SSSE3
/*!
\brief Macros for U_SSSE3
*/
#ifndef CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3
/*!
\brief Builds two vectors of pairwise multiply-add results from the bytes of x_abs and sign-adjusted copies of y.

real_output: each byte of y is negated where the matching byte of x is negative
(and zeroed where it is zero), sign-adjusted a second time by
check_sign_sequence, then multiplied by the matching byte of x_abs; adjacent
products are added into 16-bit lanes with signed saturation.

imag_output: the bytes of y are first permuted by rearrange_sequence, then
sign-adjusted by x, multiplied by x_abs and pair-summed the same way.

_mm_maddubs_epi16 reads its first operand (x_abs) as unsigned bytes and its
second operand (y_aux) as signed bytes.

NOTE(review): presumably x_abs is the byte-wise absolute value of x, and the
two sequence arguments are constants that select the signs and the real/imag
byte swap of a complex product - confirm against the callers.

\param y                   first operand vector (only read)
\param x                   vector supplying the signs (only read)
\param check_sign_sequence second sign mask applied on the real path (only read)
\param rearrange_sequence  byte shuffle control used on the imag path (only read)
\param y_aux               scratch lvalue, overwritten
\param x_abs               unsigned multiplicand vector (only read)
\param real_output         lvalue receiving the first result
\param imag_output         lvalue receiving the second result

The expansion is a plain statement sequence that already ends in ';'.
*/
#define CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, x, check_sign_sequence, rearrange_sequence, y_aux, x_abs, real_output, imag_output) \
    (y_aux) = _mm_sign_epi8((y), (x)); \
    (y_aux) = _mm_sign_epi8((y_aux), (check_sign_sequence)); \
    (real_output) = _mm_maddubs_epi16((x_abs), (y_aux)); \
    \
    (y_aux) = _mm_shuffle_epi8((y), (rearrange_sequence)); \
    (y_aux) = _mm_sign_epi8((y_aux), (x)); \
    (imag_output) = _mm_maddubs_epi16((x_abs), (y_aux));
#endif /* CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3 */
#endif /* LV_HAVE_SSSE3 */
#ifndef CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2
/*!
\brief Lane-wise complex multiplication on 16-bit lanes held in separate real and imaginary vectors.

For every 16-bit lane the expansion computes
  real_output = realx * realy - imagx * imagy
  imag_output = realx * imagy + imagx * realy
Only the low 16 bits of each product are kept (_mm_mullo_epi16) and the
addition and subtraction wrap around; nothing saturates.

\param realx, imagx      real and imaginary lanes of the first operand (only read)
\param realy, imagy      real and imaginary lanes of the second operand (only read)
\param realx_mult_realy  scratch lvalue receiving realx * realy
\param imagx_mult_imagy  scratch lvalue receiving imagx * imagy
\param realx_mult_imagy  scratch lvalue receiving realx * imagy
\param imagx_mult_realy  scratch lvalue receiving imagx * realy
\param real_output       lvalue receiving the real lanes of the product
\param imag_output       lvalue receiving the imaginary lanes of the product

The expansion is a plain statement sequence that already ends in ';'.
*/
#define CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output) \
    /* the four partial products */ \
    (realx_mult_realy) = _mm_mullo_epi16((realx), (realy)); \
    (imagx_mult_imagy) = _mm_mullo_epi16((imagx), (imagy)); \
    (realx_mult_imagy) = _mm_mullo_epi16((realx), (imagy)); \
    (imagx_mult_realy) = _mm_mullo_epi16((imagx), (realy)); \
    /* combine them into the complex product */ \
    (real_output) = _mm_sub_epi16((realx_mult_realy), (imagx_mult_imagy)); \
    (imag_output) = _mm_add_epi16((realx_mult_imagy), (imagx_mult_realy));
#endif /* CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2 */
#ifndef CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2
/*!
\brief Separates a byte vector into two masked vectors: the input itself and the input shifted down by one byte.

The expansion computes
  imag = (input shifted right by one byte) AND mult1
  real = input AND mult1

NOTE(review): presumably mult1 holds 0x00FF in every 16-bit lane, in which case
real keeps the even bytes and imag the odd bytes, each zero-extended (not
sign-extended) to 16 bits - confirm the value of mult1 at the callers.

\param input source __m128i (only read)
\param mult1 bit mask applied to both results (only read)
\param real  lvalue receiving input AND mult1
\param imag  lvalue receiving the shifted input AND mult1

The expansion is a plain statement sequence that already ends in ';'.
imag is written before input is read for the last time, so imag must not be
the same variable as input.
*/
#define CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(input, mult1, real, imag) \
    (imag) = _mm_srli_si128((input), 1); \
    (imag) = _mm_and_si128((imag), (mult1)); \
    (real) = _mm_and_si128((input), (mult1));
#endif /* CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2 */
#ifndef CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2
/*!
\brief Widens sixteen 8-bit lanes to 32 bits with SSE2 only, folds each half of the vector into four sums and converts both sets of sums to float.

With the bytes of input written as [b0 .. b15], the expansion leaves
  output_ps_1 = float([b0+b4,  b1+b5,  b2+b6,   b3+b7 ])
  output_ps_2 = float([b8+b12, b9+b13, b10+b14, b11+b15])
Each byte is sign-extended by interleaving it into the top byte of a 32-bit
lane (two unpacks against zero) and then shifting right arithmetically by 24.

\param input       source __m128i (only read)
\param input_i_1   scratch lvalue, overwritten
\param input_i_2   scratch lvalue, overwritten
\param output_i32  scratch lvalue; ends up holding the integer sums of the upper half
\param output_ps_1 lvalue receiving the float sums of bytes 0..7
\param output_ps_2 lvalue receiving the float sums of bytes 8..15

The expansion is a plain statement sequence that already ends in ';'.
*/
#define CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2(input, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2) \
    /* lower half: bytes 0..7 */ \
    (input_i_1) = _mm_unpacklo_epi8(_mm_setzero_si128(), (input)); \
    (input_i_2) = _mm_unpacklo_epi16(_mm_setzero_si128(), (input_i_1)); \
    (input_i_1) = _mm_unpackhi_epi16(_mm_setzero_si128(), (input_i_1)); \
    (input_i_1) = _mm_srai_epi32((input_i_1), 24); \
    (input_i_2) = _mm_srai_epi32((input_i_2), 24); \
    (output_i32) = _mm_add_epi32((input_i_1), (input_i_2)); \
    (output_ps_1) = _mm_cvtepi32_ps((output_i32)); \
    \
    /* upper half: bytes 8..15 */ \
    (input_i_1) = _mm_unpackhi_epi8(_mm_setzero_si128(), (input)); \
    (input_i_2) = _mm_unpacklo_epi16(_mm_setzero_si128(), (input_i_1)); \
    (input_i_1) = _mm_unpackhi_epi16(_mm_setzero_si128(), (input_i_1)); \
    (input_i_1) = _mm_srai_epi32((input_i_1), 24); \
    (input_i_2) = _mm_srai_epi32((input_i_2), 24); \
    (output_i32) = _mm_add_epi32((input_i_1), (input_i_2)); \
    (output_ps_2) = _mm_cvtepi32_ps((output_i32));
#endif /* CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2 */
#ifndef CM_8IC_CONTROLMINUS128_8IC_U_SSE2
/*!
\brief Adds one to every byte of y that equals the matching byte of minus128.

_mm_cmpeq_epi8 yields 0xFF (that is, -1) in the lanes that compare equal and 0
elsewhere; subtracting that mask from y therefore increments exactly the
matching bytes and leaves the others untouched.

NOTE(review): presumably minus128 holds -128 in every byte, so that -128 is
replaced by -127 - confirm at the callers.

\param y               vector to adjust; modified in place
\param minus128        vector of values to compare against (only read)
\param minus128control lvalue receiving the comparison mask

The expansion is a plain statement sequence that already ends in ';'.
*/
#define CM_8IC_CONTROLMINUS128_8IC_U_SSE2(y, minus128, minus128control) \
    (minus128control) = _mm_cmpeq_epi8((y), (minus128)); \
    (y) = _mm_sub_epi8((y), (minus128control));
#endif /* CM_8IC_CONTROLMINUS128_8IC_U_SSE2 */
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Macros for U_GENERIC
*/
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_CommonMacros_u_H */
#ifndef INCLUDED_gnsssdr_CommonMacros_a_H
#define INCLUDED_gnsssdr_CommonMacros_a_H
#ifdef LV_HAVE_SSE4_1
/*!
\brief Macros for A_SSE4_1
*/
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_SSE2
/*!
\brief Macros for A_SSE2
*/
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Macros for A_GENERIC
*/
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_gnsssdr_CommonMacros_a_H */