/*!
 * \file CommonMacros.h
 * \brief Common macros used inside the volk protokernels.
 * \authors
 *
 * -------------------------------------------------------------------------
 *
 * Copyright (C) 2010-2014  (see AUTHORS file for a list of contributors)
 *
 * GNSS-SDR is a software defined Global Navigation
 *          Satellite Systems receiver
 *
 * This file is part of GNSS-SDR.
 *
 * GNSS-SDR is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * GNSS-SDR is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
 *
 * -------------------------------------------------------------------------
 */

/*
 * General notes on every CM_* macro in this file:
 *
 *  - Each macro expands to a sequence of complete statements, including the
 *    final ';'. It is therefore a statement macro, not an expression, and it
 *    must not be used as the unbraced body of an if/else/for/while.
 *  - Every argument must be a plain, assignable variable: arguments are
 *    assigned to and/or referenced more than once inside the expansion.
 *  - Integer vectors are __m128i, float vectors are __m128.
 *  - "Lane k" counts from the least significant end of the register.
 */

#ifndef INCLUDED_gnsssdr_CommonMacros_u_H
#define INCLUDED_gnsssdr_CommonMacros_u_H

#ifdef LV_HAVE_SSE4_1
/*!
  \brief Macros for U_SSE4_1
*/

/*
 * De-interleaves the 16-bit lanes of two vectors.
 *
 *   real = { input1[0], input2[0], input1[2], input2[2], ... }  (even lanes)
 *   imag = { input1[1], input2[1], input1[3], input2[3], ... }  (odd lanes)
 *
 * Blend mask 85 (0x55) takes lanes 0, 2, 4 and 6 from the second operand of
 * _mm_blend_epi16 and lanes 1, 3, 5 and 7 from the first one.
 * input1 and input2 are left unchanged.
 */
#ifndef CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1
#define CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1(input1, input2, real, imag) \
    imag = _mm_srli_si128(input1, 2);                                                          \
    imag = _mm_blend_epi16(input2, imag, 85);                                                  \
    real = _mm_slli_si128(input2, 2);                                                          \
    real = _mm_blend_epi16(real, input1, 85);
#endif /* CM_16IC_X2_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE4_1 */

/*
 * Sign-extends the eight 16-bit lanes of input to 32 bits, adds the upper four
 * to the lower four and converts the four sums to float:
 *
 *   output_i32[k] = input[k] + input[k + 4]      (k = 0..3)
 *   output_ps[k]  = (float) output_i32[k]
 *
 * input is CLOBBERED (it is shifted right by 8 bytes). input_i_1 and input_i_2
 * are scratch registers.
 */
#ifndef CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1
#define CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1(input, input_i_1, input_i_2, output_i32, output_ps) \
    input_i_1 = _mm_cvtepi16_epi32(input);                                                       \
    input = _mm_srli_si128(input, 8);                                                            \
    input_i_2 = _mm_cvtepi16_epi32(input);                                                       \
    output_i32 = _mm_add_epi32(input_i_1, input_i_2);                                            \
    output_ps = _mm_cvtepi32_ps(output_i32);
#endif /* CM_16IC_CONVERT_AND_ACC_32FC_U_SSE4_1 */

/*
 * Sign-extends the sixteen 8-bit lanes of input to 32 bits, four at a time,
 * accumulates the four groups and converts the four sums to float:
 *
 *   output_i32[k] = input[k] + input[k + 4] + input[k + 8] + input[k + 12]
 *   output_ps[k]  = (float) output_i32[k]        (k = 0..3)
 *
 * input is CLOBBERED: it is shifted right by 16 bytes in total, so it is left
 * all-zero. input_i_1, input_i_2, output_i32_1 and output_i32_2 are scratch
 * registers.
 */
#ifndef CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1
#define CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1(input, input_i_1, input_i_2, output_i32, output_i32_1, output_i32_2, output_ps) \
    input_i_1 = _mm_cvtepi8_epi32(input);                                                                                   \
    input = _mm_srli_si128(input, 4);                                                                                       \
    input_i_2 = _mm_cvtepi8_epi32(input);                                                                                   \
    input = _mm_srli_si128(input, 4);                                                                                       \
    output_i32_1 = _mm_add_epi32(input_i_1, input_i_2);                                                                     \
    input_i_1 = _mm_cvtepi8_epi32(input);                                                                                   \
    input = _mm_srli_si128(input, 4);                                                                                       \
    input_i_2 = _mm_cvtepi8_epi32(input);                                                                                   \
    input = _mm_srli_si128(input, 4);                                                                                       \
    output_i32_2 = _mm_add_epi32(input_i_1, input_i_2);                                                                     \
    output_i32 = _mm_add_epi32(output_i32_1, output_i32_2);                                                                 \
    output_ps = _mm_cvtepi32_ps(output_i32);
#endif /* CM_8IC_CONVERT_AND_ACC_32FC_U_SSE4_1 */

#endif /* LV_HAVE_SSE4_1 */

#ifdef LV_HAVE_SSE2
/*!
  \brief Macros for U_SSE2
*/

#ifdef LV_HAVE_SSSE3
/*!
  \brief Macros for U_SSSE3
*/

/*
 * Builds two vectors of 16-bit pair sums from 8-bit inputs.
 *
 * Real path: y has the sign of x applied to it (_mm_sign_epi8 negates a byte
 * where the control byte is negative and zeroes it where the control byte is
 * zero), then the sign of check_sign_sequence, and is finally multiplied by
 * x_abs with _mm_maddubs_epi16 (x_abs bytes are taken as unsigned, y_aux bytes
 * as signed, adjacent products are added with signed saturation to 16 bits).
 *
 * Imag path: same, but y is first permuted with rearrange_sequence and only
 * the sign of x is applied.
 *
 * NOTE(review): x_abs is presumably |x|, check_sign_sequence presumably flips
 * the sign of the imag*imag products and rearrange_sequence presumably swaps
 * the real/imag bytes of each sample -- all three are supplied by the caller,
 * confirm there. x and y are left unchanged; y_aux is a scratch register.
 */
#ifndef CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3
#define CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3(y, x, check_sign_sequence, rearrange_sequence, y_aux, x_abs, real_output, imag_output) \
    y_aux = _mm_sign_epi8(y, x);                                                                                                       \
    y_aux = _mm_sign_epi8(y_aux, check_sign_sequence);                                                                                 \
    real_output = _mm_maddubs_epi16(x_abs, y_aux);                                                                                     \
                                                                                                                                       \
    y_aux = _mm_shuffle_epi8(y, rearrange_sequence);                                                                                   \
    y_aux = _mm_sign_epi8(y_aux, x);                                                                                                   \
    imag_output = _mm_maddubs_epi16(x_abs, y_aux);
#endif /* CM_8IC_X2_SCALAR_PRODUCT_16IC_X2_U_SSSE3 */

#endif /* LV_HAVE_SSSE3 */

/*
 * Lane-wise complex multiplication on 16-bit integers, inputs already split
 * into real and imaginary vectors:
 *
 *   real_output = realx * realy - imagx * imagy
 *   imag_output = realx * imagy + imagx * realy
 *
 * Only the low 16 bits of each product are kept (_mm_mullo_epi16) and the
 * add/sub wrap around; nothing saturates. The four *_mult_* arguments are
 * scratch registers that hold the partial products afterwards.
 */
#ifndef CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2
#define CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output) \
    realx_mult_realy = _mm_mullo_epi16(realx, realy);                                                                                                                        \
    imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);                                                                                                                        \
    realx_mult_imagy = _mm_mullo_epi16(realx, imagy);                                                                                                                        \
    imagx_mult_realy = _mm_mullo_epi16(imagx, realy);                                                                                                                        \
    real_output = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);                                                                                                         \
    imag_output = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
#endif /* CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2 */

/*
 * Splits a vector of bytes into its even and odd bytes:
 *
 *   real = input & mult1
 *   imag = (input shifted right by one byte) & mult1
 *
 * NOTE(review): mult1 is presumably 0x00FF in every 16-bit lane, in which case
 * real/imag hold the even/odd bytes zero-extended (not sign-extended) to
 * 16 bits -- confirm against the callers. input is left unchanged.
 */
#ifndef CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2
#define CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2(input, mult1, real, imag) \
    imag = _mm_srli_si128(input, 1);                                                   \
    imag = _mm_and_si128(imag, mult1);                                                 \
    real = _mm_and_si128(input, mult1);
#endif /* CM_8IC_REARRANGE_VECTOR_INTO_REAL_IMAG_16IC_X2_U_SSE2 */

/*
 * SSE2-only version of the 8-bit "convert and accumulate" step. Each byte is
 * moved into the top byte of a 32-bit lane by unpacking against zero twice and
 * then sign-extended with an arithmetic right shift by 24.
 *
 *   output_ps_1[k] = (float) (input[k]     + input[k + 4])     (k = 0..3)
 *   output_ps_2[k] = (float) (input[k + 8] + input[k + 12])
 *
 * input is left unchanged. input_i_1, input_i_2 and output_i32 are scratch
 * registers (output_i32 ends up holding the integer sums of the upper half).
 */
#ifndef CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2
#define CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2(input, input_i_1, input_i_2, output_i32, output_ps_1, output_ps_2) \
    input_i_1 = _mm_unpacklo_epi8(_mm_setzero_si128(), input);                                               \
    input_i_2 = _mm_unpacklo_epi16(_mm_setzero_si128(), input_i_1);                                          \
    input_i_1 = _mm_unpackhi_epi16(_mm_setzero_si128(), input_i_1);                                          \
    input_i_1 = _mm_srai_epi32(input_i_1, 24);                                                               \
    input_i_2 = _mm_srai_epi32(input_i_2, 24);                                                               \
    output_i32 = _mm_add_epi32(input_i_1, input_i_2);                                                        \
    output_ps_1 = _mm_cvtepi32_ps(output_i32);                                                               \
                                                                                                             \
    input_i_1 = _mm_unpackhi_epi8(_mm_setzero_si128(), input);                                               \
    input_i_2 = _mm_unpacklo_epi16(_mm_setzero_si128(), input_i_1);                                          \
    input_i_1 = _mm_unpackhi_epi16(_mm_setzero_si128(), input_i_1);                                          \
    input_i_1 = _mm_srai_epi32(input_i_1, 24);                                                               \
    input_i_2 = _mm_srai_epi32(input_i_2, 24);                                                               \
    output_i32 = _mm_add_epi32(input_i_1, input_i_2);                                                        \
    output_ps_2 = _mm_cvtepi32_ps(output_i32);
#endif /* CM_8IC_CONVERT_AND_ACC_32FC_U_SSE2 */

/*
 * Adds one to every byte of y that compares equal to the matching byte of
 * minus128: the comparison yields 0xFF (-1) for equal bytes and subtracting
 * -1 increments them. With minus128 holding -128 in every byte (as the name
 * suggests), each -128 in y becomes -127 and all other bytes are untouched.
 *
 * y is modified in place; minus128control receives the comparison mask.
 */
#ifndef CM_8IC_CONTROLMINUS128_8IC_U_SSE2
#define CM_8IC_CONTROLMINUS128_8IC_U_SSE2(y, minus128, minus128control) \
    minus128control = _mm_cmpeq_epi8(y, minus128);                     \
    y = _mm_sub_epi8(y, minus128control);
#endif /* CM_8IC_CONTROLMINUS128_8IC_U_SSE2 */

#endif /* LV_HAVE_SSE2 */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Macros for U_GENERIC
*/

#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_gnsssdr_CommonMacros_u_H */


#ifndef INCLUDED_gnsssdr_CommonMacros_a_H
#define INCLUDED_gnsssdr_CommonMacros_a_H

#ifdef LV_HAVE_SSE4_1
/*!
  \brief Macros for A_SSE4_1
*/

#endif /* LV_HAVE_SSE4_1 */

#ifdef LV_HAVE_SSE2
/*!
  \brief Macros for A_SSE2
*/

#endif /* LV_HAVE_SSE2 */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Macros for A_GENERIC
*/

#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_gnsssdr_CommonMacros_a_H */