From 07feeeee3a88e16a0c2c09a01d95babc592f8430 Mon Sep 17 00:00:00 2001 From: Javier Arribas Date: Wed, 20 Jan 2016 17:45:47 +0100 Subject: [PATCH] New volk_gnss_sdr kernel: Fast conversion between 16 bit int complex to 32 bits floating point complex --- .../volk_gnsssdr_16ic_convert_32fc.h | 113 ++++++++++++++++++ .../volk_gnsssdr/lib/kernel_tests.h | 1 + 2 files changed, 114 insertions(+) create mode 100644 src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_convert_32fc.h diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_convert_32fc.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_convert_32fc.h new file mode 100644 index 000000000..566611844 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_convert_32fc.h @@ -0,0 +1,113 @@ +/*! + * \file volk_gnsssdr_16ic_convert_32fc.h + * \brief Volk protokernel: converts 16 bit integer complex values to 32 bits float complex values + * \authors + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see . + * + * ------------------------------------------------------------------------- + */ + + +#include +#include +#include + +#ifndef INCLUDED_volk_gnsssdr_16ic_convert_32fc_H +#define INCLUDED_volk_gnsssdr_16ic_convert_32fc_H + + +#ifdef LV_HAVE_GENERIC +/*! \brief Converts num_points complex 16-bit integer samples from inputVector into complex 32-bit float samples in outputVector, casting the real and imaginary part of each sample to float one sample at a time. */ +static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) +{ + for(unsigned int i = 0; i < num_points; i++) + { + outputVector[i]=lv_cmake((float)lv_creal(inputVector[i]),(float)lv_cimag(inputVector[i])); + } +} +#endif /* LV_HAVE_GENERIC */ + +#ifdef LV_HAVE_SSE2 +#include +/*! \brief Same conversion as the generic kernel, two samples per iteration: the four casted floats are packed with _mm_set_ps and written with the aligned store _mm_store_ps (so outputVector must be 16-byte aligned); an odd trailing sample is converted by the scalar loop at the end. */ +static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) +{ + const unsigned int sse_iters = num_points / 2; + + const lv_16sc_t* _in = inputVector; + lv_32fc_t* _out = outputVector; + __m128 a; + for(unsigned int number = 0; number < sse_iters; number++) + { + a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // pack two samples as floats into one 128-bit register: real0, imag0, real1, imag1 from the lowest lane up (_mm_set_ps takes its arguments highest lane first) + _mm_store_ps((float*)_out, a); + _in+=2; + _out+=2; + //*_out++ = lv_cmake((float)lv_creal(*_in),(float)lv_cimag(*_in)); + //_in++; + //*_out++ = lv_cmake((float)lv_creal(*_in),(float)lv_cimag(*_in)); + //_in++; + } + for (unsigned int i = 0; i < (num_points % 2); ++i) + { + *_out++ = lv_cmake((float)lv_creal(*_in),(float)lv_cimag(*_in)); + _in++; + } + +} +#endif /* LV_HAVE_SSE2 */ + +#ifdef LV_HAVE_SSE2 +#include +/*! \brief Two-samples-per-iteration conversion identical to the a_sse2 kernel except that each result is written with the unaligned store _mm_storeu_ps, so outputVector needs no particular alignment; an odd trailing sample is converted by the scalar loop at the end. */ +static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points) +{ + const unsigned int sse_iters = num_points / 2; + + const lv_16sc_t* _in = inputVector; + lv_32fc_t* _out = outputVector; + __m128 a; + for(unsigned int number = 0; number < 
sse_iters; number++) + { + a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // pack two samples as floats into one 128-bit register: real0, imag0, real1, imag1 from the lowest lane up (_mm_set_ps takes its arguments highest lane first) + _mm_storeu_ps((float*)_out, a); + _in+=2; + _out+=2; + } + for (unsigned int i = 0; i < (num_points % 2); ++i) + { + *_out++ = lv_cmake((float)lv_creal(*_in),(float)lv_cimag(*_in)); + _in++; + } + +} +#endif /* LV_HAVE_SSE2 */ + +// TODO: SSE4.1 variant, sketched below +// a = _mm_load_si128((__m128i*)_in); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg +// then widen the 16-bit lanes with _mm_cvtepi16_epi32 + +#endif /* INCLUDED_volk_gnsssdr_16ic_convert_32fc_H */ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/kernel_tests.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/kernel_tests.h index 24c7381ee..b00540b41 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/kernel_tests.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/lib/kernel_tests.h @@ -76,6 +76,7 @@ std::vector init_test_list(volk_gnsssdr_test_params_t (VOLK_INIT_PUPP(volk_gnsssdr_16ic_resamplerpuppet_16ic, volk_gnsssdr_16ic_resampler_16ic, test_params)) (VOLK_INIT_PUPP(volk_gnsssdr_16ic_resamplerxnpuppet_16ic, volk_gnsssdr_16ic_xn_resampler_16ic_xn, test_params)) (VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_dot_prod_16ic_xn, test_params)) + (VOLK_INIT_TEST(volk_gnsssdr_16ic_convert_32fc, test_params)) ; return test_cases;