1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2025-01-17 20:53:02 +00:00

Merge branch 'next' of https://github.com/gnss-sdr/gnss-sdr into next

This commit is contained in:
Carles Fernandez 2016-02-01 09:15:03 +01:00
commit d3705aa30c
20 changed files with 543 additions and 286 deletions

View File

@ -35,13 +35,13 @@ main(int argc, char **argv)
desc.add_options()
("help,h", "print help message")
("prefix", "print VOLK installation prefix")
("cc", "print VOLK C compiler version")
("cflags", "print VOLK CFLAGS")
("all-machines", "print VOLK machines built into library")
("avail-machines", "print VOLK machines the current platform can use")
("machine", "print the VOLK machine that will be used")
("version,v", "print VOLK version")
("prefix", "print VOLK_GNSSSDR installation prefix")
("cc", "print VOLK_GNSSSDR C compiler version")
("cflags", "print VOLK_GNSSSDR CFLAGS")
("all-machines", "print VOLK_GNSSSDR machines built into library")
("avail-machines", "print VOLK_GNSSSDR machines the current platform can use")
("machine", "print the VOLK_GNSSSDR machine that will be used")
("version,v", "print VOLK_GNSSSDR version")
;
try {

View File

@ -1,23 +1,24 @@
/* -*- c++ -*- */
/*
* Copyright 2006,2009,2013 Free Software Foundation, Inc.
/*!
* \file constants.h
* \brief Definition of VOLK_GNSSSDR-related constants
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
*
* This file is part of GNU Radio
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNU Radio is free software; you can redistribute it and/or modify
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_VOLK_GNSSSDR_CONSTANTS_H

View File

@ -1,25 +1,34 @@
#ifndef SATURATION_ARITHMETIC_H_
#define SATURATION_ARITHMETIC_H_
/*!
* \file saturation_arithmetic.h
* \brief Defines addition of 16-bit integers with saturation
* \author Javier Arribas, 2015. javier.arribas(at)cttc.es
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_VOLK_GNSSSDR_SATURATION_ARITHMETIC_H_
#define INCLUDED_VOLK_GNSSSDR_SATURATION_ARITHMETIC_H_
#include <limits.h>
//#include <types.h>
static inline int16_t sat_adds16i(int16_t x, int16_t y)
{
// int16_t ux = x;
// int16_t uy = y;
// int16_t res = ux + uy;
//
// /* Calculate overflowed result. (Don't change the sign bit of ux) */
// ux = (ux >> 15) + SHRT_MAX;
//
// /* Force compiler to use cmovns instruction */
// if ((int16_t) ((ux ^ uy) | ~(uy ^ res)) >= 0)
// {
// res = ux;
// }
//
// return res;
int32_t res = (int32_t) x + (int32_t) y;
if (res < SHRT_MIN) res = SHRT_MIN;
@ -28,4 +37,4 @@ static inline int16_t sat_adds16i(int16_t x, int16_t y)
return res;
}
#endif /*SATURATION_ARITHMETIC_H_*/
#endif /* INCLUDED_VOLK_GNSSSDR_SATURATION_ARITHMETIC_H_ */

View File

@ -1,6 +1,8 @@
/*!
* \file volk_gnsssdr_avx_intrinsics.h
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* \brief This file is intended to hold AVX intrinsics of intrinsics.
* They should be used in VOLK kernels to avoid copy-paste.
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
@ -20,13 +22,9 @@
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* This file is intended to hold AVX intrinsics of intrinsics.
* They should be used in VOLK kernels to avoid copy-pasta.
*/
#ifndef INCLUDE_VOLK_VOLK_AVX_INTRINSICS_H_
#define INCLUDE_VOLK_VOLK_AVX_INTRINSICS_H_
#ifndef INCLUDED_VOLK_VOLK_AVX_INTRINSICS_H_
#define INCLUDED_VOLK_VOLK_AVX_INTRINSICS_H_
#include <immintrin.h>
static inline __m256

View File

@ -1,6 +1,8 @@
/*!
* \file volk_gnsssdr_sse3_intrinsics.h
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* \brief Holds SSE3 intrinsics of intrinsics.
* They should be used in VOLK kernels to avoid copy-paste.
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
@ -21,13 +23,8 @@
*/
/*
* This file is intended to hold SSE3 intrinsics of intrinsics.
* They should be used in VOLK kernels to avoid copy-pasta.
*/
#ifndef INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_
#define INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_
#ifndef INCLUDED_VOLK_VOLK_SSE3_INTRINSICS_H_
#define INCLUDED_VOLK_VOLK_SSE3_INTRINSICS_H_
#include <pmmintrin.h>
static inline __m128

View File

@ -1,6 +1,8 @@
/*!
* \file volk_gnsssdr_sse_intrinsics.h
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
* \brief Holds SSE intrinsics of intrinsics.
* They should be used in VOLK kernels to avoid copy-paste
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
@ -20,13 +22,8 @@
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* This file is intended to hold SSE intrinsics of intrinsics.
* They should be used in VOLK kernels to avoid copy-pasta.
*/
#ifndef INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_
#define INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_
#ifndef INCLUDED_VOLK_VOLK_SSE_INTRINSICS_H_
#define INCLUDED_VOLK_VOLK_SSE_INTRINSICS_H_
#include <xmmintrin.h>
static inline __m128
@ -46,4 +43,4 @@ _mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2){
return _mm_sqrt_ps(_mm_magnitudesquared_ps(cplxValue1, cplxValue2));
}
#endif /* INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_ */
#endif /* INCLUDED_VOLK_VOLK_SSE_INTRINSICS_H_ */

View File

@ -38,6 +38,12 @@
#ifdef LV_HAVE_GENERIC
/*!
\brief Converts a complex vector with 16-bit integer components into a complex vector with 32-bit float components.
\param[out] outputVector The complex 32-bit float output data buffer
\param[in] inputVector The complex 16-bit integer input data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{
for(unsigned int i = 0; i < num_points; i++)
@ -50,6 +56,12 @@ static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVecto
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Converts a complex vector with 16-bit integer components into a complex vector with 32-bit float components.
\param[out] outputVector The complex 32-bit float output data buffer
\param[in] inputVector The complex 16-bit integer input data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 2;
@ -80,6 +92,12 @@ static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Converts a complex vector with 16-bit integer components into a complex vector with 32-bit float components.
\param[out] outputVector The complex 32-bit float output data buffer
\param[in] inputVector The complex 16-bit integer input data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 2;
@ -110,6 +128,12 @@ static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Converts a complex vector with 16-bit integer components into a complex vector with 32-bit float components.
\param[out] outputVector The complex 32-bit float output data buffer
\param[in] inputVector The complex 16-bit integer input data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_16ic_convert_32fc_neon(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 2;

View File

@ -47,12 +47,15 @@
//int round_int( float r ) {
// return (r > 0.0) ? (r + 0.5) : (r - 0.5);
//}
/*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param cVector The vector where the result will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
\brief Resamples a complex vector (16-bit integer each component)
\param[out] result The vector where the result will be stored
\param[in] local_code The vector to be resampled
\param[in] rem_code_phase_chips Remnant code phase [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_resampler_16ic_generic(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)
{
@ -73,6 +76,16 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_generic(lv_16sc_t* result, c
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Resamples a complex vector (16-bit integer each component)
\param[out] result The vector where the result will be stored
\param[in] local_code The vector to be resampled
\param[in] rem_code_phase_chips Remnant code phase [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
{
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
@ -155,6 +168,15 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Resamples a complex vector (16-bit integer each component)
\param[out] result The vector where the result will be stored
\param[in] local_code The vector to be resampled
\param[in] rem_code_phase_chips Remnant code phase [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
{
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
@ -235,6 +257,16 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Resamples a complex vector (16-bit integer each component)
\param[out] result The vector where the result will be stored
\param[in] local_code The vector to be resampled
\param[in] rem_code_phase_chips Remnant code phase [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
{
unsigned int number;
@ -281,7 +313,6 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons
{
_code_phase_out = vmulq_f32(_code_phase_step_chips, _4output_index); //compute the code phase point with the phase step
_code_phase_out_with_offset = vaddq_f32(_code_phase_out, _rem_code_phase); //add the phase offset
//_code_phase_out_int = _mm_cvtps_epi32(_code_phase_out_with_offset); //convert to integer int32x4_t = f(float32x4_t)
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(_code_phase_out_with_offset), 31)));
PlusHalf = vaddq_f32(_code_phase_out_with_offset, half);
Round = vsubq_f32(PlusHalf, sign);

View File

@ -1,6 +1,6 @@
/*!
* \file volk_gnsssdr_16ic_resamplerxnpuppet_16ic.h
* \brief Volk puppet for the 16-bit complex vector resampler kernel
* \brief Volk puppet for the multiple 16-bit complex vector resampler kernel
* \authors <ul>
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
* </ul>

View File

@ -1,3 +1,37 @@
/*!
* \file volk_gnsssdr_16ic_rotatorpuppet_16ic.h
* \brief Volk puppet for the 16-bit complex rotator kernel
* \authors <ul>
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
* </ul>
*
* Volk puppet for integrating the rotator into volk's test system
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_16ic_rotatorpuppet_16ic_H
#define INCLUDED_volk_gnsssdr_16ic_rotatorpuppet_16ic_H

View File

@ -45,6 +45,14 @@
#ifdef LV_HAVE_GENERIC
/*!
\brief Rotates a complex vector (16-bit integer samples each component)
\param[out] outVector Rotated vector
\param[in] inVector Vector to be rotated
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] num_points The number of complex values in inVector to be rotated and stored into outVector
*/
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{
unsigned int i = 0;
@ -76,6 +84,14 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* ou
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Rotates a complex vector (16-bit integer samples each component)
\param[out] outVector Rotated vector
\param[in] inVector Vector to be rotated
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] num_points The number of complex values in inVector to be rotated and stored into outVector
*/
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;
@ -164,6 +180,14 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
\brief Rotates a complex vector (16-bit integer samples each component)
\param[out] outVector Rotated vector
\param[in] inVector Vector to be rotated
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] num_points The number of complex values in inVector to be rotated and stored into outVector
*/
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;
@ -209,6 +233,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
//next two samples
_in += 2;
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
@ -252,6 +277,14 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Rotates a complex vector (16-bit integer samples each component)
\param[out] outVector Rotated vector
\param[in] inVector Vector to be rotated
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
\param[in,out] phase Initial / final phase
\param[in] num_points The number of complex values in inVector to be rotated and stored into outVector
*/
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{
unsigned int i = 0;

View File

@ -42,12 +42,13 @@
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
\param[out] result Value of the accumulated result
\param[in] in_a One of the vectors to be multiplied and accumulated
\param[in] in_b One of the vectors to be multiplied and accumulated
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
@ -64,6 +65,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result,
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
\param[out] out Value of the accumulated result
\param[in] in_a One of the vectors to be multiplied and accumulated
\param[in] in_b One of the vectors to be multiplied and accumulated
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0);
@ -92,10 +101,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
// a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
a = _mm_load_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
b = _mm_load_si128((__m128i*)_in_b);
c = _mm_mullo_epi16 (a, b); // a3.i*b3.i, a3.r*b3.r, ....
c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
c_sr = _mm_srli_si128 (c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
real = _mm_subs_epi16 (c,c_sr);
c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
real = _mm_subs_epi16(c,c_sr);
b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i ....
a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i ....
@ -105,17 +114,17 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
imag = _mm_adds_epi16(imag1, imag2); //with saturation arithmetic!
realcacc = _mm_adds_epi16 (realcacc, real);
imagcacc = _mm_adds_epi16 (imagcacc, imag);
realcacc = _mm_adds_epi16(realcacc, real);
imagcacc = _mm_adds_epi16(imagcacc, imag);
_in_a += 4;
_in_b += 4;
}
realcacc = _mm_and_si128 (realcacc, mask_real);
imagcacc = _mm_and_si128 (imagcacc, mask_imag);
realcacc = _mm_and_si128(realcacc, mask_real);
imagcacc = _mm_and_si128(imagcacc, mask_imag);
result = _mm_or_si128 (realcacc, imagcacc);
result = _mm_or_si128(realcacc, imagcacc);
_mm_store_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector
@ -128,7 +137,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
for (unsigned int i = 0; i < (num_points % 4); ++i)
{
lv_16sc_t tmp = (*_in_a++) * (*_in_b++);
dotProduct = lv_cmake( sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp)));
dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp)));
}
*_out = dotProduct;
@ -140,6 +149,13 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
\param[out] out Value of the accumulated result
\param[in] in_a One of the vectors to be multiplied and accumulated
\param[in] in_b One of the vectors to be multiplied and accumulated
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0);
@ -149,6 +165,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
const lv_16sc_t* _in_a = in_a;
const lv_16sc_t* _in_b = in_b;
lv_16sc_t* _out = out;
unsigned int i;
if (sse_iters > 0)
{
@ -168,10 +185,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
// a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
a = _mm_loadu_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
b = _mm_loadu_si128((__m128i*)_in_b);
c = _mm_mullo_epi16 (a, b); // a3.i*b3.i, a3.r*b3.r, ....
c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
c_sr = _mm_srli_si128 (c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
real = _mm_subs_epi16 (c,c_sr);
c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
real = _mm_subs_epi16(c, c_sr);
b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i ....
a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i ....
@ -181,30 +198,30 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
imag = _mm_adds_epi16(imag1, imag2); //with saturation arithmetic!
realcacc = _mm_adds_epi16 (realcacc, real);
imagcacc = _mm_adds_epi16 (imagcacc, imag);
realcacc = _mm_adds_epi16(realcacc, real);
imagcacc = _mm_adds_epi16(imagcacc, imag);
_in_a += 4;
_in_b += 4;
}
realcacc = _mm_and_si128 (realcacc, mask_real);
imagcacc = _mm_and_si128 (imagcacc, mask_imag);
realcacc = _mm_and_si128(realcacc, mask_real);
imagcacc = _mm_and_si128(imagcacc, mask_imag);
result = _mm_or_si128 (realcacc, imagcacc);
result = _mm_or_si128(realcacc, imagcacc);
_mm_storeu_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector
for (int i = 0; i < 4; ++i)
for (i = 0; i < 4; ++i)
{
dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i])));
}
}
for (unsigned int i = 0; i < (num_points % 4); ++i)
for (i = 0; i < (num_points % 4); ++i)
{
lv_16sc_t tmp = (*_in_a++) * (*_in_b++);
dotProduct = lv_cmake( sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp)));
dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp)));
}
*_out = dotProduct;
@ -214,6 +231,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
\param[out] out Value of the accumulated result
\param[in] in_a One of the vectors to be multiplied and accumulated
\param[in] in_b One of the vectors to be multiplied and accumulated
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
unsigned int quarter_points = num_points / 4;

View File

@ -42,11 +42,12 @@
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] result Array of num_a_vectors components where the accumulated results (one per version of in_a) will be stored
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
@ -68,6 +69,15 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* resu
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] out Array of num_a_vectors components where the accumulated results (one per version of in_a) will be stored
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The number of complex values to be multiplied together, accumulated and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
lv_16sc_t dotProduct = lv_cmake(0,0);
@ -87,8 +97,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out,
__m128i* realcacc;
__m128i* imagcacc;
realcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0
imagcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0
realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
__m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result;
@ -163,6 +173,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out,
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] out Array of num_a_vectors components where the accumulated results (one per version of in_a) will be stored
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The number of complex values to be multiplied together, accumulated and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
lv_16sc_t dotProduct = lv_cmake(0,0);
@ -182,8 +200,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
__m128i* realcacc;
__m128i* imagcacc;
realcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0
imagcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0
realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
__m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result;
@ -214,8 +232,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
imag = _mm_adds_epi16(imag1, imag2);
realcacc[n_vec] = _mm_adds_epi16 (realcacc[n_vec], real);
imagcacc[n_vec] = _mm_adds_epi16 (imagcacc[n_vec], imag);
realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real);
imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag);
}
_in_common += 4;
@ -223,10 +241,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
for (int n_vec=0;n_vec<num_a_vectors;n_vec++)
{
realcacc[n_vec] = _mm_and_si128 (realcacc[n_vec], mask_real);
imagcacc[n_vec] = _mm_and_si128 (imagcacc[n_vec], mask_imag);
realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real);
imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag);
result = _mm_or_si128 (realcacc[n_vec], imagcacc[n_vec]);
result = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]);
_mm_storeu_si128((__m128i*)dotProductVector, result); // Store the results back into the dot product vector
dotProduct = lv_cmake(0,0);
@ -258,6 +276,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
\param[out] out Array of num_a_vectors components where the accumulated result of multiplying in_common by each version of in_a will be stored
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
\param[in] num_points The number of complex values to be multiplied together, accumulated and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
lv_16sc_t dotProduct = lv_cmake(0,0);

View File

@ -1,6 +1,6 @@
/*!
* \file volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h
* \brief Volk puppet for the 16-bit complex vector resampler kernel
* \brief Volk puppet for the multiple 16-bit complex dot product kernel
* \authors <ul>
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
* </ul>

View File

@ -42,10 +42,10 @@
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param cVector The vector where the result will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
\param[out] result The vector where the result will be stored
\param[in] in_a One of the vectors to be multiplied
\param[in] in_b One of the vectors to be multiplied
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together and stored into result
*/
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
@ -61,6 +61,14 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result,
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param[out] out The vector where the result will be stored
\param[in] in_a One of the vectors to be multiplied
\param[in] in_b One of the vectors to be multiplied
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;
@ -112,6 +120,14 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param[out] out The vector where the result will be stored
\param[in] in_a One of the vectors to be multiplied
\param[in] in_b One of the vectors to be multiplied
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;
@ -164,6 +180,13 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, con
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param[out] out The vector where the result will be stored
\param[in] in_a One of the vectors to be multiplied
\param[in] in_b One of the vectors to be multiplied
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together and stored into out
*/
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
{
lv_16sc_t *a_ptr = (lv_16sc_t*) in_a;

View File

@ -48,15 +48,18 @@
//int round_int( float r ) {
// return (r > 0.0) ? (r + 0.5) : (r - 0.5);
//}
/*!
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
\param cVector The vector where the result will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
*/
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
/*!
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
\param[out] result Pointer to the vector where the results will be stored
\param[in] local_code Vector containing the local code to be resampled
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_out_vectors Number of output vectors
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips, float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
{
int local_code_chip_index;
//fesetround(FE_TONEAREST);
@ -65,9 +68,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** re
for (unsigned int n = 0; n < num_output_samples; n++)
{
// resample code for current tap
local_code_chip_index = round(code_phase_step_chips * (float)(n) + rem_code_phase_chips[current_vector]-0.5f);
local_code_chip_index = round(code_phase_step_chips * (float)(n) + rem_code_phase_chips[current_vector] - 0.5f);
if (local_code_chip_index < 0.0) local_code_chip_index += code_length_chips;
if (local_code_chip_index > (code_length_chips-1)) local_code_chip_index -= code_length_chips;
if (local_code_chip_index > (code_length_chips - 1)) local_code_chip_index -= code_length_chips;
//std::cout<<"g["<<n<<"]="<<code_phase_step_chips*static_cast<float>(n) + rem_code_phase_chips-0.5f<<","<<local_code_chip_index<<" ";
result[current_vector][n] = local_code[local_code_chip_index];
}
@ -80,6 +83,17 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** re
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
\param[out] result Pointer to the vector where the results will be stored
\param[in] local_code Vector containing the local code to be resampled
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_out_vectors Number of output vectors
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
{
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
@ -172,6 +186,16 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
\param[out] result Pointer to the vector where the results will be stored
\param[in] local_code Vector containing the local code to be resampled
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_out_vectors Number of output vectors
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
{
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
@ -265,6 +289,16 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
\param[out] result Pointer to the vector where the results will be stored
\param[in] local_code Vector containing the local code to be resampled
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
\param[in] code_length_chips Code length in chips
\param[in] num_out_vectors Number of output vectors
\param[in] num_output_samples Number of samples to be processed
*/
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
{
unsigned int number;

View File

@ -39,11 +39,12 @@
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 16-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
@ -92,11 +93,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector
#ifdef LV_HAVE_SSE
#include <xmmintrin.h> // __m64, __m128 ??
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 16-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
@ -146,11 +148,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector,
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 16-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
@ -199,11 +202,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 16-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
@ -252,11 +256,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector,
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 16-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
@ -314,11 +319,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector,
#endif /* LV_HAVE_NEON */
#ifdef LV_HAVE_GENERIC
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 16-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
@ -337,4 +343,5 @@ static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVecto
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_H */

View File

@ -41,11 +41,12 @@
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 8-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
@ -103,11 +104,12 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector,
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 8-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
@ -130,11 +132,12 @@ static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector,
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 8-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
@ -193,11 +196,12 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector,
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
\param[out] outputVector The complex 8-bit integer output data buffer
\param[in] inputVector The complex 32-bit float data buffer
\param[in] num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{

View File

@ -41,12 +41,13 @@
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param[out] result Value of the accumulated result
\param[in] input One of the vectors to be multiplied
\param[in] taps One of the vectors to be multiplied
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
{
@ -93,12 +94,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, co
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param[out] result Value of the accumulated result
\param[in] input One of the vectors to be multiplied
\param[in] taps One of the vectors to be multiplied
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
{
@ -174,12 +176,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param[out] result Value of the accumulated result
\param[in] input One of the vectors to be multiplied
\param[in] taps One of the vectors to be multiplied
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
{
@ -254,12 +257,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param[out] result Value of the accumulated result
\param[in] input One of the vectors to be multiplied
\param[in] taps One of the vectors to be multiplied
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
{
@ -335,12 +339,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, con
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param[out] result Value of the accumulated result
\param[in] input One of the vectors to be multiplied
\param[in] taps One of the vectors to be multiplied
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
{
@ -413,12 +418,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c
#endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_ORC
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param[out] result Value of the accumulated result
\param[in] input One of the vectors to be multiplied
\param[in] taps One of the vectors to be multiplied
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/
extern void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(short* resRealShort, short* resImagShort, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points);
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
@ -440,12 +446,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, cons
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
/*!
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
\param cVector The vector where the accumulated result will be stored
\param aVector One of the vectors to be multiplied and accumulated
\param bVector One of the vectors to be multiplied and accumulated
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
\param[out] result Value of the accumulated result
\param[in] input One of the vectors to be multiplied
\param[in] taps One of the vectors to be multiplied
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
{
@ -458,7 +465,7 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const
// for 2-lane vectors, 1st lane holds the real part,
// 2nd lane holds the imaginary part
int8x8x2_t a_val, b_val, c_val, accumulator, tmp_real, tmp_imag;
lv_8sc_t accum_result[8] = { lv_cmake(0,0) };
__VOLK_ATTR_ALIGNED(16) lv_8sc_t accum_result[8] = { lv_cmake(0,0) };
accumulator.val[0] = vdup_n_s8(0);
accumulator.val[1] = vdup_n_s8(0);
unsigned int number;

View File

@ -40,12 +40,13 @@
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
\param[out] cVector The vector where the results will be stored
\param[in] aVector One of the vectors to be multiplied
\param[in] bVector One of the vectors to be multiplied
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{
@ -63,26 +64,26 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co
x = _mm_loadu_si128((__m128i*)a);
y = _mm_loadu_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagx = _mm_srli_si128(x, 1);
imagx = _mm_and_si128(imagx, mult1);
realx = _mm_and_si128(x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
imagy = _mm_srli_si128(y, 1);
imagy = _mm_and_si128(imagy, mult1);
realy = _mm_and_si128(y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realx_mult_realy = _mm_mullo_epi16(realx, realy);
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
realc = _mm_and_si128 (realc, mult1);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
imagc = _mm_and_si128 (imagc, mult1);
imagc = _mm_slli_si128 (imagc, 1);
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
realc = _mm_and_si128(realc, mult1);
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
imagc = _mm_and_si128(imagc, mult1);
imagc = _mm_slli_si128(imagc, 1);
totalc = _mm_or_si128 (realc, imagc);
totalc = _mm_or_si128(realc, imagc);
_mm_storeu_si128((__m128i*)c, totalc);
@ -100,12 +101,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
\brief Multiplies two input complex vectors, whose real and imaginary components are 8-bit integers, and stores the results in a third vector
\param[out] cVector The vector where the results will be stored
\param[in] aVector One of the vectors to be multiplied
\param[in] bVector One of the vectors to be multiplied
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{
@ -125,24 +127,24 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector,
x = _mm_lddqu_si128((__m128i*)a);
y = _mm_lddqu_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagx = _mm_srli_si128(x, 1);
imagx = _mm_and_si128(imagx, mult1);
realx = _mm_and_si128(x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
imagy = _mm_srli_si128(y, 1);
imagy = _mm_and_si128(imagy, mult1);
realy = _mm_and_si128(y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realx_mult_realy = _mm_mullo_epi16(realx, realy);
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
imagc = _mm_slli_si128 (imagc, 1);
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
imagc = _mm_slli_si128(imagc, 1);
totalc = _mm_blendv_epi8 (imagc, realc, mult1);
totalc = _mm_blendv_epi8(imagc, realc, mult1);
_mm_storeu_si128((__m128i*)c, totalc);
@ -159,12 +161,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector,
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
\brief Multiplies two input complex vectors, whose real and imaginary components are 8-bit integers, and stores the results in a third vector
\param[out] cVector The vector where the results will be stored
\param[in] aVector One of the vectors to be multiplied
\param[in] bVector One of the vectors to be multiplied
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{
@ -182,12 +185,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, c
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
\brief Multiplies two input complex vectors, whose real and imaginary components are 8-bit integers, and stores the results in a third vector
\param[out] cVector The vector where the results will be stored
\param[in] aVector One of the vectors to be multiplied
\param[in] bVector One of the vectors to be multiplied
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{
@ -205,24 +209,24 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, co
x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagx = _mm_srli_si128(x, 1);
imagx = _mm_and_si128(imagx, mult1);
realx = _mm_and_si128(x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
imagy = _mm_srli_si128(y, 1);
imagy = _mm_and_si128(imagy, mult1);
realy = _mm_and_si128(y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realx_mult_realy = _mm_mullo_epi16(realx, realy);
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
realc = _mm_and_si128 (realc, mult1);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
imagc = _mm_and_si128 (imagc, mult1);
imagc = _mm_slli_si128 (imagc, 1);
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
realc = _mm_and_si128(realc, mult1);
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
imagc = _mm_and_si128(imagc, mult1);
imagc = _mm_slli_si128(imagc, 1);
totalc = _mm_or_si128 (realc, imagc);
@ -242,12 +246,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, co
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
\brief Multiplies two input complex vectors, whose real and imaginary components are 8-bit integers, and stores the results in a third vector
\param[out] cVector The vector where the results will be stored
\param[in] aVector One of the vectors to be multiplied
\param[in] bVector One of the vectors to be multiplied
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{
@ -267,24 +272,24 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector,
x = _mm_load_si128((__m128i*)a);
y = _mm_load_si128((__m128i*)b);
imagx = _mm_srli_si128 (x, 1);
imagx = _mm_and_si128 (imagx, mult1);
realx = _mm_and_si128 (x, mult1);
imagx = _mm_srli_si128(x, 1);
imagx = _mm_and_si128(imagx, mult1);
realx = _mm_and_si128(x, mult1);
imagy = _mm_srli_si128 (y, 1);
imagy = _mm_and_si128 (imagy, mult1);
realy = _mm_and_si128 (y, mult1);
imagy = _mm_srli_si128(y, 1);
imagy = _mm_and_si128(imagy, mult1);
realy = _mm_and_si128(y, mult1);
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
realx_mult_realy = _mm_mullo_epi16(realx, realy);
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
imagc = _mm_slli_si128 (imagc, 1);
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
imagc = _mm_slli_si128(imagc, 1);
totalc = _mm_blendv_epi8 (imagc, realc, mult1);
totalc = _mm_blendv_epi8(imagc, realc, mult1);
_mm_store_si128((__m128i*)c, totalc);
@ -302,14 +307,16 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector,
#ifdef LV_HAVE_ORC
/*!
\brief Multiplies the two input complex vectors and stores their results in the third vector
\param cVector The vector where the results will be stored
\param aVector One of the vectors to be multiplied
\param bVector One of the vectors to be multiplied
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
extern void volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points);
/*!
\brief Multiplies two input complex vectors, whose real and imaginary components are 8-bit integers, and stores the results in a third vector
\param[out] cVector The vector where the results will be stored
\param[in] aVector One of the vectors to be multiplied
\param[in] bVector One of the vectors to be multiplied
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{
volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(cVector, aVector, bVector, num_points);