mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2025-01-17 20:53:02 +00:00
Merge branch 'next' of https://github.com/gnss-sdr/gnss-sdr into next
This commit is contained in:
commit
d3705aa30c
@ -35,13 +35,13 @@ main(int argc, char **argv)
|
||||
|
||||
desc.add_options()
|
||||
("help,h", "print help message")
|
||||
("prefix", "print VOLK installation prefix")
|
||||
("cc", "print VOLK C compiler version")
|
||||
("cflags", "print VOLK CFLAGS")
|
||||
("all-machines", "print VOLK machines built into library")
|
||||
("avail-machines", "print VOLK machines the current platform can use")
|
||||
("machine", "print the VOLK machine that will be used")
|
||||
("version,v", "print VOLK version")
|
||||
("prefix", "print VOLK_GNSSSDR installation prefix")
|
||||
("cc", "print VOLK_GNSSSDR C compiler version")
|
||||
("cflags", "print VOLK_GNSSSDR CFLAGS")
|
||||
("all-machines", "print VOLK_GNSSSDR machines built into library")
|
||||
("avail-machines", "print VOLK_GNSSSDR machines the current platform can use")
|
||||
("machine", "print the VOLK_GNSSSDR machine that will be used")
|
||||
("version,v", "print VOLK_GNSSSDR version")
|
||||
;
|
||||
|
||||
try {
|
||||
|
@ -1,23 +1,24 @@
|
||||
/* -*- c++ -*- */
|
||||
/*
|
||||
* Copyright 2006,2009,2013 Free Software Foundation, Inc.
|
||||
/*!
|
||||
* \file constants.h
|
||||
* \brief Definition of VOLK_GNSSSDR-related constants
|
||||
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
*
|
||||
* This file is part of GNU Radio
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNU Radio is free software; you can redistribute it and/or modify
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3, or (at your option)
|
||||
* any later version.
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNU Radio is distributed in the hope that it will be useful,
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Radio; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street,
|
||||
* Boston, MA 02110-1301, USA.
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_VOLK_GNSSSDR_CONSTANTS_H
|
||||
|
@ -1,25 +1,34 @@
|
||||
#ifndef SATURATION_ARITHMETIC_H_
|
||||
#define SATURATION_ARITHMETIC_H_
|
||||
/*!
|
||||
* \file saturation_arithmetic.h
|
||||
* \brief Defines addition of 16-bit integers with saturation
|
||||
* \author Javier Arribas, 2015. javier.arribas(at)cttc.es
|
||||
*
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef INCLUDED_VOLK_GNSSSDR_SATURATION_ARITHMETIC_H_
|
||||
#define INCLUDED_VOLK_GNSSSDR_SATURATION_ARITHMETIC_H_
|
||||
|
||||
#include <limits.h>
|
||||
//#include <types.h>
|
||||
|
||||
static inline int16_t sat_adds16i(int16_t x, int16_t y)
|
||||
{
|
||||
// int16_t ux = x;
|
||||
// int16_t uy = y;
|
||||
// int16_t res = ux + uy;
|
||||
//
|
||||
// /* Calculate overflowed result. (Don't change the sign bit of ux) */
|
||||
// ux = (ux >> 15) + SHRT_MAX;
|
||||
//
|
||||
// /* Force compiler to use cmovns instruction */
|
||||
// if ((int16_t) ((ux ^ uy) | ~(uy ^ res)) >= 0)
|
||||
// {
|
||||
// res = ux;
|
||||
// }
|
||||
//
|
||||
// return res;
|
||||
|
||||
int32_t res = (int32_t) x + (int32_t) y;
|
||||
|
||||
if (res < SHRT_MIN) res = SHRT_MIN;
|
||||
@ -28,4 +37,4 @@ static inline int16_t sat_adds16i(int16_t x, int16_t y)
|
||||
return res;
|
||||
}
|
||||
|
||||
#endif /*SATURATION_ARITHMETIC_H_*/
|
||||
#endif /* INCLUDED_VOLK_GNSSSDR_SATURATION_ARITHMETIC_H_ */
|
||||
|
@ -1,6 +1,8 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_avx_intrinsics.h
|
||||
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* \brief This file is intended to hold AVX intrinsics of intrinsics.
|
||||
* They should be used in VOLK kernels to avoid copy-paste.
|
||||
*
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
@ -20,13 +22,9 @@
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is intended to hold AVX intrinsics of intrinsics.
|
||||
* They should be used in VOLK kernels to avoid copy-pasta.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_VOLK_VOLK_AVX_INTRINSICS_H_
|
||||
#define INCLUDE_VOLK_VOLK_AVX_INTRINSICS_H_
|
||||
#ifndef INCLUDED_VOLK_VOLK_AVX_INTRINSICS_H_
|
||||
#define INCLUDED_VOLK_VOLK_AVX_INTRINSICS_H_
|
||||
#include <immintrin.h>
|
||||
|
||||
static inline __m256
|
||||
|
@ -1,6 +1,8 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_sse3_intrinsics.h
|
||||
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* \brief Holds SSE3 intrinsics of intrinsics.
|
||||
* They should be used in VOLK kernels to avoid copy-paste.
|
||||
*
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
@ -21,13 +23,8 @@
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file is intended to hold SSE3 intrinsics of intrinsics.
|
||||
* They should be used in VOLK kernels to avoid copy-pasta.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_
|
||||
#define INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_
|
||||
#ifndef INCLUDED_VOLK_VOLK_SSE3_INTRINSICS_H_
|
||||
#define INCLUDED_VOLK_VOLK_SSE3_INTRINSICS_H_
|
||||
#include <pmmintrin.h>
|
||||
|
||||
static inline __m128
|
||||
|
@ -1,6 +1,8 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_sse_intrinsics.h
|
||||
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||
* \brief Holds SSE intrinsics of intrinsics.
|
||||
* They should be used in VOLK kernels to avoid copy-paste
|
||||
*
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
@ -20,13 +22,8 @@
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is intended to hold SSE intrinsics of intrinsics.
|
||||
* They should be used in VOLK kernels to avoid copy-pasta.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_
|
||||
#define INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_
|
||||
#ifndef INCLUDED_VOLK_VOLK_SSE_INTRINSICS_H_
|
||||
#define INCLUDED_VOLK_VOLK_SSE_INTRINSICS_H_
|
||||
#include <xmmintrin.h>
|
||||
|
||||
static inline __m128
|
||||
@ -46,4 +43,4 @@ _mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2){
|
||||
return _mm_sqrt_ps(_mm_magnitudesquared_ps(cplxValue1, cplxValue2));
|
||||
}
|
||||
|
||||
#endif /* INCLUDE_VOLK_VOLK_SSE_INTRINSICS_H_ */
|
||||
#endif /* INCLUDED_VOLK_VOLK_SSE_INTRINSICS_H_ */
|
||||
|
@ -38,6 +38,12 @@
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component.
|
||||
\param[out] outputVector The complex 32-bit float output data buffer
|
||||
\param[in] inputVector The complex 16-bit integer input data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
for(unsigned int i = 0; i < num_points; i++)
|
||||
@ -50,6 +56,12 @@ static inline void volk_gnsssdr_16ic_convert_32fc_generic(lv_32fc_t* outputVecto
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component.
|
||||
\param[out] outputVector The complex 32-bit float output data buffer
|
||||
\param[in] inputVector The complex 16-bit integer input data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 2;
|
||||
@ -80,6 +92,12 @@ static inline void volk_gnsssdr_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component.
|
||||
\param[out] outputVector The complex 32-bit float output data buffer
|
||||
\param[in] inputVector The complex 16-bit integer input data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 2;
|
||||
@ -110,6 +128,12 @@ static inline void volk_gnsssdr_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a complex vector of 16-bits integer each component into a complex vector of 32-bits float each component.
|
||||
\param[out] outputVector The complex 32-bit float output data buffer
|
||||
\param[in] inputVector The complex 16-bit integer input data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_convert_32fc_neon(lv_32fc_t* outputVector, const lv_16sc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 2;
|
||||
|
@ -47,12 +47,15 @@
|
||||
//int round_int( float r ) {
|
||||
// return (r > 0.0) ? (r + 0.5) : (r - 0.5);
|
||||
//}
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
|
||||
\param cVector The vector where the result will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
\brief Resamples a complex vector (16-bit integer each component)
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] local_code Vector to be resampled
|
||||
\param[in] rem_code_phase_chips Remnant code phase [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_resampler_16ic_generic(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)
|
||||
{
|
||||
@ -73,6 +76,16 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_generic(lv_16sc_t* result, c
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component)
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] local_code Vector to be resampled
|
||||
\param[in] rem_code_phase_chips Remnant code phase [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
|
||||
{
|
||||
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
|
||||
@ -155,6 +168,15 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_a_sse2(lv_16sc_t* result, co
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component)
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] local_code Vector to be resampled
|
||||
\param[in] rem_code_phase_chips Remnant code phase [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
|
||||
{
|
||||
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
|
||||
@ -235,6 +257,16 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_u_sse2(lv_16sc_t* result, co
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component)
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] local_code Vector to be resampled
|
||||
\param[in] rem_code_phase_chips Remnant code phase [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, const lv_16sc_t* local_code, float rem_code_phase_chips, float code_phase_step_chips, int code_length_chips, unsigned int num_output_samples)//, int* scratch_buffer, float* scratch_buffer_float)
|
||||
{
|
||||
unsigned int number;
|
||||
@ -281,7 +313,6 @@ static inline void volk_gnsssdr_16ic_resampler_16ic_neon(lv_16sc_t* result, cons
|
||||
{
|
||||
_code_phase_out = vmulq_f32(_code_phase_step_chips, _4output_index); //compute the code phase point with the phase step
|
||||
_code_phase_out_with_offset = vaddq_f32(_code_phase_out, _rem_code_phase); //add the phase offset
|
||||
//_code_phase_out_int = _mm_cvtps_epi32(_code_phase_out_with_offset); //convert to integer int32x4_t = f(float32x4_t)
|
||||
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(_code_phase_out_with_offset), 31)));
|
||||
PlusHalf = vaddq_f32(_code_phase_out_with_offset, half);
|
||||
Round = vsubq_f32(PlusHalf, sign);
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_resamplerxnpuppet_16ic.h
|
||||
* \brief Volk puppet for the 16-bit complex vector resampler kernel
|
||||
* \brief Volk puppet for the multiple 16-bit complex vector resampler kernel
|
||||
* \authors <ul>
|
||||
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
|
||||
* </ul>
|
||||
|
@ -1,3 +1,37 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_rotatorpuppet_16ic.h
|
||||
* \brief Volk puppet for the 16-bit complex rotator kernel
|
||||
* \authors <ul>
|
||||
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
|
||||
* </ul>
|
||||
*
|
||||
* Volk puppet for integrating the rotator into volk's test system
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_volk_gnsssdr_16ic_rotatorpuppet_16ic_H
|
||||
#define INCLUDED_volk_gnsssdr_16ic_rotatorpuppet_16ic_H
|
||||
|
||||
|
@ -45,6 +45,14 @@
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
|
||||
\brief Rotates a complex vector (16-bit integer samples each component)
|
||||
\param[out] outVector Rotated vector
|
||||
\param[in] inVector Vector to be rotated
|
||||
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||
\param[in,out] phase Initial / final phase
|
||||
\param[in] num_points The number of complex values in inVector to be rotated and stored into outVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
|
||||
{
|
||||
unsigned int i = 0;
|
||||
@ -76,6 +84,14 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* ou
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Rotates a complex vector (16-bit integer samples each component)
|
||||
\param[out] outVector Rotated vector
|
||||
\param[in] inVector Vector to be rotated
|
||||
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||
\param[in,out] phase Initial / final phase
|
||||
\param[in] num_points The number of complex values in inVector to be rotated and stored into outVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
@ -164,6 +180,14 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Rotates a complex vector (16-bit integer samples each component)
|
||||
\param[out] outVector Rotated vector
|
||||
\param[in] inVector Vector to be rotated
|
||||
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||
\param[in,out] phase Initial / final phase
|
||||
\param[in] num_points The number of complex values in inVector to be rotated and stored into outVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
@ -209,6 +233,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
|
||||
//next two samples
|
||||
_in += 2;
|
||||
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
@ -252,6 +277,14 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Rotates a complex vector (16-bit integer samples each component)
|
||||
\param[out] outVector Rotated vector
|
||||
\param[in] inVector Vector to be rotated
|
||||
\param[in] phase_inc Phase increment = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad))
|
||||
\param[in,out] phase Initial / final phase
|
||||
\param[in] num_points The number of complex values in inVector to be rotated and stored into outVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
|
||||
{
|
||||
unsigned int i = 0;
|
||||
|
@ -42,12 +42,13 @@
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] in_a One of the vectors to be multiplied and accumulated
|
||||
\param[in] in_b One of the vectors to be multiplied and accumulated
|
||||
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
@ -64,6 +65,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_generic(lv_16sc_t* result,
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
|
||||
\param[out] out Value of the accumulated result
|
||||
\param[in] in_a One of the vectors to be multiplied and accumulated
|
||||
\param[in] in_b One of the vectors to be multiplied and accumulated
|
||||
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into out
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0);
|
||||
@ -92,10 +101,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
|
||||
// a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
|
||||
a = _mm_load_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
|
||||
b = _mm_load_si128((__m128i*)_in_b);
|
||||
c = _mm_mullo_epi16 (a, b); // a3.i*b3.i, a3.r*b3.r, ....
|
||||
c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
|
||||
|
||||
c_sr = _mm_srli_si128 (c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
|
||||
real = _mm_subs_epi16 (c,c_sr);
|
||||
c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
|
||||
real = _mm_subs_epi16(c,c_sr);
|
||||
|
||||
b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i ....
|
||||
a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i ....
|
||||
@ -105,17 +114,17 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
|
||||
|
||||
imag = _mm_adds_epi16(imag1, imag2); //with saturation aritmetic!
|
||||
|
||||
realcacc = _mm_adds_epi16 (realcacc, real);
|
||||
imagcacc = _mm_adds_epi16 (imagcacc, imag);
|
||||
realcacc = _mm_adds_epi16(realcacc, real);
|
||||
imagcacc = _mm_adds_epi16(imagcacc, imag);
|
||||
|
||||
_in_a += 4;
|
||||
_in_b += 4;
|
||||
}
|
||||
|
||||
realcacc = _mm_and_si128 (realcacc, mask_real);
|
||||
imagcacc = _mm_and_si128 (imagcacc, mask_imag);
|
||||
realcacc = _mm_and_si128(realcacc, mask_real);
|
||||
imagcacc = _mm_and_si128(imagcacc, mask_imag);
|
||||
|
||||
result = _mm_or_si128 (realcacc, imagcacc);
|
||||
result = _mm_or_si128(realcacc, imagcacc);
|
||||
|
||||
_mm_store_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector
|
||||
|
||||
@ -128,7 +137,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
|
||||
for (unsigned int i = 0; i < (num_points % 4); ++i)
|
||||
{
|
||||
lv_16sc_t tmp = (*_in_a++) * (*_in_b++);
|
||||
dotProduct = lv_cmake( sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp)));
|
||||
dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp)));
|
||||
}
|
||||
|
||||
*_out = dotProduct;
|
||||
@ -140,6 +149,13 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
|
||||
\param[out] out Value of the accumulated result
|
||||
\param[in] in_a One of the vectors to be multiplied and accumulated
|
||||
\param[in] in_b One of the vectors to be multiplied and accumulated
|
||||
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into out
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0);
|
||||
@ -149,6 +165,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
|
||||
const lv_16sc_t* _in_a = in_a;
|
||||
const lv_16sc_t* _in_b = in_b;
|
||||
lv_16sc_t* _out = out;
|
||||
unsigned int i;
|
||||
|
||||
if (sse_iters > 0)
|
||||
{
|
||||
@ -168,10 +185,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
|
||||
// a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
|
||||
a = _mm_loadu_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
|
||||
b = _mm_loadu_si128((__m128i*)_in_b);
|
||||
c = _mm_mullo_epi16 (a, b); // a3.i*b3.i, a3.r*b3.r, ....
|
||||
c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
|
||||
|
||||
c_sr = _mm_srli_si128 (c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
|
||||
real = _mm_subs_epi16 (c,c_sr);
|
||||
c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
|
||||
real = _mm_subs_epi16(c, c_sr);
|
||||
|
||||
b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i ....
|
||||
a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i ....
|
||||
@ -181,30 +198,30 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
|
||||
|
||||
imag = _mm_adds_epi16(imag1, imag2); //with saturation aritmetic!
|
||||
|
||||
realcacc = _mm_adds_epi16 (realcacc, real);
|
||||
imagcacc = _mm_adds_epi16 (imagcacc, imag);
|
||||
realcacc = _mm_adds_epi16(realcacc, real);
|
||||
imagcacc = _mm_adds_epi16(imagcacc, imag);
|
||||
|
||||
_in_a += 4;
|
||||
_in_b += 4;
|
||||
}
|
||||
|
||||
realcacc = _mm_and_si128 (realcacc, mask_real);
|
||||
imagcacc = _mm_and_si128 (imagcacc, mask_imag);
|
||||
realcacc = _mm_and_si128(realcacc, mask_real);
|
||||
imagcacc = _mm_and_si128(imagcacc, mask_imag);
|
||||
|
||||
result = _mm_or_si128 (realcacc, imagcacc);
|
||||
result = _mm_or_si128(realcacc, imagcacc);
|
||||
|
||||
_mm_storeu_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i])));
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < (num_points % 4); ++i)
|
||||
for (i = 0; i < (num_points % 4); ++i)
|
||||
{
|
||||
lv_16sc_t tmp = (*_in_a++) * (*_in_b++);
|
||||
dotProduct = lv_cmake( sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp)));
|
||||
dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(tmp)), sat_adds16i(lv_cimag(dotProduct), lv_cimag(tmp)));
|
||||
}
|
||||
|
||||
*_out = dotProduct;
|
||||
@ -214,6 +231,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors (16-bit integer each component) and accumulates them, storing the result. Results are saturated so never go beyond the limits of the data type.
|
||||
\param[out] out Value of the accumulated result
|
||||
\param[in] in_a One of the vectors to be multiplied and accumulated
|
||||
\param[in] in_b One of the vectors to be multiplied and accumulated
|
||||
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together, accumulated and stored into out
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
unsigned int quarter_points = num_points / 4;
|
||||
|
@ -42,11 +42,12 @@
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||
\param[out] result Array of num_a_vectors components where the accumulated results of multiplying in_common by each of the multiple versions of in_a will be stored
|
||||
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
|
||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||
\param[in] num_points The Number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
@ -68,6 +69,15 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* resu
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||
\param[out] result Array of num_a_vectors components where the accumulated products of in_common with each of the vectors in in_a will be stored
|
||||
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
|
||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||
\param[in] num_points The number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake(0,0);
|
||||
@ -87,8 +97,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out,
|
||||
__m128i* realcacc;
|
||||
__m128i* imagcacc;
|
||||
|
||||
realcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0
|
||||
imagcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0
|
||||
realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
|
||||
imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
|
||||
|
||||
__m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result;
|
||||
|
||||
@ -163,6 +173,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* out,
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||
\param[out] result Array of num_a_vectors components where the accumulated products of in_common with each of the vectors in in_a will be stored
|
||||
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
|
||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||
\param[in] num_points The number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake(0,0);
|
||||
@ -182,8 +200,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
|
||||
__m128i* realcacc;
|
||||
__m128i* imagcacc;
|
||||
|
||||
realcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0
|
||||
imagcacc=(__m128i*)calloc(num_a_vectors,sizeof(__m128i)); //calloc also sets memory to 0
|
||||
realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
|
||||
imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
|
||||
|
||||
__m128i a,b,c, c_sr, mask_imag, mask_real, real, imag, imag1,imag2, b_sl, a_sl, result;
|
||||
|
||||
@ -214,8 +232,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
|
||||
|
||||
imag = _mm_adds_epi16(imag1, imag2);
|
||||
|
||||
realcacc[n_vec] = _mm_adds_epi16 (realcacc[n_vec], real);
|
||||
imagcacc[n_vec] = _mm_adds_epi16 (imagcacc[n_vec], imag);
|
||||
realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real);
|
||||
imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag);
|
||||
|
||||
}
|
||||
_in_common += 4;
|
||||
@ -223,10 +241,10 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
|
||||
|
||||
for (int n_vec=0;n_vec<num_a_vectors;n_vec++)
|
||||
{
|
||||
realcacc[n_vec] = _mm_and_si128 (realcacc[n_vec], mask_real);
|
||||
imagcacc[n_vec] = _mm_and_si128 (imagcacc[n_vec], mask_imag);
|
||||
realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real);
|
||||
imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag);
|
||||
|
||||
result = _mm_or_si128 (realcacc[n_vec], imagcacc[n_vec]);
|
||||
result = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]);
|
||||
|
||||
_mm_storeu_si128((__m128i*)dotProductVector, result); // Store the results back into the dot product vector
|
||||
dotProduct = lv_cmake(0,0);
|
||||
@ -258,6 +276,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* out,
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the reference complex vector with multiple versions of another complex vector, accumulates the results and stores them in the output vector
|
||||
\param[out] result Array of num_a_vectors components where the accumulated products of in_common with each of the vectors in in_a will be stored
|
||||
\param[in] in_common Pointer to one of the vectors to be multiplied and accumulated (reference vector)
|
||||
\param[in] in_a Pointer to an array of pointers to multiple versions of the other vector to be multiplied and accumulated
|
||||
\param[in] num_a_vectors Number of vectors to be multiplied by the reference vector and accumulated
|
||||
\param[in] num_points The number of complex values to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* out, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake(0,0);
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h
|
||||
* \brief Volk puppet for the 16-bit complex vector resampler kernel
|
||||
* \brief Volk puppet for the multiple 16-bit complex dot product kernel
|
||||
* \authors <ul>
|
||||
* <li> Carles Fernandez Prades 2016 cfernandez at cttc dot cat
|
||||
* </ul>
|
||||
|
@ -42,10 +42,10 @@
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
|
||||
\param cVector The vector where the result will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] in_a One of the vectors to be multiplied
|
||||
\param[in] in_b One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
@ -61,6 +61,14 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_generic(lv_16sc_t* result,
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] in_a One of the vectors to be multiplied
|
||||
\param[in] in_b One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
@ -112,6 +120,14 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_a_sse2(lv_16sc_t* out, con
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] in_a One of the vectors to be multiplied
|
||||
\param[in] in_b One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
@ -164,6 +180,13 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_u_sse2(lv_16sc_t* out, con
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
|
||||
\param[out] result The vector where the result will be stored
|
||||
\param[in] in_a One of the vectors to be multiplied
|
||||
\param[in] in_b One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in in_a and in_b to be multiplied together and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_x2_multiply_16ic_neon(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t *a_ptr = (lv_16sc_t*) in_a;
|
||||
|
@ -48,15 +48,18 @@
|
||||
//int round_int( float r ) {
|
||||
// return (r > 0.0) ? (r + 0.5) : (r - 0.5);
|
||||
//}
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors, point-by-point, storing the result in the third vector
|
||||
\param cVector The vector where the result will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
*/
|
||||
|
||||
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
|
||||
\param[out] result Pointer to the vector where the results will be stored
|
||||
\param[in] local_code Vector containing the local code to be resampled
|
||||
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_out_vectors Number of output vectors
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips, float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
|
||||
{
|
||||
int local_code_chip_index;
|
||||
//fesetround(FE_TONEAREST);
|
||||
@ -65,9 +68,9 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** re
|
||||
for (unsigned int n = 0; n < num_output_samples; n++)
|
||||
{
|
||||
// resample code for current tap
|
||||
local_code_chip_index = round(code_phase_step_chips * (float)(n) + rem_code_phase_chips[current_vector]-0.5f);
|
||||
local_code_chip_index = round(code_phase_step_chips * (float)(n) + rem_code_phase_chips[current_vector] - 0.5f);
|
||||
if (local_code_chip_index < 0.0) local_code_chip_index += code_length_chips;
|
||||
if (local_code_chip_index > (code_length_chips-1)) local_code_chip_index -= code_length_chips;
|
||||
if (local_code_chip_index > (code_length_chips - 1)) local_code_chip_index -= code_length_chips;
|
||||
//std::cout<<"g["<<n<<"]="<<code_phase_step_chips*static_cast<float>(n) + rem_code_phase_chips-0.5f<<","<<local_code_chip_index<<" ";
|
||||
result[current_vector][n] = local_code[local_code_chip_index];
|
||||
}
|
||||
@ -80,6 +83,17 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_generic(lv_16sc_t** re
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
|
||||
\param[out] result Pointer to the vector where the results will be stored
|
||||
\param[in] local_code Vector containing the local code to be resampled
|
||||
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_out_vectors Number of output vectors
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
|
||||
{
|
||||
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
|
||||
@ -172,6 +186,16 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_a_sse2(lv_16sc_t** res
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
|
||||
\param[out] result Pointer to the vector where the results will be stored
|
||||
\param[in] local_code Vector containing the local code to be resampled
|
||||
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_out_vectors Number of output vectors
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
|
||||
{
|
||||
_MM_SET_ROUNDING_MODE (_MM_ROUND_NEAREST);//_MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO
|
||||
@ -265,6 +289,16 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_u_sse2(lv_16sc_t** res
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Resamples a complex vector (16-bit integer each component), providing num_out_vectors outputs
|
||||
\param[out] result Pointer to the vector where the results will be stored
|
||||
\param[in] local_code Vector containing the local code to be resampled
|
||||
\param[in] rem_code_phase_chips Pointer to the vector containing the remnant code phase for each output [chips]
|
||||
\param[in] code_phase_step_chips Phase increment per sample [chips/sample]
|
||||
\param[in] code_length_chips Code length in chips
|
||||
\param[in] num_out_vectors Number of output vectors
|
||||
\param[in] num_output_samples Number of samples to be processed
|
||||
*/
|
||||
static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_neon(lv_16sc_t** result, const lv_16sc_t* local_code, float* rem_code_phase_chips ,float code_phase_step_chips, unsigned int code_length_chips, int num_out_vectors, unsigned int num_output_samples)
|
||||
{
|
||||
unsigned int number;
|
||||
|
@ -39,11 +39,12 @@
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
|
||||
\param inputVector The floating point input data buffer
|
||||
\param outputVector The 16 bit output data buffer
|
||||
\param num_points The number of data values to be converted
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 16-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
@ -92,11 +93,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse2(lv_16sc_t* outputVector
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h> // __m64, __m128 ??
|
||||
|
||||
/*!
|
||||
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
|
||||
\param inputVector The floating point input data buffer
|
||||
\param outputVector The 16 bit output data buffer
|
||||
\param num_points The number of data values to be converted
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 16-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
@ -146,11 +148,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_u_sse(lv_16sc_t* outputVector,
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
|
||||
\param inputVector The floating point input data buffer
|
||||
\param outputVector The 16 bit output data buffer
|
||||
\param num_points The number of data values to be converted
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 16-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
@ -199,11 +202,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse2(lv_16sc_t* outputVector
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
|
||||
\param inputVector The floating point input data buffer
|
||||
\param outputVector The 16 bit output data buffer
|
||||
\param num_points The number of data values to be converted
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 16-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
@ -252,11 +256,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector,
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
|
||||
\param inputVector The floating point input data buffer
|
||||
\param outputVector The 16 bit output data buffer
|
||||
\param num_points The number of data values to be converted
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 16-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
@ -314,11 +319,12 @@ static inline void volk_gnsssdr_32fc_convert_16ic_neon(lv_16sc_t* outputVector,
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
|
||||
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
|
||||
\param inputVector The floating point input data buffer
|
||||
\param outputVector The 16 bit output data buffer
|
||||
\param num_points The number of data values to be converted
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 16-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 16-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
@ -337,4 +343,5 @@ static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVecto
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_H */
|
||||
|
@ -41,11 +41,12 @@
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part)
|
||||
\param inputVector The floating point input data buffer
|
||||
\param outputVector The 16 bit output data buffer
|
||||
\param num_points The number of data values to be converted
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 8-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
@ -103,11 +104,12 @@ static inline void volk_gnsssdr_32fc_convert_8ic_u_sse2(lv_8sc_t* outputVector,
|
||||
#endif /* LV_HAVE_SSE2 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
|
||||
\brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part)
|
||||
\param inputVector The floating point input data buffer
|
||||
\param outputVector The 16 bit output data buffer
|
||||
\param num_points The number of data values to be converted
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 8-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
@ -130,11 +132,12 @@ static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector,
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part)
|
||||
\param inputVector The floating point input data buffer
|
||||
\param outputVector The 16 bit output data buffer
|
||||
\param num_points The number of data values to be converted
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 8-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
@ -193,11 +196,12 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector,
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Converts a float vector of 64 bits (32 bits each part) into a 32 integer vector (16 bits each part)
|
||||
\param inputVector The floating point input data buffer
|
||||
\param outputVector The 16 bit output data buffer
|
||||
\param num_points The number of data values to be converted
|
||||
\brief Converts a complex vector of 32-bits float each component into a complex vector of 8-bits integer each component. Values are saturated to the limit values of the output data type.
|
||||
\param[out] outputVector The complex 8-bit integer output data buffer
|
||||
\param[in] inputVector The complex 32-bit float data buffer
|
||||
\param[in] num_points The number of data values to be converted
|
||||
*/
|
||||
static inline void volk_gnsssdr_32fc_convert_8ic_neon(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
|
||||
{
|
||||
|
@ -41,12 +41,13 @@
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] input One of the vectors to be multiplied
|
||||
\param[in] taps One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
|
||||
{
|
||||
@ -93,12 +94,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_generic(lv_8sc_t* result, co
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] input One of the vectors to be multiplied
|
||||
\param[in] taps One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
|
||||
{
|
||||
@ -174,12 +176,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse2(lv_8sc_t* result, con
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] input One of the vectors to be multiplied
|
||||
\param[in] taps One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
|
||||
{
|
||||
@ -254,12 +257,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] input One of the vectors to be multiplied
|
||||
\param[in] taps One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
|
||||
{
|
||||
@ -335,12 +339,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse2(lv_8sc_t* result, con
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] input One of the vectors to be multiplied
|
||||
\param[in] taps One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
|
||||
{
|
||||
@ -413,12 +418,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_sse4_1(lv_8sc_t* result, c
|
||||
#endif /*LV_HAVE_SSE4_1*/
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] input One of the vectors to be multiplied
|
||||
\param[in] taps One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
extern void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl(short* resRealShort, short* resImagShort, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points);
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
|
||||
@ -440,12 +446,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, cons
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and accumulates them, storing the result in the third vector
|
||||
\param cVector The vector where the accumulated result will be stored
|
||||
\param aVector One of the vectors to be multiplied and accumulated
|
||||
\param bVector One of the vectors to be multiplied and accumulated
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together, accumulated and stored into cVector
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and accumulates them, storing the result.
|
||||
\param[out] result Value of the accumulated result
|
||||
\param[in] input One of the vectors to be multiplied
|
||||
\param[in] taps One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in input and taps to be multiplied together, accumulated and stored into result
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
|
||||
{
|
||||
@ -458,7 +465,7 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_neon(lv_8sc_t* result, const
|
||||
// for 2-lane vectors, 1st lane holds the real part,
|
||||
// 2nd lane holds the imaginary part
|
||||
int8x8x2_t a_val, b_val, c_val, accumulator, tmp_real, tmp_imag;
|
||||
lv_8sc_t accum_result[8] = { lv_cmake(0,0) };
|
||||
__VOLK_ATTR_ALIGNED(16) lv_8sc_t accum_result[8] = { lv_cmake(0,0) };
|
||||
accumulator.val[0] = vdup_n_s8(0);
|
||||
accumulator.val[1] = vdup_n_s8(0);
|
||||
unsigned int number;
|
||||
|
@ -40,12 +40,13 @@
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
|
||||
\param[out] cVector The vector where the results will be stored
|
||||
\param[in] aVector One of the vectors to be multiplied
|
||||
\param[in] bVector One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
|
||||
{
|
||||
@ -63,26 +64,26 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co
|
||||
x = _mm_loadu_si128((__m128i*)a);
|
||||
y = _mm_loadu_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
imagx = _mm_srli_si128(x, 1);
|
||||
imagx = _mm_and_si128(imagx, mult1);
|
||||
realx = _mm_and_si128(x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
imagy = _mm_srli_si128(y, 1);
|
||||
imagy = _mm_and_si128(imagy, mult1);
|
||||
realy = _mm_and_si128(y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
realx_mult_realy = _mm_mullo_epi16(realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
realc = _mm_and_si128 (realc, mult1);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_and_si128 (imagc, mult1);
|
||||
imagc = _mm_slli_si128 (imagc, 1);
|
||||
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
|
||||
realc = _mm_and_si128(realc, mult1);
|
||||
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_and_si128(imagc, mult1);
|
||||
imagc = _mm_slli_si128(imagc, 1);
|
||||
|
||||
totalc = _mm_or_si128 (realc, imagc);
|
||||
totalc = _mm_or_si128(realc, imagc);
|
||||
|
||||
_mm_storeu_si128((__m128i*)c, totalc);
|
||||
|
||||
@ -100,12 +101,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse2(lv_8sc_t* cVector, co
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
|
||||
\param[out] cVector The vector where the results will be stored
|
||||
\param[in] aVector One of the vectors to be multiplied
|
||||
\param[in] bVector One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
|
||||
{
|
||||
@ -125,24 +127,24 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector,
|
||||
x = _mm_lddqu_si128((__m128i*)a);
|
||||
y = _mm_lddqu_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
imagx = _mm_srli_si128(x, 1);
|
||||
imagx = _mm_and_si128(imagx, mult1);
|
||||
realx = _mm_and_si128(x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
imagy = _mm_srli_si128(y, 1);
|
||||
imagy = _mm_and_si128(imagy, mult1);
|
||||
realy = _mm_and_si128(y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
realx_mult_realy = _mm_mullo_epi16(realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_slli_si128 (imagc, 1);
|
||||
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_slli_si128(imagc, 1);
|
||||
|
||||
totalc = _mm_blendv_epi8 (imagc, realc, mult1);
|
||||
totalc = _mm_blendv_epi8(imagc, realc, mult1);
|
||||
|
||||
_mm_storeu_si128((__m128i*)c, totalc);
|
||||
|
||||
@ -159,12 +161,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_sse4_1(lv_8sc_t* cVector,
|
||||
#endif /* LV_HAVE_SSE4_1 */
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
|
||||
\param[out] cVector The vector where the results will be stored
|
||||
\param[in] aVector One of the vectors to be multiplied
|
||||
\param[in] bVector One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
|
||||
{
|
||||
@ -182,12 +185,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, c
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
|
||||
\param[out] cVector The vector where the results will be stored
|
||||
\param[in] aVector One of the vectors to be multiplied
|
||||
\param[in] bVector One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
|
||||
{
|
||||
@ -205,24 +209,24 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, co
|
||||
x = _mm_load_si128((__m128i*)a);
|
||||
y = _mm_load_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
imagx = _mm_srli_si128(x, 1);
|
||||
imagx = _mm_and_si128(imagx, mult1);
|
||||
realx = _mm_and_si128(x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
imagy = _mm_srli_si128(y, 1);
|
||||
imagy = _mm_and_si128(imagy, mult1);
|
||||
realy = _mm_and_si128(y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
realx_mult_realy = _mm_mullo_epi16(realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
realc = _mm_and_si128 (realc, mult1);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_and_si128 (imagc, mult1);
|
||||
imagc = _mm_slli_si128 (imagc, 1);
|
||||
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
|
||||
realc = _mm_and_si128(realc, mult1);
|
||||
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_and_si128(imagc, mult1);
|
||||
imagc = _mm_slli_si128(imagc, 1);
|
||||
|
||||
totalc = _mm_or_si128 (realc, imagc);
|
||||
|
||||
@ -242,12 +246,13 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse2(lv_8sc_t* cVector, co
|
||||
|
||||
#ifdef LV_HAVE_SSE4_1
|
||||
#include <smmintrin.h>
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
|
||||
\param[out] cVector The vector where the results will be stored
|
||||
\param[in] aVector One of the vectors to be multiplied
|
||||
\param[in] bVector One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
|
||||
{
|
||||
@ -267,24 +272,24 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector,
|
||||
x = _mm_load_si128((__m128i*)a);
|
||||
y = _mm_load_si128((__m128i*)b);
|
||||
|
||||
imagx = _mm_srli_si128 (x, 1);
|
||||
imagx = _mm_and_si128 (imagx, mult1);
|
||||
realx = _mm_and_si128 (x, mult1);
|
||||
imagx = _mm_srli_si128(x, 1);
|
||||
imagx = _mm_and_si128(imagx, mult1);
|
||||
realx = _mm_and_si128(x, mult1);
|
||||
|
||||
imagy = _mm_srli_si128 (y, 1);
|
||||
imagy = _mm_and_si128 (imagy, mult1);
|
||||
realy = _mm_and_si128 (y, mult1);
|
||||
imagy = _mm_srli_si128(y, 1);
|
||||
imagy = _mm_and_si128(imagy, mult1);
|
||||
realy = _mm_and_si128(y, mult1);
|
||||
|
||||
realx_mult_realy = _mm_mullo_epi16 (realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16 (imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16 (realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16 (imagx, realy);
|
||||
realx_mult_realy = _mm_mullo_epi16(realx, realy);
|
||||
imagx_mult_imagy = _mm_mullo_epi16(imagx, imagy);
|
||||
realx_mult_imagy = _mm_mullo_epi16(realx, imagy);
|
||||
imagx_mult_realy = _mm_mullo_epi16(imagx, realy);
|
||||
|
||||
realc = _mm_sub_epi16 (realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16 (realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_slli_si128 (imagc, 1);
|
||||
realc = _mm_sub_epi16(realx_mult_realy, imagx_mult_imagy);
|
||||
imagc = _mm_add_epi16(realx_mult_imagy, imagx_mult_realy);
|
||||
imagc = _mm_slli_si128(imagc, 1);
|
||||
|
||||
totalc = _mm_blendv_epi8 (imagc, realc, mult1);
|
||||
totalc = _mm_blendv_epi8(imagc, realc, mult1);
|
||||
|
||||
_mm_store_si128((__m128i*)c, totalc);
|
||||
|
||||
@ -302,14 +307,16 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector,
|
||||
|
||||
|
||||
#ifdef LV_HAVE_ORC
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors and stores their results in the third vector
|
||||
\param cVector The vector where the results will be stored
|
||||
\param aVector One of the vectors to be multiplied
|
||||
\param bVector One of the vectors to be multiplied
|
||||
\param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
|
||||
extern void volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points);
|
||||
|
||||
/*!
|
||||
\brief Multiplies the two input complex vectors of 8-bit integer each component and stores the results in the third vector
|
||||
\param[out] cVector The vector where the results will be stored
|
||||
\param[in] aVector One of the vectors to be multiplied
|
||||
\param[in] bVector One of the vectors to be multiplied
|
||||
\param[in] num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
|
||||
*/
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_orc(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
|
||||
{
|
||||
volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(cVector, aVector, bVector, num_points);
|
||||
|
Loading…
Reference in New Issue
Block a user