mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2024-12-14 12:10:34 +00:00
Merge branch 'next' of https://github.com/carlesfernandez/gnss-sdr into next
This commit is contained in:
commit
a7fbc46807
@ -170,6 +170,7 @@ install(FILES
|
||||
${PROJECT_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_common.h
|
||||
${PROJECT_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_avx_intrinsics.h
|
||||
${PROJECT_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_sse3_intrinsics.h
|
||||
${PROJECT_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_neon_intrinsics.h
|
||||
${PROJECT_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr.h
|
||||
${PROJECT_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_cpu.h
|
||||
${PROJECT_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_config_fixed.h
|
||||
|
@ -0,0 +1,56 @@
|
||||
/*!
|
||||
* \file volk_gnsssdr_neon_intrinsics.h
|
||||
* \author Carles Fernandez, 2016. carles.fernandez(at)gmail.com
|
||||
* \brief Holds NEON intrinsics of intrinsics.
|
||||
* They can be used in VOLK_GNSSSDR kernels to avoid copy-paste
|
||||
*
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDED_VOLK_GNSSSDR_NEON_INTRINSICS_H_
|
||||
#define INCLUDED_VOLK_GNSSSDR_NEON_INTRINSICS_H_
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
static inline float32x4_t vdivq_f32( float32x4_t num, float32x4_t den )
|
||||
{
|
||||
const float32x4_t q_inv0 = vrecpeq_f32( den );
|
||||
const float32x4_t q_step0 = vrecpsq_f32( q_inv0, den );
|
||||
|
||||
const float32x4_t q_inv1 = vmulq_f32( q_step0, q_inv0 );
|
||||
return vmulq_f32( num, q_inv1 );
|
||||
}
|
||||
|
||||
|
||||
static inline float32x4_t vsqrtq_f32( float32x4_t q_x )
|
||||
{
|
||||
const float32x4_t q_step_0 = vrsqrteq_f32( q_x );
|
||||
// step
|
||||
const float32x4_t q_step_parm0 = vmulq_f32( q_x, q_step_0 );
|
||||
const float32x4_t q_step_result0 = vrsqrtsq_f32( q_step_parm0, q_step_0 );
|
||||
// step
|
||||
const float32x4_t q_step_1 = vmulq_f32( q_step_0, q_step_result0 );
|
||||
const float32x4_t q_step_parm1 = vmulq_f32( q_x, q_step_1 );
|
||||
const float32x4_t q_step_result1 = vrsqrtsq_f32( q_step_parm1, q_step_1 );
|
||||
// take the res
|
||||
const float32x4_t q_step_2 = vmulq_f32( q_step_1, q_step_result1 );
|
||||
// mul by x to get sqrt, not rsqrt
|
||||
return vmulq_f32( q_x, q_step_2 );
|
||||
}
|
||||
|
||||
#endif /* INCLUDED_VOLK_GNSSSDR_NEON_INTRINSICS_H_ */
|
@ -57,6 +57,22 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_generic(lv_16sc_t* outVe
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_generic_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
|
||||
{
|
||||
// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
float rem_carrier_phase_in_rad = 0.345;
|
||||
float phase_step_rad = 0.123;
|
||||
lv_32fc_t phase[1];
|
||||
phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), -sin(rem_carrier_phase_in_rad));
|
||||
lv_32fc_t phase_inc[1];
|
||||
phase_inc[0] = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad));
|
||||
volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic_reload(outVector, inVector, phase_inc[0], phase, num_points);
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_a_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
|
||||
{
|
||||
@ -73,6 +89,22 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_a_sse3(lv_16sc_t* outVec
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_a_sse3_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
|
||||
{
|
||||
// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
float rem_carrier_phase_in_rad = 0.345;
|
||||
float phase_step_rad = 0.123;
|
||||
lv_32fc_t phase[1];
|
||||
phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), -sin(rem_carrier_phase_in_rad));
|
||||
lv_32fc_t phase_inc[1];
|
||||
phase_inc[0] = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad));
|
||||
volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(outVector, inVector, phase_inc[0], phase, num_points);
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_u_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
|
||||
{
|
||||
@ -89,6 +121,22 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_u_sse3(lv_16sc_t* outVec
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_u_sse3_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
|
||||
{
|
||||
// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
float rem_carrier_phase_in_rad = 0.345;
|
||||
float phase_step_rad = 0.123;
|
||||
lv_32fc_t phase[1];
|
||||
phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), -sin(rem_carrier_phase_in_rad));
|
||||
lv_32fc_t phase_inc[1];
|
||||
phase_inc[0] = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad));
|
||||
volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(outVector, inVector, phase_inc[0], phase, num_points);
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
|
||||
{
|
||||
@ -105,4 +153,20 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon(lv_16sc_t* outVecto
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
|
||||
{
|
||||
// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
float rem_carrier_phase_in_rad = 0.345;
|
||||
float phase_step_rad = 0.123;
|
||||
lv_32fc_t phase[1];
|
||||
phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), -sin(rem_carrier_phase_in_rad));
|
||||
lv_32fc_t phase_inc[1];
|
||||
phase_inc[0] = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad));
|
||||
volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(outVector, inVector, phase_inc[0], phase, num_points);
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_16ic_rotatorpuppet_16ic_H */
|
||||
|
@ -61,6 +61,7 @@
|
||||
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <math.h>
|
||||
//#include <stdio.h>
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
@ -71,6 +72,56 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* ou
|
||||
lv_16sc_t tmp16;
|
||||
lv_32fc_t tmp32;
|
||||
for(i = 0; i < (unsigned int)(num_points); ++i)
|
||||
{
|
||||
tmp16 = *inVector++;
|
||||
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
|
||||
*outVector++ = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
|
||||
(*phase) *= phase_inc;
|
||||
// Regenerate phase
|
||||
if (i % 512 == 0)
|
||||
{
|
||||
//printf("Phase before regeneration %i: %f,%f Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase));
|
||||
#ifdef __cplusplus
|
||||
(*phase) /= std::abs((*phase));
|
||||
#else
|
||||
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
|
||||
#endif
|
||||
//printf("Phase after regeneration %i: %f,%f Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_GENERIC */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
|
||||
{
|
||||
unsigned int ROTATOR_RELOAD = 512;
|
||||
unsigned int n = 0;
|
||||
unsigned int j = 0;
|
||||
lv_16sc_t tmp16;
|
||||
lv_32fc_t tmp32;
|
||||
for (; n < num_points / ROTATOR_RELOAD; n++)
|
||||
{
|
||||
for (j = 0; j < ROTATOR_RELOAD; j++)
|
||||
{
|
||||
tmp16 = *inVector++;
|
||||
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
|
||||
*outVector++ = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
|
||||
(*phase) *= phase_inc;
|
||||
}
|
||||
// Regenerate phase
|
||||
//printf("Phase before regeneration %i: %f,%f Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase));
|
||||
#ifdef __cplusplus
|
||||
(*phase) /= std::abs((*phase));
|
||||
#else
|
||||
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
|
||||
#endif
|
||||
//printf("Phase after regeneration %i: %f,%f Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase));
|
||||
}
|
||||
for (j = 0; j < num_points % ROTATOR_RELOAD; j++)
|
||||
{
|
||||
tmp16 = *inVector++;
|
||||
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
|
||||
@ -130,6 +181,168 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
|
||||
//next two samples
|
||||
_in += 2;
|
||||
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
c2 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
|
||||
|
||||
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
// store four output samples
|
||||
result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic
|
||||
_mm_store_si128((__m128i*)_out, result);
|
||||
|
||||
// Regenerate phase
|
||||
if ((number % 512) == 0)
|
||||
{
|
||||
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
|
||||
tmp2 = _mm_hadd_ps(tmp1, tmp1);
|
||||
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
|
||||
tmp2 = _mm_sqrt_ps(tmp1);
|
||||
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
|
||||
}
|
||||
|
||||
//next two samples
|
||||
_in += 2;
|
||||
_out += 4;
|
||||
}
|
||||
|
||||
_mm_storeu_ps((float*)two_phase_acc, two_phase_acc_reg);
|
||||
(*phase) = two_phase_acc[0];
|
||||
|
||||
for (unsigned int i = sse_iters * 4; i < num_points; ++i)
|
||||
{
|
||||
tmp16 = *_in++;
|
||||
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
|
||||
*_out++ = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
|
||||
(*phase) *= phase_inc;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
const unsigned int ROTATOR_RELOAD = 512;
|
||||
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
||||
__m128i c1, c2, result;
|
||||
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
||||
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||
|
||||
const lv_16sc_t* _in = inVector;
|
||||
|
||||
lv_16sc_t* _out = outVector;
|
||||
|
||||
__m128 yl, yh, tmp1, tmp2, tmp3;
|
||||
lv_16sc_t tmp16;
|
||||
lv_32fc_t tmp32;
|
||||
|
||||
for (unsigned int n = 0; n < sse_iters / ROTATOR_RELOAD; n++)
|
||||
{
|
||||
for (unsigned int j = 0; j < ROTATOR_RELOAD; j++)
|
||||
{
|
||||
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
c1 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
|
||||
|
||||
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
//next two samples
|
||||
_in += 2;
|
||||
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
c2 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
|
||||
|
||||
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
// store four output samples
|
||||
result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic
|
||||
_mm_store_si128((__m128i*)_out, result);
|
||||
|
||||
//next two samples
|
||||
_in += 2;
|
||||
_out += 4;
|
||||
}
|
||||
// Regenerate phase
|
||||
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
|
||||
tmp2 = _mm_hadd_ps(tmp1, tmp1);
|
||||
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
|
||||
tmp2 = _mm_sqrt_ps(tmp1);
|
||||
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
|
||||
}
|
||||
|
||||
for (unsigned int j = 0; j < sse_iters % ROTATOR_RELOAD; j++)
|
||||
{
|
||||
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
c1 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
|
||||
|
||||
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
//next two samples
|
||||
_in += 2;
|
||||
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
@ -171,6 +384,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
@ -241,6 +455,16 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
|
||||
result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic
|
||||
_mm_storeu_si128((__m128i*)_out, result);
|
||||
|
||||
// Regenerate phase
|
||||
if ((number % 512) == 0)
|
||||
{
|
||||
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
|
||||
tmp2 = _mm_hadd_ps(tmp1, tmp1);
|
||||
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
|
||||
tmp2 = _mm_sqrt_ps(tmp1);
|
||||
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
|
||||
}
|
||||
|
||||
//next two samples
|
||||
_in += 2;
|
||||
_out += 4;
|
||||
@ -261,6 +485,156 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
|
||||
{
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
unsigned int ROTATOR_RELOAD = 512;
|
||||
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
|
||||
__m128i c1, c2, result;
|
||||
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
||||
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc);
|
||||
|
||||
const lv_16sc_t* _in = inVector;
|
||||
|
||||
lv_16sc_t* _out = outVector;
|
||||
|
||||
__m128 yl, yh, tmp1, tmp2, tmp3;
|
||||
lv_16sc_t tmp16;
|
||||
lv_32fc_t tmp32;
|
||||
|
||||
for (unsigned int n = 0; n < sse_iters / ROTATOR_RELOAD; n++)
|
||||
{
|
||||
for (unsigned int j = 0; j < ROTATOR_RELOAD; j++)
|
||||
{
|
||||
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
c1 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
|
||||
|
||||
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
//next two samples
|
||||
_in += 2;
|
||||
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
c2 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
|
||||
|
||||
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
// store four output samples
|
||||
result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic
|
||||
_mm_storeu_si128((__m128i*)_out, result);
|
||||
|
||||
//next two samples
|
||||
_in += 2;
|
||||
_out += 4;
|
||||
}
|
||||
// Regenerate phase
|
||||
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
|
||||
tmp2 = _mm_hadd_ps(tmp1, tmp1);
|
||||
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
|
||||
tmp2 = _mm_sqrt_ps(tmp1);
|
||||
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
|
||||
}
|
||||
|
||||
for (unsigned int j = 0; j < sse_iters % ROTATOR_RELOAD; j++)
|
||||
{
|
||||
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
c1 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
|
||||
|
||||
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
//next two samples
|
||||
_in += 2;
|
||||
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
c2 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
|
||||
|
||||
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
// store four output samples
|
||||
result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic
|
||||
_mm_storeu_si128((__m128i*)_out, result);
|
||||
|
||||
//next two samples
|
||||
_in += 2;
|
||||
_out += 4;
|
||||
}
|
||||
|
||||
_mm_storeu_ps((float*)two_phase_acc, two_phase_acc_reg);
|
||||
(*phase) = two_phase_acc[0];
|
||||
|
||||
for (unsigned int i = sse_iters * 4; i < num_points; ++i)
|
||||
{
|
||||
tmp16 = *_in++;
|
||||
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
|
||||
*_out++ = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
|
||||
(*phase) *= phase_inc;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
@ -271,6 +645,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe
|
||||
lv_16sc_t tmp16_;
|
||||
lv_32fc_t tmp32_;
|
||||
|
||||
float arg_phase0 = cargf(*phase);
|
||||
float arg_phase_inc = cargf(phase_inc);
|
||||
float phase_est = 0.0;
|
||||
|
||||
const lv_16sc_t* _in = inVector;
|
||||
lv_16sc_t* _out = outVector;
|
||||
|
||||
@ -351,6 +729,24 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe
|
||||
/* store the four complex results */
|
||||
vst2_s16((int16_t*)_out, tmp16);
|
||||
_out += 4;
|
||||
// Regenerate phase
|
||||
if ((i % 512) == 0)
|
||||
{
|
||||
//printf("Computed phase: %f\n", cos(cargf(lv_cmake(_phase_real[0],_phase_imag[0]))));
|
||||
phase_est = arg_phase0 + (i + 1) * 4 * arg_phase_inc;
|
||||
//printf("Estimated phase: %f\n\n", cos(phase_est));
|
||||
|
||||
*phase = lv_cmake(cos(phase_est), sin(phase_est));
|
||||
phase2 = (lv_32fc_t)(*phase) * phase_inc;
|
||||
phase3 = phase2 * phase_inc;
|
||||
phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
|
||||
_phase_real = vld1q_f32(____phase_real);
|
||||
_phase_imag = vld1q_f32(____phase_imag);
|
||||
}
|
||||
}
|
||||
vst1q_f32((float32_t*)__phase_real, _phase_real);
|
||||
vst1q_f32((float32_t*)__phase_imag, _phase_imag);
|
||||
@ -368,4 +764,190 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe
|
||||
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
|
||||
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
|
||||
{
|
||||
unsigned int i = 0;
|
||||
const unsigned int neon_iters = num_points / 4;
|
||||
const unsigned int ROTATOR_RELOAD = 512;
|
||||
|
||||
lv_16sc_t tmp16_;
|
||||
lv_32fc_t tmp32_;
|
||||
|
||||
float arg_phase0 = cargf(*phase);
|
||||
float arg_phase_inc = cargf(phase_inc);
|
||||
float phase_est = 0.0;
|
||||
|
||||
const lv_16sc_t* _in = inVector;
|
||||
lv_16sc_t* _out = outVector;
|
||||
|
||||
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
|
||||
|
||||
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
|
||||
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
|
||||
|
||||
lv_32fc_t phase2 = (lv_32fc_t)(*phase) * phase_inc;
|
||||
lv_32fc_t phase3 = phase2 * phase_inc;
|
||||
lv_32fc_t phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
|
||||
float32x4_t _phase_real = vld1q_f32(__phase_real);
|
||||
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
|
||||
|
||||
float32x4_t half = vdupq_n_f32(0.5f);
|
||||
int16x4x2_t tmp16;
|
||||
int32x4x2_t tmp32i;
|
||||
float32x4x2_t tmp32f, tmp_real, tmp_imag;
|
||||
float32x4_t sign, PlusHalf, Round;
|
||||
|
||||
if (neon_iters > 0)
|
||||
{
|
||||
for (unsigned int n = 0; n < neon_iters / ROTATOR_RELOAD; n++)
|
||||
{
|
||||
for (unsigned int j = 0; j < ROTATOR_RELOAD; j++)
|
||||
{
|
||||
/* load 4 complex numbers (int 16 bits each component) */
|
||||
tmp16 = vld2_s16((int16_t*)_in);
|
||||
__builtin_prefetch(_in + 8);
|
||||
_in += 4;
|
||||
|
||||
/* promote them to int 32 bits */
|
||||
tmp32i.val[0] = vmovl_s16(tmp16.val[0]);
|
||||
tmp32i.val[1] = vmovl_s16(tmp16.val[1]);
|
||||
|
||||
/* promote them to float 32 bits */
|
||||
tmp32f.val[0] = vcvtq_f32_s32(tmp32i.val[0]);
|
||||
tmp32f.val[1] = vcvtq_f32_s32(tmp32i.val[1]);
|
||||
|
||||
/* complex multiplication of four complex samples (float 32 bits each component) */
|
||||
tmp_real.val[0] = vmulq_f32(tmp32f.val[0], _phase_real);
|
||||
tmp_real.val[1] = vmulq_f32(tmp32f.val[1], _phase_imag);
|
||||
tmp_imag.val[0] = vmulq_f32(tmp32f.val[0], _phase_imag);
|
||||
tmp_imag.val[1] = vmulq_f32(tmp32f.val[1], _phase_real);
|
||||
|
||||
tmp32f.val[0] = vsubq_f32(tmp_real.val[0], tmp_real.val[1]);
|
||||
tmp32f.val[1] = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]);
|
||||
|
||||
/* downcast results to int32 */
|
||||
/* in __aarch64__ we can do that with vcvtaq_s32_f32(ret1); vcvtaq_s32_f32(ret2); */
|
||||
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[0]), 31)));
|
||||
PlusHalf = vaddq_f32(tmp32f.val[0], half);
|
||||
Round = vsubq_f32(PlusHalf, sign);
|
||||
tmp32i.val[0] = vcvtq_s32_f32(Round);
|
||||
|
||||
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[1]), 31)));
|
||||
PlusHalf = vaddq_f32(tmp32f.val[1], half);
|
||||
Round = vsubq_f32(PlusHalf, sign);
|
||||
tmp32i.val[1] = vcvtq_s32_f32(Round);
|
||||
|
||||
/* downcast results to int16 */
|
||||
tmp16.val[0] = vqmovn_s32(tmp32i.val[0]);
|
||||
tmp16.val[1] = vqmovn_s32(tmp32i.val[1]);
|
||||
|
||||
/* compute next four phases */
|
||||
tmp_real.val[0] = vmulq_f32(_phase_real, _phase4_real);
|
||||
tmp_real.val[1] = vmulq_f32(_phase_imag, _phase4_imag);
|
||||
tmp_imag.val[0] = vmulq_f32(_phase_real, _phase4_imag);
|
||||
tmp_imag.val[1] = vmulq_f32(_phase_imag, _phase4_real);
|
||||
|
||||
_phase_real = vsubq_f32(tmp_real.val[0], tmp_real.val[1]);
|
||||
_phase_imag = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]);
|
||||
|
||||
/* store the four complex results */
|
||||
vst2_s16((int16_t*)_out, tmp16);
|
||||
_out += 4;
|
||||
}
|
||||
// Regenerate phase
|
||||
//printf("Computed phase: %f\n", cos(cargf(lv_cmake(_phase_real[0],_phase_imag[0]))));
|
||||
phase_est = arg_phase0 + (n + 1) * ROTATOR_RELOAD * 4 * arg_phase_inc;
|
||||
//printf("Estimated phase: %f\n\n", cos(phase_est));
|
||||
*phase = lv_cmake(cos(phase_est), sin(phase_est));
|
||||
phase2 = (lv_32fc_t)(*phase) * phase_inc;
|
||||
phase3 = phase2 * phase_inc;
|
||||
phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
|
||||
_phase_real = vld1q_f32(____phase_real);
|
||||
_phase_imag = vld1q_f32(____phase_imag);
|
||||
}
|
||||
|
||||
for (unsigned int j = 0; j < neon_iters % ROTATOR_RELOAD; j++)
|
||||
{
|
||||
/* load 4 complex numbers (int 16 bits each component) */
|
||||
tmp16 = vld2_s16((int16_t*)_in);
|
||||
__builtin_prefetch(_in + 8);
|
||||
_in += 4;
|
||||
|
||||
/* promote them to int 32 bits */
|
||||
tmp32i.val[0] = vmovl_s16(tmp16.val[0]);
|
||||
tmp32i.val[1] = vmovl_s16(tmp16.val[1]);
|
||||
|
||||
/* promote them to float 32 bits */
|
||||
tmp32f.val[0] = vcvtq_f32_s32(tmp32i.val[0]);
|
||||
tmp32f.val[1] = vcvtq_f32_s32(tmp32i.val[1]);
|
||||
|
||||
/* complex multiplication of four complex samples (float 32 bits each component) */
|
||||
tmp_real.val[0] = vmulq_f32(tmp32f.val[0], _phase_real);
|
||||
tmp_real.val[1] = vmulq_f32(tmp32f.val[1], _phase_imag);
|
||||
tmp_imag.val[0] = vmulq_f32(tmp32f.val[0], _phase_imag);
|
||||
tmp_imag.val[1] = vmulq_f32(tmp32f.val[1], _phase_real);
|
||||
|
||||
tmp32f.val[0] = vsubq_f32(tmp_real.val[0], tmp_real.val[1]);
|
||||
tmp32f.val[1] = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]);
|
||||
|
||||
/* downcast results to int32 */
|
||||
/* in __aarch64__ we can do that with vcvtaq_s32_f32(ret1); vcvtaq_s32_f32(ret2); */
|
||||
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[0]), 31)));
|
||||
PlusHalf = vaddq_f32(tmp32f.val[0], half);
|
||||
Round = vsubq_f32(PlusHalf, sign);
|
||||
tmp32i.val[0] = vcvtq_s32_f32(Round);
|
||||
|
||||
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[1]), 31)));
|
||||
PlusHalf = vaddq_f32(tmp32f.val[1], half);
|
||||
Round = vsubq_f32(PlusHalf, sign);
|
||||
tmp32i.val[1] = vcvtq_s32_f32(Round);
|
||||
|
||||
/* downcast results to int16 */
|
||||
tmp16.val[0] = vqmovn_s32(tmp32i.val[0]);
|
||||
tmp16.val[1] = vqmovn_s32(tmp32i.val[1]);
|
||||
|
||||
/* compute next four phases */
|
||||
tmp_real.val[0] = vmulq_f32(_phase_real, _phase4_real);
|
||||
tmp_real.val[1] = vmulq_f32(_phase_imag, _phase4_imag);
|
||||
tmp_imag.val[0] = vmulq_f32(_phase_real, _phase4_imag);
|
||||
tmp_imag.val[1] = vmulq_f32(_phase_imag, _phase4_real);
|
||||
|
||||
_phase_real = vsubq_f32(tmp_real.val[0], tmp_real.val[1]);
|
||||
_phase_imag = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]);
|
||||
|
||||
/* store the four complex results */
|
||||
vst2_s16((int16_t*)_out, tmp16);
|
||||
_out += 4;
|
||||
}
|
||||
|
||||
vst1q_f32((float32_t*)__phase_real, _phase_real);
|
||||
vst1q_f32((float32_t*)__phase_imag, _phase_imag);
|
||||
|
||||
(*phase) = lv_cmake((float32_t)__phase_real[0], (float32_t)__phase_imag[0]);
|
||||
}
|
||||
for(i = 0; i < neon_iters % 4; ++i)
|
||||
{
|
||||
tmp16_ = *_in++;
|
||||
tmp32_ = lv_cmake((float32_t)lv_creal(tmp16_), (float32_t)lv_cimag(tmp16_)) * (*phase);
|
||||
*_out++ = lv_cmake((int16_t)rintf(lv_creal(tmp32_)), (int16_t)rintf(lv_cimag(tmp32_)));
|
||||
(*phase) *= phase_inc;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
#endif /* INCLUDED_volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_H */
|
||||
|
@ -94,7 +94,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
|
||||
|
||||
if (sse_iters > 0)
|
||||
{
|
||||
__m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc, result;
|
||||
__m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
|
||||
realcacc = _mm_setzero_si128();
|
||||
@ -105,8 +105,6 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
|
||||
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
//std::complex<T> memory structure: real part -> reinterpret_cast<cv T*>(a)[2*i]
|
||||
//imaginery part -> reinterpret_cast<cv T*>(a)[2*i + 1]
|
||||
// a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
|
||||
a = _mm_load_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
|
||||
__builtin_prefetch(_in_a + 8);
|
||||
@ -115,7 +113,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
|
||||
c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
|
||||
|
||||
c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
|
||||
real = _mm_subs_epi16(c,c_sr);
|
||||
real = _mm_subs_epi16(c, c_sr);
|
||||
|
||||
b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i ....
|
||||
a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i ....
|
||||
@ -123,7 +121,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
|
||||
imag1 = _mm_mullo_epi16(a, b_sl); // a3.i*b3.r, ....
|
||||
imag2 = _mm_mullo_epi16(b, a_sl); // b3.i*a3.r, ....
|
||||
|
||||
imag = _mm_adds_epi16(imag1, imag2); //with saturation aritmetic!
|
||||
imag = _mm_adds_epi16(imag1, imag2); //with saturation arithmetic!
|
||||
|
||||
realcacc = _mm_adds_epi16(realcacc, real);
|
||||
imagcacc = _mm_adds_epi16(imagcacc, imag);
|
||||
@ -135,9 +133,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
|
||||
realcacc = _mm_and_si128(realcacc, mask_real);
|
||||
imagcacc = _mm_and_si128(imagcacc, mask_imag);
|
||||
|
||||
result = _mm_or_si128(realcacc, imagcacc);
|
||||
a = _mm_or_si128(realcacc, imagcacc);
|
||||
|
||||
_mm_store_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector
|
||||
_mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
@ -202,7 +200,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
|
||||
imag1 = _mm_mullo_epi16(a, b_sl); // a3.i*b3.r, ....
|
||||
imag2 = _mm_mullo_epi16(b, a_sl); // b3.i*a3.r, ....
|
||||
|
||||
imag = _mm_adds_epi16(imag1, imag2); //with saturation aritmetic!
|
||||
imag = _mm_adds_epi16(imag1, imag2); //with saturation arithmetic!
|
||||
|
||||
realcacc = _mm_adds_epi16(realcacc, real);
|
||||
imagcacc = _mm_adds_epi16(imagcacc, imag);
|
||||
@ -245,46 +243,57 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const
|
||||
|
||||
lv_16sc_t* a_ptr = (lv_16sc_t*) in_a;
|
||||
lv_16sc_t* b_ptr = (lv_16sc_t*) in_b;
|
||||
// for 2-lane vectors, 1st lane holds the real part,
|
||||
// 2nd lane holds the imaginary part
|
||||
int16x4x2_t a_val, b_val, c_val, accumulator;
|
||||
int16x4x2_t tmp_real, tmp_imag;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4];
|
||||
accumulator.val[0] = vdup_n_s16(0);
|
||||
accumulator.val[1] = vdup_n_s16(0);
|
||||
*out = lv_cmake((int16_t)0, (int16_t)0);
|
||||
|
||||
for(number = 0; number < quarter_points; ++number)
|
||||
if (quarter_points > 0)
|
||||
{
|
||||
a_val = vld2_s16((int16_t*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i
|
||||
b_val = vld2_s16((int16_t*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i
|
||||
__builtin_prefetch(a_ptr + 8);
|
||||
__builtin_prefetch(b_ptr + 8);
|
||||
// for 2-lane vectors, 1st lane holds the real part,
|
||||
// 2nd lane holds the imaginary part
|
||||
int16x4x2_t a_val, b_val, c_val, accumulator;
|
||||
int16x4x2_t tmp_real, tmp_imag;
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4];
|
||||
accumulator.val[0] = vdup_n_s16(0);
|
||||
accumulator.val[1] = vdup_n_s16(0);
|
||||
lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0);
|
||||
|
||||
// multiply the real*real and imag*imag to get real result
|
||||
// a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r
|
||||
tmp_real.val[0] = vmul_s16(a_val.val[0], b_val.val[0]);
|
||||
// a0i*b0i|a1i*b1i|a2i*b2i|a3i*b3i
|
||||
tmp_real.val[1] = vmul_s16(a_val.val[1], b_val.val[1]);
|
||||
for(number = 0; number < quarter_points; ++number)
|
||||
{
|
||||
a_val = vld2_s16((int16_t*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i
|
||||
b_val = vld2_s16((int16_t*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i
|
||||
__builtin_prefetch(a_ptr + 8);
|
||||
__builtin_prefetch(b_ptr + 8);
|
||||
|
||||
// Multiply cross terms to get the imaginary result
|
||||
// a0r*b0i|a1r*b1i|a2r*b2i|a3r*b3i
|
||||
tmp_imag.val[0] = vmul_s16(a_val.val[0], b_val.val[1]);
|
||||
// a0i*b0r|a1i*b1r|a2i*b2r|a3i*b3r
|
||||
tmp_imag.val[1] = vmul_s16(a_val.val[1], b_val.val[0]);
|
||||
// multiply the real*real and imag*imag to get real result
|
||||
// a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r
|
||||
tmp_real.val[0] = vmul_s16(a_val.val[0], b_val.val[0]);
|
||||
// a0i*b0i|a1i*b1i|a2i*b2i|a3i*b3i
|
||||
tmp_real.val[1] = vmul_s16(a_val.val[1], b_val.val[1]);
|
||||
|
||||
c_val.val[0] = vsub_s16(tmp_real.val[0], tmp_real.val[1]);
|
||||
c_val.val[1] = vadd_s16(tmp_imag.val[0], tmp_imag.val[1]);
|
||||
// Multiply cross terms to get the imaginary result
|
||||
// a0r*b0i|a1r*b1i|a2r*b2i|a3r*b3i
|
||||
tmp_imag.val[0] = vmul_s16(a_val.val[0], b_val.val[1]);
|
||||
// a0i*b0r|a1i*b1r|a2i*b2r|a3i*b3r
|
||||
tmp_imag.val[1] = vmul_s16(a_val.val[1], b_val.val[0]);
|
||||
|
||||
accumulator.val[0] = vadd_s16(accumulator.val[0], c_val.val[0]);
|
||||
accumulator.val[1] = vadd_s16(accumulator.val[1], c_val.val[1]);
|
||||
c_val.val[0] = vqsub_s16(tmp_real.val[0], tmp_real.val[1]);
|
||||
c_val.val[1] = vqadd_s16(tmp_imag.val[0], tmp_imag.val[1]);
|
||||
|
||||
a_ptr += 4;
|
||||
b_ptr += 4;
|
||||
accumulator.val[0] = vqadd_s16(accumulator.val[0], c_val.val[0]);
|
||||
accumulator.val[1] = vqadd_s16(accumulator.val[1], c_val.val[1]);
|
||||
|
||||
a_ptr += 4;
|
||||
b_ptr += 4;
|
||||
}
|
||||
|
||||
vst2_s16((int16_t*)accum_result, accumulator);
|
||||
for (unsigned int i = 0; i < 4; ++i)
|
||||
{
|
||||
dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(accum_result[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(accum_result[i])));
|
||||
}
|
||||
|
||||
*out = dotProduct;
|
||||
}
|
||||
|
||||
vst2_s16((int16_t*)accum_result, accumulator);
|
||||
*out = accum_result[0] + accum_result[1] + accum_result[2] + accum_result[3];
|
||||
|
||||
// tail case
|
||||
for(number = quarter_points * 4; number < num_points; ++number)
|
||||
{
|
||||
|
@ -84,6 +84,25 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* resu
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic_sat(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
result[n_vec] = lv_cmake(0,0);
|
||||
for (unsigned int n = 0; n < num_points; n++)
|
||||
{
|
||||
lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(in_common[n]), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(in_common[n]), lv_cimag(in_a[n_vec][n]))),
|
||||
sat_adds16i(sat_muls16i(lv_creal(in_common[n]), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(in_common[n]), lv_creal(in_a[n_vec][n]))));
|
||||
result[n_vec] = lv_cmake(sat_adds16i(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[n_vec]), lv_cimag(tmp)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
|
||||
@ -398,8 +417,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(lv_16sc_t* res
|
||||
tmp.val[0] = vmls_s16(tmp.val[0], a_val.val[1], b_val.val[1]);
|
||||
tmp.val[1] = vmla_s16(tmp.val[1], a_val.val[0], b_val.val[1]);
|
||||
|
||||
accumulator[n_vec].val[0] = vadd_s16(accumulator[n_vec].val[0], tmp.val[0]);
|
||||
accumulator[n_vec].val[1] = vadd_s16(accumulator[n_vec].val[1], tmp.val[1]);
|
||||
accumulator[n_vec].val[0] = vqadd_s16(accumulator[n_vec].val[0], tmp.val[0]);
|
||||
accumulator[n_vec].val[1] = vqadd_s16(accumulator[n_vec].val[1], tmp.val[1]);
|
||||
}
|
||||
_in_common += 4;
|
||||
}
|
||||
|
@ -63,6 +63,30 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_generic(lv_16sc_t*
|
||||
|
||||
#endif /* Generic */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_generic_sat(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
{
|
||||
int num_a_vectors = 3;
|
||||
lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
|
||||
for(unsigned int n = 0; n < num_a_vectors; n++)
|
||||
{
|
||||
in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points);
|
||||
}
|
||||
|
||||
volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic_sat(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points);
|
||||
|
||||
for(unsigned int n = 0; n < num_a_vectors; n++)
|
||||
{
|
||||
volk_gnsssdr_free(in_a[n]);
|
||||
}
|
||||
volk_gnsssdr_free(in_a);
|
||||
}
|
||||
|
||||
#endif /* Generic */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE2
|
||||
static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_a_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
{
|
||||
|
@ -43,6 +43,7 @@
|
||||
*
|
||||
* Rotates and multiplies the reference complex vector with an arbitrary number of other complex vectors,
|
||||
* accumulates the results and stores them in the output vector.
|
||||
* The rotation is done at a fixed rate per sample, from an initial \p phase offset.
|
||||
* This function can be used for Doppler wipe-off and multiple correlator.
|
||||
*
|
||||
* <b>Dispatcher Prototype</b>
|
||||
@ -71,7 +72,7 @@
|
||||
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
|
||||
#include <volk_gnsssdr/saturation_arithmetic.h>
|
||||
#include <math.h>
|
||||
//#include <stdio.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
@ -88,6 +89,19 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc
|
||||
tmp16 = *in_common++; //if(n<10 || n >= 8108) printf("generic phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase));
|
||||
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
|
||||
tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
|
||||
|
||||
// Regenerate phase
|
||||
if (n % 256 == 0)
|
||||
{
|
||||
//printf("Phase before regeneration %i: %f,%f Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase));
|
||||
#ifdef __cplusplus
|
||||
(*phase) /= std::abs((*phase));
|
||||
#else
|
||||
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
|
||||
#endif
|
||||
//printf("Phase after regeneration %i: %f,%f Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase));
|
||||
}
|
||||
|
||||
(*phase) *= phase_inc;
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
@ -101,6 +115,60 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
|
||||
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic_reload(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t tmp16;
|
||||
lv_32fc_t tmp32;
|
||||
const unsigned int ROTATOR_RELOAD = 256;
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
result[n_vec] = lv_cmake(0,0);
|
||||
}
|
||||
|
||||
for (unsigned int n = 0; n < num_points / ROTATOR_RELOAD; n++)
|
||||
{
|
||||
for (unsigned int j = 0; j < ROTATOR_RELOAD; j++)
|
||||
{
|
||||
tmp16 = *in_common++; //if(n<10 || n >= 8108) printf("generic phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase));
|
||||
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
|
||||
tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
|
||||
(*phase) *= phase_inc;
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
lv_16sc_t tmp = tmp16 * in_a[n_vec][n * ROTATOR_RELOAD + j];
|
||||
//lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(tmp16), lv_cimag(in_a[n_vec][n]))) , sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(tmp16), lv_creal(in_a[n_vec][n]))));
|
||||
result[n_vec] = lv_cmake(sat_adds16i(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[n_vec]), lv_cimag(tmp)));
|
||||
}
|
||||
}
|
||||
/* Regenerate phase */
|
||||
#ifdef __cplusplus
|
||||
(*phase) /= std::abs((*phase));
|
||||
#else
|
||||
//(*phase) /= cabsf((*phase));
|
||||
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
|
||||
#endif
|
||||
}
|
||||
|
||||
for (unsigned int j = 0; j < num_points % ROTATOR_RELOAD; j++)
|
||||
{
|
||||
tmp16 = *in_common++; //if(n<10 || n >= 8108) printf("generic phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase));
|
||||
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
|
||||
tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
|
||||
(*phase) *= phase_inc;
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
lv_16sc_t tmp = tmp16 * in_a[n_vec][ (num_points / ROTATOR_RELOAD) * ROTATOR_RELOAD + j ];
|
||||
//lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(tmp16), lv_cimag(in_a[n_vec][n]))) , sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(tmp16), lv_creal(in_a[n_vec][n]))));
|
||||
result[n_vec] = lv_cmake(sat_adds16i(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[n_vec]), lv_cimag(tmp)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*LV_HAVE_GENERIC*/
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
@ -169,6 +237,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
|
||||
//next two samples
|
||||
_in_common += 2;
|
||||
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in_common + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
@ -212,6 +281,15 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
|
||||
realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real);
|
||||
imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag);
|
||||
}
|
||||
// Regenerate phase
|
||||
if ((number % 128) == 0)
|
||||
{
|
||||
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
|
||||
tmp2 = _mm_hadd_ps(tmp1, tmp1);
|
||||
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
|
||||
tmp2 = _mm_sqrt_ps(tmp1);
|
||||
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
|
||||
}
|
||||
}
|
||||
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
@ -233,6 +311,254 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
|
||||
free(realcacc);
|
||||
free(imagcacc);
|
||||
|
||||
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
|
||||
tmp2 = _mm_hadd_ps(tmp1, tmp1);
|
||||
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
|
||||
tmp2 = _mm_sqrt_ps(tmp1);
|
||||
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
|
||||
|
||||
_mm_store_ps((float*)two_phase_acc, two_phase_acc_reg);
|
||||
//(*phase) = lv_cmake((float*)two_phase_acc[0], (float*)two_phase_acc[1]);
|
||||
(*phase) = two_phase_acc[0];
|
||||
|
||||
for(unsigned int n = sse_iters * 4; n < num_points; n++)
|
||||
{
|
||||
tmp16 = in_common[n]; //printf("a_sse phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase));
|
||||
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
|
||||
tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
|
||||
(*phase) *= phase_inc;
|
||||
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
lv_16sc_t tmp = tmp16 * in_a[n_vec][n];
|
||||
//lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(tmp16), lv_cimag(in_a[n_vec][n]))) , sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(tmp16), lv_creal(in_a[n_vec][n]))));
|
||||
_out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)),
|
||||
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
#include <pmmintrin.h>
|
||||
|
||||
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake(0,0);
|
||||
|
||||
const unsigned int sse_iters = num_points / 4;
|
||||
const unsigned int ROTATOR_RELOAD = 128;
|
||||
|
||||
const lv_16sc_t** _in_a = in_a;
|
||||
const lv_16sc_t* _in_common = in_common;
|
||||
lv_16sc_t* _out = result;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
|
||||
|
||||
//todo dyn mem reg
|
||||
|
||||
__m128i* realcacc;
|
||||
__m128i* imagcacc;
|
||||
|
||||
realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
|
||||
imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
|
||||
|
||||
__m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl;
|
||||
|
||||
mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
|
||||
mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255);
|
||||
|
||||
// phase rotation registers
|
||||
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
|
||||
__m128i pc1, pc2;
|
||||
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
|
||||
two_phase_inc[0] = phase_inc * phase_inc;
|
||||
two_phase_inc[1] = phase_inc * phase_inc;
|
||||
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
|
||||
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
|
||||
two_phase_acc[0] = (*phase);
|
||||
two_phase_acc[1] = (*phase) * phase_inc;
|
||||
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
|
||||
__m128 yl, yh, tmp1, tmp2, tmp3;
|
||||
lv_16sc_t tmp16;
|
||||
lv_32fc_t tmp32;
|
||||
|
||||
for (unsigned int number = 0; number < sse_iters / ROTATOR_RELOAD; ++number)
|
||||
{
|
||||
for (unsigned int j = 0; j < ROTATOR_RELOAD; j++)
|
||||
{
|
||||
// Phase rotation on operand in_common starts here:
|
||||
//printf("generic phase %i: %f,%f\n", n*4,lv_creal(*phase),lv_cimag(*phase));
|
||||
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(pa, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
pa = _mm_shuffle_ps(pa, pa, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(pa, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
pb = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
pc1 = _mm_cvtps_epi32(pb); // convert from 32fc to 32ic
|
||||
|
||||
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
//next two samples
|
||||
_in_common += 2;
|
||||
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in_common + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(pa, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
pa = _mm_shuffle_ps(pa, pa, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(pa, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
pb = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
pc2 = _mm_cvtps_epi32(pb); // convert from 32fc to 32ic
|
||||
|
||||
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
// store four rotated in_common samples in the register b
|
||||
b = _mm_packs_epi32(pc1, pc2);// convert from 32ic to 16ic
|
||||
|
||||
//next two samples
|
||||
_in_common += 2;
|
||||
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
a = _mm_load_si128((__m128i*)&(_in_a[n_vec][(number * ROTATOR_RELOAD + j) * 4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
|
||||
|
||||
c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
|
||||
|
||||
c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
|
||||
real = _mm_subs_epi16(c, c_sr);
|
||||
|
||||
b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i ....
|
||||
a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i ....
|
||||
|
||||
imag1 = _mm_mullo_epi16(a, b_sl); // a3.i*b3.r, ....
|
||||
imag2 = _mm_mullo_epi16(b, a_sl); // b3.i*a3.r, ....
|
||||
|
||||
imag = _mm_adds_epi16(imag1, imag2);
|
||||
|
||||
realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real);
|
||||
imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag);
|
||||
}
|
||||
}
|
||||
// regenerate phase
|
||||
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
|
||||
tmp2 = _mm_hadd_ps(tmp1, tmp1);
|
||||
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
|
||||
tmp2 = _mm_sqrt_ps(tmp1);
|
||||
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
|
||||
}
|
||||
|
||||
for (unsigned int j = 0; j < sse_iters % ROTATOR_RELOAD; j++)
|
||||
{
|
||||
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(pa, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
pa = _mm_shuffle_ps(pa, pa, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(pa, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
pb = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
pc1 = _mm_cvtps_epi32(pb); // convert from 32fc to 32ic
|
||||
|
||||
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
//next two samples
|
||||
_in_common += 2;
|
||||
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in_common + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(pa, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
pa = _mm_shuffle_ps(pa, pa, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(pa, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
pb = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
pc2 = _mm_cvtps_epi32(pb); // convert from 32fc to 32ic
|
||||
|
||||
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
|
||||
// store four rotated in_common samples in the register b
|
||||
b = _mm_packs_epi32(pc1, pc2);// convert from 32ic to 16ic
|
||||
|
||||
//next two samples
|
||||
_in_common += 2;
|
||||
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
a = _mm_load_si128((__m128i*)&(_in_a[n_vec][((sse_iters / ROTATOR_RELOAD) * ROTATOR_RELOAD + j) * 4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
|
||||
|
||||
c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
|
||||
|
||||
c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
|
||||
real = _mm_subs_epi16(c, c_sr);
|
||||
|
||||
b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i ....
|
||||
a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i ....
|
||||
|
||||
imag1 = _mm_mullo_epi16(a, b_sl); // a3.i*b3.r, ....
|
||||
imag2 = _mm_mullo_epi16(b, a_sl); // b3.i*a3.r, ....
|
||||
|
||||
imag = _mm_adds_epi16(imag1, imag2);
|
||||
|
||||
realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real);
|
||||
imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag);
|
||||
}
|
||||
}
|
||||
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real);
|
||||
imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag);
|
||||
|
||||
a = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]);
|
||||
|
||||
_mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector
|
||||
dotProduct = lv_cmake(0,0);
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])),
|
||||
sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i])));
|
||||
}
|
||||
_out[n_vec] = dotProduct;
|
||||
}
|
||||
|
||||
free(realcacc);
|
||||
free(imagcacc);
|
||||
|
||||
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
|
||||
tmp2 = _mm_hadd_ps(tmp1, tmp1);
|
||||
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
|
||||
tmp2 = _mm_sqrt_ps(tmp1);
|
||||
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
|
||||
|
||||
_mm_store_ps((float*)two_phase_acc, two_phase_acc_reg);
|
||||
//(*phase) = lv_cmake((float*)two_phase_acc[0], (float*)two_phase_acc[1]);
|
||||
(*phase) = two_phase_acc[0];
|
||||
@ -303,7 +629,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
|
||||
for(unsigned int number = 0; number < sse_iters; number++)
|
||||
{
|
||||
// Phase rotation on operand in_common starts here:
|
||||
|
||||
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in_common + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
@ -326,6 +651,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
|
||||
//next two samples
|
||||
_in_common += 2;
|
||||
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
|
||||
__builtin_prefetch(_in_common + 8);
|
||||
//complex 32fc multiplication b=a*two_phase_acc_reg
|
||||
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
|
||||
@ -369,6 +695,15 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
|
||||
realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real);
|
||||
imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag);
|
||||
}
|
||||
// Regenerate phase
|
||||
if ((number % 256) == 0)
|
||||
{
|
||||
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
|
||||
tmp2 = _mm_hadd_ps(tmp1, tmp1);
|
||||
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
|
||||
tmp2 = _mm_sqrt_ps(tmp1);
|
||||
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
|
||||
}
|
||||
}
|
||||
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
@ -393,7 +728,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
|
||||
_mm_storeu_ps((float*)two_phase_acc, two_phase_acc_reg);
|
||||
(*phase) = two_phase_acc[0];
|
||||
|
||||
|
||||
for(unsigned int n = sse_iters * 4; n < num_points; n++)
|
||||
{
|
||||
tmp16 = in_common[n];
|
||||
@ -428,6 +762,9 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
|
||||
if (neon_iters > 0)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake(0,0);
|
||||
float arg_phase0 = cargf(*phase);
|
||||
float arg_phase_inc = cargf(phase_inc);
|
||||
float phase_est;
|
||||
|
||||
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
|
||||
@ -538,6 +875,22 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
|
||||
accumulator[n_vec].val[0] = vqadd_s16(accumulator[n_vec].val[0], c_val.val[0]);
|
||||
accumulator[n_vec].val[1] = vqadd_s16(accumulator[n_vec].val[1], c_val.val[1]);
|
||||
}
|
||||
// Regenerate phase
|
||||
if ((number % 256) == 0)
|
||||
{
|
||||
phase_est = arg_phase0 + (number + 1) * 4 * arg_phase_inc;
|
||||
|
||||
*phase = lv_cmake(cos(phase_est), sin(phase_est));
|
||||
phase2 = (lv_32fc_t)(*phase) * phase_inc;
|
||||
phase3 = phase2 * phase_inc;
|
||||
phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
|
||||
_phase_real = vld1q_f32(____phase_real);
|
||||
_phase_imag = vld1q_f32(____phase_imag);
|
||||
}
|
||||
}
|
||||
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
@ -577,6 +930,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
|
||||
|
||||
#ifdef LV_HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
#include <volk_gnsssdr/volk_gnsssdr_neon_intrinsics.h>
|
||||
|
||||
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
@ -592,7 +946,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
|
||||
if (neon_iters > 0)
|
||||
{
|
||||
lv_16sc_t dotProduct = lv_cmake(0,0);
|
||||
|
||||
float arg_phase0 = cargf(*phase);
|
||||
float arg_phase_inc = cargf(phase_inc);
|
||||
float phase_est;
|
||||
//printf("arg phase0: %f", arg_phase0);
|
||||
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
|
||||
@ -677,6 +1034,37 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
|
||||
_phase_real = vsubq_f32(tmp32_real.val[0], tmp32_real.val[1]);
|
||||
_phase_imag = vaddq_f32(tmp32_imag.val[0], tmp32_imag.val[1]);
|
||||
|
||||
// Regenerate phase
|
||||
if ((number % 256) == 0)
|
||||
{
|
||||
//printf("computed phase: %f\n", cos(cargf(lv_cmake(_phase_real[0],_phase_imag[0]))));
|
||||
phase_est = arg_phase0 + (number + 1) * 4 * arg_phase_inc;
|
||||
//printf("Estimated phase: %f\n\n", cos(phase_est));
|
||||
|
||||
*phase = lv_cmake(cos(phase_est), sin(phase_est));
|
||||
phase2 = (lv_32fc_t)(*phase) * phase_inc;
|
||||
phase3 = phase2 * phase_inc;
|
||||
phase4 = phase3 * phase_inc;
|
||||
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
|
||||
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
|
||||
|
||||
_phase_real = vld1q_f32(____phase_real);
|
||||
_phase_imag = vld1q_f32(____phase_imag);
|
||||
|
||||
// Round = vmulq_f32(_phase_real, _phase_real);
|
||||
// Round = vmlaq_f32(Round, _phase_imag, _phase_imag);
|
||||
// Round = vsqrtq_f32(Round);//printf("sqrt: %f \n", Round[0]);
|
||||
//Round = vrsqrteq_f32(Round);printf("1/sqtr: %f \n",Round[0]);
|
||||
//Round = vrecpeq_f32((Round);
|
||||
// _phase_real = vdivq_f32(_phase_real, Round);
|
||||
// _phase_imag = vdivq_f32(_phase_imag, Round);
|
||||
//_phase_real = vmulq_f32(_phase_real, Round);
|
||||
//_phase_imag = vmulq_f32(_phase_imag, Round);
|
||||
//printf("After %i: %f,%f, %f\n\n", number, _phase_real[0], _phase_imag[0], sqrt(_phase_real[0]*_phase_real[0]+_phase_imag[0]*_phase_imag[0]));
|
||||
|
||||
}
|
||||
|
||||
vst1q_f32((float32_t*)__phase_real, _phase_real);
|
||||
vst1q_f32((float32_t*)__phase_imag, _phase_imag);
|
||||
|
||||
@ -708,6 +1096,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
|
||||
_out[n_vec] = dotProduct;
|
||||
}
|
||||
free(accumulator);
|
||||
|
||||
vst1q_f32((float32_t*)__phase_real, _phase_real);
|
||||
vst1q_f32((float32_t*)__phase_imag, _phase_imag);
|
||||
|
||||
@ -731,3 +1120,4 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
|
||||
#endif /* LV_HAVE_NEON */
|
||||
|
||||
#endif /*INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_H*/
|
||||
|
||||
|
@ -70,6 +70,36 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_generic(lv_
|
||||
#endif // Generic
|
||||
|
||||
|
||||
#ifdef LV_HAVE_GENERIC
|
||||
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_generic_reload(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
{
|
||||
// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
float rem_carrier_phase_in_rad = 0.345;
|
||||
float phase_step_rad = 0.1;
|
||||
lv_32fc_t phase[1];
|
||||
phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
|
||||
lv_32fc_t phase_inc[1];
|
||||
phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
|
||||
|
||||
int num_a_vectors = 3;
|
||||
lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
|
||||
for(unsigned int n = 0; n < num_a_vectors; n++)
|
||||
{
|
||||
in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points);
|
||||
}
|
||||
volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic_reload(result, local_code, phase_inc[0], phase,(const lv_16sc_t**) in_a, num_a_vectors, num_points);
|
||||
|
||||
for(unsigned int n = 0; n < num_a_vectors; n++)
|
||||
{
|
||||
volk_gnsssdr_free(in_a[n]);
|
||||
}
|
||||
volk_gnsssdr_free(in_a);
|
||||
}
|
||||
|
||||
#endif // Generic
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
{
|
||||
@ -101,6 +131,37 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3(lv_1
|
||||
#endif // SSE3
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3_reload(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
{
|
||||
// phases must be normalized. Phase rotator expects a complex exponential input!
|
||||
float rem_carrier_phase_in_rad = 0.345;
|
||||
float phase_step_rad = 0.1;
|
||||
lv_32fc_t phase[1];
|
||||
phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
|
||||
lv_32fc_t phase_inc[1];
|
||||
phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
|
||||
|
||||
int num_a_vectors = 3;
|
||||
lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
|
||||
for(unsigned int n = 0; n < num_a_vectors; n++)
|
||||
{
|
||||
in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
|
||||
memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points);
|
||||
}
|
||||
|
||||
volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points);
|
||||
|
||||
for(unsigned int n = 0; n < num_a_vectors; n++)
|
||||
{
|
||||
volk_gnsssdr_free(in_a[n]);
|
||||
}
|
||||
volk_gnsssdr_free(in_a);
|
||||
}
|
||||
|
||||
#endif // SSE3
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE3
|
||||
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
|
||||
{
|
||||
|
@ -58,6 +58,9 @@ std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t
|
||||
// some others need more iterations ***** ADDED BY GNSS-SDR
|
||||
volk_gnsssdr_test_params_t test_params_more_iters = volk_gnsssdr_test_params_t(test_params.tol(), test_params.scalar(),
|
||||
test_params.vlen(), 100000, test_params.benchmark_mode(), test_params.kernel_regex());
|
||||
// ... or more tolerance ***** ADDED BY GNSS-SDR
|
||||
volk_gnsssdr_test_params_t test_params_int16 = volk_gnsssdr_test_params_t(16, test_params.scalar(),
|
||||
test_params.vlen(), test_params.iter(), test_params.benchmark_mode(), test_params.kernel_regex());
|
||||
|
||||
std::vector<volk_gnsssdr_test_case_t> test_cases = boost::assign::list_of
|
||||
|
||||
@ -77,11 +80,11 @@ std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t
|
||||
(VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_dot_prod_16ic, test_params))
|
||||
(VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_multiply_16ic, test_params_more_iters))
|
||||
(VOLK_INIT_TEST(volk_gnsssdr_16ic_convert_32fc, test_params_more_iters))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_rotatorpuppet_16ic, volk_gnsssdr_16ic_s32fc_x2_rotator_16ic, test_params))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_rotatorpuppet_16ic, volk_gnsssdr_16ic_s32fc_x2_rotator_16ic, test_params_int1))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_resamplerpuppet_16ic, volk_gnsssdr_16ic_resampler_16ic, test_params))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_resamplerxnpuppet_16ic, volk_gnsssdr_16ic_xn_resampler_16ic_xn, test_params))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_dot_prod_16ic_xn, test_params))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn, test_params))
|
||||
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn, test_params_int16))
|
||||
;
|
||||
|
||||
return test_cases;
|
||||
|
@ -76,10 +76,10 @@ void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n)
|
||||
else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
|
||||
break;
|
||||
case 2:
|
||||
// 16 bits dot product saturates very fast even with moderate length vectors
|
||||
// we produce here only 4 bits input range
|
||||
if(type.is_signed) ((int16_t *)data)[i] = (int16_t)((int16_t) scaled_rand % 16);
|
||||
else ((uint16_t *)data)[i] = (uint16_t) (int16_t)((int16_t) scaled_rand % 16);
|
||||
// 16 bit multiplication saturates very fast
|
||||
// we produce here only 3 bits input range
|
||||
if(type.is_signed) ((int16_t *)data)[i] = (int16_t)((int16_t) scaled_rand % 8);
|
||||
else ((uint16_t *)data)[i] = (uint16_t) (int16_t)((int16_t) scaled_rand % 8);
|
||||
break;
|
||||
case 1:
|
||||
if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand;
|
||||
|
@ -25,7 +25,6 @@ set(TRACKING_ADAPTER_SOURCES
|
||||
galileo_e1_dll_pll_veml_tracking.cc
|
||||
galileo_e1_tcp_connector_tracking.cc
|
||||
gps_l1_ca_dll_fll_pll_tracking.cc
|
||||
gps_l1_ca_dll_pll_optim_tracking.cc
|
||||
gps_l1_ca_dll_pll_tracking.cc
|
||||
gps_l1_ca_dll_pll_c_aid_tracking.cc
|
||||
gps_l1_ca_tcp_connector_tracking.cc
|
||||
|
@ -1,159 +0,0 @@
|
||||
/*!
|
||||
* \file gps_l1_ca_dll_pll_optim_tracking.cc
|
||||
* \brief Interface of an adapter of a speed optimized DLL+PLL tracking loop block
|
||||
* for GPS L1 C/A to a TrackingInterface
|
||||
* \author Javier Arribas, 2012. jarribas(at)cttc.es
|
||||
*
|
||||
* GPS L1 TRACKING MODULE OPTIMIZED FOR SPEED:
|
||||
* - Code Doppler is not compensated in the local replica
|
||||
* Code DLL + carrier PLL according to the algorithms described in:
|
||||
* K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
|
||||
* A Software-Defined GPS and Galileo Receiver. A Single-Frequency
|
||||
* Approach, Birkha user, 2007
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "gps_l1_ca_dll_pll_optim_tracking.h"
|
||||
#include <cmath>
|
||||
#include <glog/logging.h>
|
||||
#include "GPS_L1_CA.h"
|
||||
#include "configuration_interface.h"
|
||||
|
||||
using google::LogMessage;
|
||||
|
||||
GpsL1CaDllPllOptimTracking::GpsL1CaDllPllOptimTracking(
|
||||
ConfigurationInterface* configuration, std::string role,
|
||||
unsigned int in_streams, unsigned int out_streams,
|
||||
boost::shared_ptr<gr::msg_queue> queue) :
|
||||
role_(role), in_streams_(in_streams), out_streams_(out_streams),
|
||||
queue_(queue)
|
||||
{
|
||||
DLOG(INFO) << "role " << role;
|
||||
//################# CONFIGURATION PARAMETERS ########################
|
||||
int fs_in;
|
||||
int vector_length;
|
||||
int f_if;
|
||||
bool dump;
|
||||
std::string dump_filename;
|
||||
std::string item_type;
|
||||
std::string default_item_type = "gr_complex";
|
||||
float pll_bw_hz;
|
||||
float dll_bw_hz;
|
||||
float early_late_space_chips;
|
||||
item_type = configuration->property(role + ".item_type",default_item_type);
|
||||
//vector_length = configuration->property(role + ".vector_length", 2048);
|
||||
fs_in = configuration->property("GNSS-SDR.internal_fs_hz", 2048000);
|
||||
f_if = configuration->property(role + ".if", 0);
|
||||
dump = configuration->property(role + ".dump", false);
|
||||
pll_bw_hz = configuration->property(role + ".pll_bw_hz", 50.0);
|
||||
dll_bw_hz = configuration->property(role + ".dll_bw_hz", 2.0);
|
||||
early_late_space_chips = configuration->property(role + ".early_late_space_chips", 0.5);
|
||||
std::string default_dump_filename = "./track_ch";
|
||||
dump_filename = configuration->property(role + ".dump_filename", default_dump_filename); //unused!
|
||||
vector_length = round(fs_in / (GPS_L1_CA_CODE_RATE_HZ / GPS_L1_CA_CODE_LENGTH_CHIPS));
|
||||
|
||||
//################# MAKE TRACKING GNURadio object ###################
|
||||
if (item_type.compare("gr_complex") == 0)
|
||||
{
|
||||
item_size_ = sizeof(gr_complex);
|
||||
tracking_ = gps_l1_ca_dll_pll_make_optim_tracking_cc(
|
||||
f_if,
|
||||
fs_in,
|
||||
vector_length,
|
||||
queue_,
|
||||
dump,
|
||||
dump_filename,
|
||||
pll_bw_hz,
|
||||
dll_bw_hz,
|
||||
early_late_space_chips);
|
||||
}
|
||||
else
|
||||
{
|
||||
item_size_ = sizeof(gr_complex);
|
||||
LOG(WARNING) << item_type << " unknown tracking item type.";
|
||||
}
|
||||
|
||||
channel_ = 0;
|
||||
channel_internal_queue_ = 0;
|
||||
DLOG(INFO) << "tracking(" << tracking_->unique_id() << ")";
|
||||
}
|
||||
|
||||
|
||||
GpsL1CaDllPllOptimTracking::~GpsL1CaDllPllOptimTracking()
|
||||
{}
|
||||
|
||||
|
||||
void GpsL1CaDllPllOptimTracking::start_tracking()
|
||||
{
|
||||
tracking_->start_tracking();
|
||||
}
|
||||
|
||||
/*
|
||||
* Set tracking channel unique ID
|
||||
*/
|
||||
void GpsL1CaDllPllOptimTracking::set_channel(unsigned int channel)
|
||||
{
|
||||
channel_ = channel;
|
||||
tracking_->set_channel(channel);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set tracking channel internal queue
|
||||
*/
|
||||
void GpsL1CaDllPllOptimTracking::set_channel_queue(
|
||||
concurrent_queue<int> *channel_internal_queue)
|
||||
{
|
||||
channel_internal_queue_ = channel_internal_queue;
|
||||
tracking_->set_channel_queue(channel_internal_queue_);
|
||||
}
|
||||
|
||||
void GpsL1CaDllPllOptimTracking::set_gnss_synchro(Gnss_Synchro* p_gnss_synchro)
|
||||
{
|
||||
tracking_->set_gnss_synchro(p_gnss_synchro);
|
||||
}
|
||||
|
||||
void GpsL1CaDllPllOptimTracking::connect(gr::top_block_sptr top_block)
|
||||
{
|
||||
if(top_block) { /* top_block is not null */};
|
||||
//nothing to connect, now the tracking uses gr_sync_decimator
|
||||
}
|
||||
|
||||
void GpsL1CaDllPllOptimTracking::disconnect(gr::top_block_sptr top_block)
|
||||
{
|
||||
if(top_block) { /* top_block is not null */};
|
||||
//nothing to disconnect, now the tracking uses gr_sync_decimator
|
||||
}
|
||||
|
||||
gr::basic_block_sptr GpsL1CaDllPllOptimTracking::get_left_block()
|
||||
{
|
||||
return tracking_;
|
||||
}
|
||||
|
||||
gr::basic_block_sptr GpsL1CaDllPllOptimTracking::get_right_block()
|
||||
{
|
||||
return tracking_;
|
||||
}
|
||||
|
@ -1,119 +0,0 @@
|
||||
/*!
|
||||
* \file gps_l1_ca_dll_pll_optim_tracking.h
|
||||
* \brief Interface of an adapter of a speed optimized DLL+PLL tracking loop block
|
||||
* for GPS L1 C/A to a TrackingInterface
|
||||
* \author Javier Arribas, 2012. jarribas(at)cttc.es
|
||||
*
|
||||
* GPS L1 TRACKING MODULE OPTIMIZED FOR SPEED:
|
||||
* - Code Doppler is not compensated in the local replica
|
||||
* Code DLL + carrier PLL according to the algorithms described in:
|
||||
* K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
|
||||
* A Software-Defined GPS and Galileo Receiver. A Single-Frequency
|
||||
* Approach, Birkha user, 2007
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_H_
|
||||
#define GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_H_
|
||||
|
||||
#include <string>
|
||||
#include <gnuradio/msg_queue.h>
|
||||
#include "tracking_interface.h"
|
||||
#include "gps_l1_ca_dll_pll_optim_tracking_cc.h"
|
||||
|
||||
|
||||
class ConfigurationInterface;
|
||||
|
||||
/*!
|
||||
* \brief This class implements a code DLL + carrier PLL tracking loop
|
||||
*/
|
||||
class GpsL1CaDllPllOptimTracking : public TrackingInterface
|
||||
{
|
||||
|
||||
public:
|
||||
|
||||
GpsL1CaDllPllOptimTracking(ConfigurationInterface* configuration,
|
||||
std::string role,
|
||||
unsigned int in_streams,
|
||||
unsigned int out_streams,
|
||||
boost::shared_ptr<gr::msg_queue> queue);
|
||||
|
||||
virtual ~GpsL1CaDllPllOptimTracking();
|
||||
|
||||
std::string role()
|
||||
{
|
||||
return role_;
|
||||
}
|
||||
|
||||
//! Returns "GPS_L1_CA_DLL_PLL_Optim_Tracking"
|
||||
std::string implementation()
|
||||
{
|
||||
return "GPS_L1_CA_DLL_PLL_Optim_Tracking";
|
||||
}
|
||||
size_t item_size()
|
||||
{
|
||||
return item_size_;
|
||||
}
|
||||
|
||||
void connect(gr::top_block_sptr top_block);
|
||||
void disconnect(gr::top_block_sptr top_block);
|
||||
gr::basic_block_sptr get_left_block();
|
||||
gr::basic_block_sptr get_right_block();
|
||||
|
||||
|
||||
/*!
|
||||
* \brief Set tracking channel unique ID
|
||||
*/
|
||||
void set_channel(unsigned int channel);
|
||||
|
||||
/*!
|
||||
* \brief Set acquisition/tracking common Gnss_Synchro object pointer
|
||||
* to efficiently exchange synchronization data between acquisition and tracking blocks
|
||||
*/
|
||||
void set_gnss_synchro(Gnss_Synchro* p_gnss_synchro);
|
||||
|
||||
/*!
|
||||
* \brief Set tracking channel internal queue
|
||||
*/
|
||||
void set_channel_queue(concurrent_queue<int> *channel_internal_queue);
|
||||
|
||||
void start_tracking();
|
||||
|
||||
private:
|
||||
|
||||
gps_l1_ca_dll_pll_optim_tracking_cc_sptr tracking_;
|
||||
size_t item_size_;
|
||||
|
||||
unsigned int channel_;
|
||||
|
||||
std::string role_;
|
||||
unsigned int in_streams_;
|
||||
unsigned int out_streams_;
|
||||
boost::shared_ptr<gr::msg_queue> queue_;
|
||||
concurrent_queue<int> *channel_internal_queue_;
|
||||
};
|
||||
|
||||
#endif // GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_H_
|
@ -26,7 +26,6 @@ set(TRACKING_GR_BLOCKS_SOURCES
|
||||
galileo_e1_dll_pll_veml_tracking_cc.cc
|
||||
galileo_e1_tcp_connector_tracking_cc.cc
|
||||
gps_l1_ca_dll_fll_pll_tracking_cc.cc
|
||||
gps_l1_ca_dll_pll_optim_tracking_cc.cc
|
||||
gps_l1_ca_dll_pll_tracking_cc.cc
|
||||
gps_l1_ca_tcp_connector_tracking_cc.cc
|
||||
galileo_e5a_dll_pll_tracking_cc.cc
|
||||
|
@ -1,654 +0,0 @@
|
||||
/*!
|
||||
* \file gps_l1_ca_dll_pll_optim_tracking_cc.cc
|
||||
* \brief Implementation of a code DLL + carrier PLL tracking block
|
||||
* \author Javier Arribas, 2012. jarribas(at)cttc.es
|
||||
* GPS L1 TRACKING MODULE OPTIMIZED FOR SPEED:
|
||||
* - Code Doppler is not compensated in the local replica
|
||||
* Code DLL + carrier PLL according to the algorithms described in:
|
||||
* [1] K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
|
||||
* A Software-Defined GPS and Galileo Receiver. A Single-Frequency
|
||||
* Approach, Birkha user, 2007
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "gps_l1_ca_dll_pll_optim_tracking_cc.h"
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <gnuradio/io_signature.h>
|
||||
#include <glog/logging.h>
|
||||
#include <volk/volk.h>
|
||||
#include "gps_sdr_signal_processing.h"
|
||||
#include "tracking_discriminators.h"
|
||||
#include "lock_detectors.h"
|
||||
#include "GPS_L1_CA.h"
|
||||
#include "nco_lib.h"
|
||||
#include "control_message_factory.h"
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
* \todo Include in definition header file
|
||||
*/
|
||||
#define CN0_ESTIMATION_SAMPLES 20
|
||||
#define MINIMUM_VALID_CN0 25
|
||||
#define MAXIMUM_LOCK_FAIL_COUNTER 50
|
||||
#define CARRIER_LOCK_THRESHOLD 0.85
|
||||
|
||||
using google::LogMessage;
|
||||
|
||||
gps_l1_ca_dll_pll_optim_tracking_cc_sptr
|
||||
gps_l1_ca_dll_pll_make_optim_tracking_cc(
|
||||
long if_freq,
|
||||
long fs_in,
|
||||
unsigned int vector_length,
|
||||
boost::shared_ptr<gr::msg_queue> queue,
|
||||
bool dump,
|
||||
std::string dump_filename,
|
||||
float pll_bw_hz,
|
||||
float dll_bw_hz,
|
||||
float early_late_space_chips)
|
||||
{
|
||||
return gps_l1_ca_dll_pll_optim_tracking_cc_sptr(new Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc(if_freq,
|
||||
fs_in, vector_length, queue, dump, dump_filename, pll_bw_hz, dll_bw_hz, early_late_space_chips));
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::forecast (int noutput_items,
|
||||
gr_vector_int &ninput_items_required)
|
||||
{
|
||||
if (noutput_items != 0)
|
||||
{
|
||||
ninput_items_required[0] = d_gnuradio_forecast_samples; //set the required available samples in each call
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc(
|
||||
long if_freq,
|
||||
long fs_in,
|
||||
unsigned int vector_length,
|
||||
boost::shared_ptr<gr::msg_queue> queue,
|
||||
bool dump,
|
||||
std::string dump_filename,
|
||||
float pll_bw_hz,
|
||||
float dll_bw_hz,
|
||||
float early_late_space_chips) :
|
||||
gr::block("Gps_L1_Ca_Dll_Pll_Tracking_cc",
|
||||
gr::io_signature::make(1, 1, sizeof(gr_complex)),
|
||||
gr::io_signature::make(1, 1, sizeof(Gnss_Synchro)))
|
||||
{
|
||||
// initialize internal vars
|
||||
d_queue = queue;
|
||||
d_dump = dump;
|
||||
d_if_freq = if_freq;
|
||||
d_fs_in = fs_in;
|
||||
d_vector_length = vector_length;
|
||||
d_gnuradio_forecast_samples = static_cast<int>(d_vector_length) * 2;
|
||||
d_dump_filename = dump_filename;
|
||||
|
||||
// Initialize tracking ==========================================
|
||||
d_code_loop_filter.set_DLL_BW(dll_bw_hz);
|
||||
d_carrier_loop_filter.set_PLL_BW(pll_bw_hz);
|
||||
|
||||
//--- DLL variables --------------------------------------------------------
|
||||
d_early_late_spc_chips = early_late_space_chips; // Define early-late offset (in chips)
|
||||
|
||||
// Initialization of local code replica
|
||||
// Get space for a vector with the C/A code replica sampled 1x/chip
|
||||
d_ca_code = static_cast<gr_complex*>(volk_malloc((GPS_L1_CA_CODE_LENGTH_CHIPS + 2) * sizeof(gr_complex), volk_get_alignment()));
|
||||
|
||||
// Get space for the resampled early / prompt / late local replicas
|
||||
d_early_code = static_cast<gr_complex*>(volk_malloc(2 * d_vector_length * sizeof(gr_complex), volk_get_alignment()));
|
||||
d_prompt_code = static_cast<gr_complex*>(volk_malloc(2 * d_vector_length * sizeof(gr_complex), volk_get_alignment()));
|
||||
d_late_code = static_cast<gr_complex*>(volk_malloc(2 * d_vector_length * sizeof(gr_complex), volk_get_alignment()));
|
||||
|
||||
// space for carrier wipeoff and signal baseband vectors
|
||||
d_carr_sign = static_cast<gr_complex*>(volk_malloc(2*d_vector_length * sizeof(gr_complex), volk_get_alignment()));
|
||||
|
||||
// correlator outputs (scalar)
|
||||
d_Early = static_cast<gr_complex*>(volk_malloc(sizeof(gr_complex), volk_get_alignment()));
|
||||
d_Prompt = static_cast<gr_complex*>(volk_malloc(sizeof(gr_complex), volk_get_alignment()));
|
||||
d_Late = static_cast<gr_complex*>(volk_malloc(sizeof(gr_complex), volk_get_alignment()));
|
||||
|
||||
//--- Perform initializations ------------------------------
|
||||
// define initial code frequency basis of NCO
|
||||
d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ;
|
||||
// define residual code phase (in chips)
|
||||
d_rem_code_phase_samples = 0.0;
|
||||
// define residual carrier phase
|
||||
d_rem_carr_phase_rad = 0.0;
|
||||
|
||||
// sample synchronization
|
||||
d_sample_counter = 0;
|
||||
//d_sample_counter_seconds = 0;
|
||||
d_acq_sample_stamp = 0;
|
||||
|
||||
d_enable_tracking = false;
|
||||
d_pull_in = false;
|
||||
d_last_seg = 0;
|
||||
|
||||
d_current_prn_length_samples = static_cast<int>(d_vector_length);
|
||||
|
||||
// CN0 estimation and lock detector buffers
|
||||
d_cn0_estimation_counter = 0;
|
||||
d_Prompt_buffer = new gr_complex[CN0_ESTIMATION_SAMPLES];
|
||||
d_carrier_lock_test = 1;
|
||||
d_CN0_SNV_dB_Hz = 0;
|
||||
d_carrier_lock_fail_counter = 0;
|
||||
d_carrier_lock_threshold = CARRIER_LOCK_THRESHOLD;
|
||||
|
||||
systemName["G"] = std::string("GPS");
|
||||
|
||||
d_channel_internal_queue = 0;
|
||||
d_acquisition_gnss_synchro = 0;
|
||||
d_channel = 0;
|
||||
d_acq_code_phase_samples = 0.0;
|
||||
d_acq_carrier_doppler_hz = 0.0;
|
||||
d_carrier_doppler_hz = 0.0;
|
||||
d_acc_carrier_phase_rad = 0.0;
|
||||
d_code_phase_samples = 0.0;
|
||||
d_acc_code_phase_secs = 0.0;
|
||||
}
|
||||
|
||||
|
||||
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::start_tracking()
|
||||
{
|
||||
// correct the code phase according to the delay between acq and trk
|
||||
d_acq_code_phase_samples = d_acquisition_gnss_synchro->Acq_delay_samples;
|
||||
d_acq_carrier_doppler_hz = d_acquisition_gnss_synchro->Acq_doppler_hz;
|
||||
d_acq_sample_stamp = d_acquisition_gnss_synchro->Acq_samplestamp_samples;
|
||||
|
||||
long int acq_trk_diff_samples;
|
||||
double acq_trk_diff_seconds;
|
||||
acq_trk_diff_samples = static_cast<long int>(d_sample_counter) - static_cast<long int>(d_acq_sample_stamp); //-d_vector_length;
|
||||
LOG(INFO) << "Number of samples between Acquisition and Tracking =" << acq_trk_diff_samples;
|
||||
acq_trk_diff_seconds = static_cast<float>(acq_trk_diff_samples) / static_cast<float>(d_fs_in);
|
||||
//doppler effect
|
||||
// Fd=(C/(C+Vr))*F
|
||||
double radial_velocity;
|
||||
radial_velocity = (GPS_L1_FREQ_HZ + d_acq_carrier_doppler_hz) / GPS_L1_FREQ_HZ;
|
||||
// new chip and prn sequence periods based on acq Doppler
|
||||
double T_chip_mod_seconds;
|
||||
double T_prn_mod_seconds;
|
||||
double T_prn_mod_samples;
|
||||
d_code_freq_chips = radial_velocity * GPS_L1_CA_CODE_RATE_HZ;
|
||||
T_chip_mod_seconds = 1/d_code_freq_chips;
|
||||
T_prn_mod_seconds = T_chip_mod_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
|
||||
T_prn_mod_samples = T_prn_mod_seconds * static_cast<float>(d_fs_in);
|
||||
d_current_prn_length_samples = round(T_prn_mod_samples);
|
||||
|
||||
double T_prn_true_seconds = GPS_L1_CA_CODE_LENGTH_CHIPS / GPS_L1_CA_CODE_RATE_HZ;
|
||||
double T_prn_true_samples = T_prn_true_seconds * static_cast<float>(d_fs_in);
|
||||
double T_prn_diff_seconds = T_prn_true_seconds - T_prn_mod_seconds;
|
||||
double N_prn_diff = acq_trk_diff_seconds / T_prn_true_seconds;
|
||||
double corrected_acq_phase_samples, delay_correction_samples;
|
||||
corrected_acq_phase_samples = fmod((d_acq_code_phase_samples + T_prn_diff_seconds * N_prn_diff * static_cast<float>(d_fs_in)), T_prn_true_samples);
|
||||
if (corrected_acq_phase_samples < 0)
|
||||
{
|
||||
corrected_acq_phase_samples = T_prn_mod_samples + corrected_acq_phase_samples;
|
||||
}
|
||||
delay_correction_samples = d_acq_code_phase_samples - corrected_acq_phase_samples;
|
||||
d_acq_code_phase_samples = corrected_acq_phase_samples;
|
||||
d_carrier_doppler_hz = d_acq_carrier_doppler_hz;
|
||||
|
||||
// DLL/PLL filter initialization
|
||||
d_carrier_loop_filter.initialize(); //initialize the carrier filter
|
||||
d_code_loop_filter.initialize(); //initialize the code filter
|
||||
|
||||
// generate local reference ALWAYS starting at chip 1 (1 sample per chip)
|
||||
gps_l1_ca_code_gen_complex(&d_ca_code[1], d_acquisition_gnss_synchro->PRN, 0);
|
||||
d_ca_code[0] = d_ca_code[static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS)];
|
||||
d_ca_code[static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS) + 1] = d_ca_code[1];
|
||||
|
||||
//******************************************************************************
|
||||
// Experimental: pre-sampled local signal replica at nominal code frequency.
|
||||
// No code doppler correction
|
||||
double tcode_chips;
|
||||
int associated_chip_index;
|
||||
int code_length_chips = static_cast<float>(GPS_L1_CA_CODE_LENGTH_CHIPS);
|
||||
double code_phase_step_chips;
|
||||
int early_late_spc_samples;
|
||||
int epl_loop_length_samples;
|
||||
|
||||
// unified loop for E, P, L code vectors
|
||||
code_phase_step_chips = (static_cast<double>(d_code_freq_chips)) / (static_cast<double>(d_fs_in));
|
||||
tcode_chips = 0;
|
||||
|
||||
// Alternative EPL code generation (40% of speed improvement!)
|
||||
early_late_spc_samples = round(d_early_late_spc_chips / code_phase_step_chips);
|
||||
epl_loop_length_samples = d_current_prn_length_samples +early_late_spc_samples*2;
|
||||
for (int i = 0; i < epl_loop_length_samples; i++)
|
||||
{
|
||||
associated_chip_index = 1 + round(fmod(tcode_chips - d_early_late_spc_chips, code_length_chips));
|
||||
d_early_code[i] = d_ca_code[associated_chip_index];
|
||||
tcode_chips = tcode_chips + code_phase_step_chips;
|
||||
}
|
||||
|
||||
memcpy(d_prompt_code, &d_early_code[early_late_spc_samples], d_current_prn_length_samples* sizeof(gr_complex));
|
||||
memcpy(d_late_code, &d_early_code[early_late_spc_samples*2], d_current_prn_length_samples* sizeof(gr_complex));
|
||||
//******************************************************************************
|
||||
|
||||
d_carrier_lock_fail_counter = 0;
|
||||
d_rem_code_phase_samples = 0;
|
||||
d_rem_carr_phase_rad = 0;
|
||||
d_acc_carrier_phase_rad = 0;
|
||||
|
||||
d_code_phase_samples = d_acq_code_phase_samples;
|
||||
|
||||
std::string sys_ = &d_acquisition_gnss_synchro->System;
|
||||
sys = sys_.substr(0,1);
|
||||
|
||||
// DEBUG OUTPUT
|
||||
std::cout << "Tracking start on channel " << d_channel << " for satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN) << std::endl;
|
||||
LOG(INFO) << "Starting tracking of satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN) << " on channel " << d_channel;
|
||||
|
||||
// enable tracking
|
||||
d_pull_in = true;
|
||||
d_enable_tracking = true;
|
||||
|
||||
LOG(INFO) << "PULL-IN Doppler [Hz]=" << d_carrier_doppler_hz
|
||||
<< " Code Phase correction [samples]=" << delay_correction_samples
|
||||
<< " PULL-IN Code Phase [samples]=" << d_acq_code_phase_samples << std::endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::update_local_code()
|
||||
{
|
||||
double tcode_chips;
|
||||
double rem_code_phase_chips;
|
||||
int associated_chip_index;
|
||||
int code_length_chips = static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS);
|
||||
double code_phase_step_chips;
|
||||
int early_late_spc_samples;
|
||||
int epl_loop_length_samples;
|
||||
|
||||
// unified loop for E, P, L code vectors
|
||||
code_phase_step_chips = (static_cast<double>(d_code_freq_chips)) / (static_cast<double>(d_fs_in));
|
||||
rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in);
|
||||
tcode_chips = -rem_code_phase_chips;
|
||||
|
||||
//EPL code generation
|
||||
early_late_spc_samples = round(d_early_late_spc_chips / code_phase_step_chips);
|
||||
epl_loop_length_samples = d_current_prn_length_samples + early_late_spc_samples*2;
|
||||
for (int i = 0; i < epl_loop_length_samples; i++)
|
||||
{
|
||||
associated_chip_index = 1 + round(fmod(tcode_chips - d_early_late_spc_chips, code_length_chips));
|
||||
d_early_code[i] = d_ca_code[associated_chip_index];
|
||||
tcode_chips = tcode_chips + code_phase_step_chips;
|
||||
}
|
||||
|
||||
memcpy(d_prompt_code, &d_early_code[early_late_spc_samples], d_current_prn_length_samples* sizeof(gr_complex));
|
||||
memcpy(d_late_code, &d_early_code[early_late_spc_samples*2], d_current_prn_length_samples* sizeof(gr_complex));
|
||||
}
|
||||
|
||||
|
||||
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::update_local_carrier()
|
||||
{
|
||||
float phase_step_rad;
|
||||
phase_step_rad = static_cast<float>(GPS_TWO_PI) * d_carrier_doppler_hz / static_cast<float>(d_fs_in);
|
||||
fxp_nco(d_carr_sign, d_current_prn_length_samples, d_rem_carr_phase_rad, phase_step_rad);
|
||||
}
|
||||
|
||||
|
||||
Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::~Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc()
|
||||
{
|
||||
d_dump_file.close();
|
||||
|
||||
volk_free(d_prompt_code);
|
||||
volk_free(d_late_code);
|
||||
volk_free(d_early_code);
|
||||
volk_free(d_carr_sign);
|
||||
volk_free(d_Early);
|
||||
volk_free(d_Prompt);
|
||||
volk_free(d_Late);
|
||||
|
||||
delete[] d_ca_code;
|
||||
delete[] d_Prompt_buffer;
|
||||
}
|
||||
|
||||
|
||||
// Tracking signal processing
|
||||
int Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::general_work (int noutput_items, gr_vector_int &ninput_items,
|
||||
gr_vector_const_void_star &input_items, gr_vector_void_star &output_items)
|
||||
{
|
||||
// stream to collect cout calls to improve thread safety
|
||||
std::stringstream tmp_str_stream;
|
||||
double carr_error_hz = 0.0;
|
||||
double carr_error_filt_hz = 0.0;
|
||||
double code_error_chips = 0.0;
|
||||
double code_error_filt_chips = 0.0;
|
||||
|
||||
if (d_enable_tracking == true)
|
||||
{
|
||||
/*
|
||||
* Receiver signal alignment
|
||||
*/
|
||||
if (d_pull_in == true)
|
||||
{
|
||||
int samples_offset;
|
||||
float acq_trk_shif_correction_samples;
|
||||
int acq_to_trk_delay_samples;
|
||||
acq_to_trk_delay_samples = d_sample_counter - d_acq_sample_stamp;
|
||||
acq_trk_shif_correction_samples = d_current_prn_length_samples - fmod(static_cast<float>(acq_to_trk_delay_samples), static_cast<float>(d_current_prn_length_samples));
|
||||
samples_offset = round(d_acq_code_phase_samples + acq_trk_shif_correction_samples);
|
||||
d_sample_counter = d_sample_counter + samples_offset; //count for the processed samples
|
||||
d_pull_in = false;
|
||||
consume_each(samples_offset); //shift input to perform alignment with local replica
|
||||
return 1;
|
||||
}
|
||||
// GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder
|
||||
Gnss_Synchro current_synchro_data;
|
||||
// Fill the acquisition data
|
||||
current_synchro_data = *d_acquisition_gnss_synchro;
|
||||
|
||||
// Block input data and block output stream pointers
|
||||
const gr_complex* in = (gr_complex*) input_items[0]; //PRN start block alignment
|
||||
Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0];
|
||||
|
||||
// Generate local code and carrier replicas (using \hat{f}_d(k-1))
|
||||
//update_local_code(); //disabled in the speed optimized tracking!
|
||||
update_local_carrier();
|
||||
|
||||
// perform Early, Prompt and Late correlation
|
||||
#if USING_VOLK_CW_EPL_CORR_CUSTOM
|
||||
d_correlator.Carrier_wipeoff_and_EPL_volk_custom(d_current_prn_length_samples,
|
||||
in,
|
||||
d_carr_sign,
|
||||
d_early_code,
|
||||
d_prompt_code,
|
||||
d_late_code,
|
||||
d_Early,
|
||||
d_Prompt,
|
||||
d_Late);
|
||||
#else
|
||||
d_correlator.Carrier_wipeoff_and_EPL_volk(d_current_prn_length_samples,
|
||||
in,
|
||||
d_carr_sign,
|
||||
d_early_code,
|
||||
d_prompt_code,
|
||||
d_late_code,
|
||||
d_Early,
|
||||
d_Prompt,
|
||||
d_Late);
|
||||
#endif
|
||||
// ################## PLL ##########################################################
|
||||
// PLL discriminator
|
||||
carr_error_hz = pll_cloop_two_quadrant_atan(*d_Prompt) / GPS_TWO_PI;
|
||||
// Carrier discriminator filter
|
||||
carr_error_filt_hz = d_carrier_loop_filter.get_carrier_nco(carr_error_hz);
|
||||
// New carrier Doppler frequency estimation
|
||||
d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_error_filt_hz;
|
||||
// New code Doppler frequency estimation
|
||||
d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ + ((d_carrier_doppler_hz * GPS_L1_CA_CODE_RATE_HZ) / GPS_L1_FREQ_HZ);
|
||||
//carrier phase accumulator for (K) doppler estimation
|
||||
d_acc_carrier_phase_rad -= GPS_TWO_PI * d_carrier_doppler_hz * GPS_L1_CA_CODE_PERIOD;
|
||||
//remnant carrier phase to prevent overflow in the code NCO
|
||||
d_rem_carr_phase_rad = d_rem_carr_phase_rad + GPS_TWO_PI * d_carrier_doppler_hz * GPS_L1_CA_CODE_PERIOD;
|
||||
d_rem_carr_phase_rad = fmod(d_rem_carr_phase_rad, GPS_TWO_PI);
|
||||
|
||||
// ################## DLL ##########################################################
|
||||
// DLL discriminator
|
||||
code_error_chips = dll_nc_e_minus_l_normalized(*d_Early, *d_Late); //[chips/Ti]
|
||||
// Code discriminator filter
|
||||
code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips); //[chips/second]
|
||||
//Code phase accumulator
|
||||
double code_error_filt_secs;
|
||||
code_error_filt_secs = (GPS_L1_CA_CODE_PERIOD * code_error_filt_chips) / GPS_L1_CA_CODE_RATE_HZ; //[seconds]
|
||||
d_acc_code_phase_secs = d_acc_code_phase_secs + code_error_filt_secs;
|
||||
|
||||
// ################## CARRIER AND CODE NCO BUFFER ALIGNEMENT #######################
|
||||
// keep alignment parameters for the next input buffer
|
||||
double T_chip_seconds;
|
||||
double T_prn_seconds;
|
||||
double T_prn_samples;
|
||||
double K_blk_samples;
|
||||
// Compute the next buffer length based in the new period of the PRN sequence and the code phase error estimation
|
||||
T_chip_seconds = 1.0 / d_code_freq_chips;
|
||||
T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
|
||||
T_prn_samples = T_prn_seconds * static_cast<double>(d_fs_in);
|
||||
K_blk_samples = T_prn_samples + d_rem_code_phase_samples + code_error_filt_secs * static_cast<double>(d_fs_in);
|
||||
d_current_prn_length_samples = round(K_blk_samples); //round to a discrete samples
|
||||
//d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
|
||||
|
||||
// ####### CN0 ESTIMATION AND LOCK DETECTORS ######
|
||||
if (d_cn0_estimation_counter < CN0_ESTIMATION_SAMPLES)
|
||||
{
|
||||
// fill buffer with prompt correlator output values
|
||||
d_Prompt_buffer[d_cn0_estimation_counter] = *d_Prompt;
|
||||
d_cn0_estimation_counter++;
|
||||
}
|
||||
else
|
||||
{
|
||||
d_cn0_estimation_counter = 0;
|
||||
// Code lock indicator
|
||||
d_CN0_SNV_dB_Hz = cn0_svn_estimator(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES, d_fs_in, GPS_L1_CA_CODE_LENGTH_CHIPS);
|
||||
// Carrier lock indicator
|
||||
d_carrier_lock_test = carrier_lock_detector(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES);
|
||||
// Loss of lock detection
|
||||
if (d_carrier_lock_test < d_carrier_lock_threshold or d_CN0_SNV_dB_Hz < MINIMUM_VALID_CN0)
|
||||
{
|
||||
d_carrier_lock_fail_counter++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (d_carrier_lock_fail_counter > 0) d_carrier_lock_fail_counter--;
|
||||
}
|
||||
if (d_carrier_lock_fail_counter > MAXIMUM_LOCK_FAIL_COUNTER)
|
||||
{
|
||||
std::cout << "Loss of lock in channel " << d_channel << "!" << std::endl;
|
||||
LOG(INFO) << "Loss of lock in channel " << d_channel << "!";
|
||||
std::unique_ptr<ControlMessageFactory> cmf(new ControlMessageFactory());
|
||||
if (d_queue != gr::msg_queue::sptr())
|
||||
{
|
||||
d_queue->handle(cmf->GetQueueMessage(d_channel, 2));
|
||||
}
|
||||
d_carrier_lock_fail_counter = 0;
|
||||
d_enable_tracking = false; // TODO: check if disabling tracking is consistent with the channel state machine
|
||||
}
|
||||
}
|
||||
// ########### Output the tracking data to navigation and PVT ##########
|
||||
current_synchro_data.Prompt_I = static_cast<double>((*d_Prompt).real());
|
||||
current_synchro_data.Prompt_Q = static_cast<double>((*d_Prompt).imag());
|
||||
// Tracking_timestamp_secs is aligned with the PRN start sample
|
||||
//current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter + (double)d_current_prn_length_samples + (double)d_rem_code_phase_samples) / static_cast<double>(d_fs_in);
|
||||
current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + static_cast<double>(d_rem_code_phase_samples)) / static_cast<double>(d_fs_in);
|
||||
d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
|
||||
// This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0
|
||||
current_synchro_data.Code_phase_secs = 0;
|
||||
current_synchro_data.Carrier_phase_rads = static_cast<double>(d_acc_carrier_phase_rad);
|
||||
current_synchro_data.Carrier_Doppler_hz = static_cast<double>(d_carrier_doppler_hz);
|
||||
current_synchro_data.CN0_dB_hz = static_cast<double>(d_CN0_SNV_dB_Hz);
|
||||
current_synchro_data.Flag_valid_pseudorange = false;
|
||||
*out[0] = current_synchro_data;
|
||||
|
||||
// ########## DEBUG OUTPUT
|
||||
/*!
|
||||
* \todo The stop timer has to be moved to the signal source!
|
||||
*/
|
||||
if (floor(d_sample_counter / d_fs_in) != d_last_seg)
|
||||
{
|
||||
d_last_seg = floor(d_sample_counter / d_fs_in);
|
||||
|
||||
if (d_channel == 0)
|
||||
{
|
||||
// debug: Second counter in channel 0
|
||||
tmp_str_stream << "Current input signal time = " << d_last_seg << " [s]" << std::endl;
|
||||
std::cout << tmp_str_stream.rdbuf();
|
||||
LOG(INFO) << tmp_str_stream.rdbuf();
|
||||
}
|
||||
|
||||
tmp_str_stream << "Tracking CH " << d_channel << ": Satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN)
|
||||
<< ", Doppler=" << d_carrier_doppler_hz << " [Hz] CN0 = " << d_CN0_SNV_dB_Hz << " [dB-Hz]" << std::endl;
|
||||
//std::cout << tmp_str_stream.rdbuf() << std::flush;
|
||||
LOG(INFO) << tmp_str_stream.rdbuf();
|
||||
//if (d_channel == 0 || d_last_seg==5) d_carrier_lock_fail_counter=500; //DEBUG: force unlock!
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// ########## DEBUG OUTPUT (TIME ONLY for channel 0 when tracking is disabled)
|
||||
/*!
|
||||
* \todo The stop timer has to be moved to the signal source!
|
||||
*/
|
||||
// stream to collect cout calls to improve thread safety
|
||||
std::stringstream tmp_str_stream;
|
||||
if (floor(d_sample_counter / d_fs_in) != d_last_seg)
|
||||
{
|
||||
d_last_seg = floor(d_sample_counter / d_fs_in);
|
||||
|
||||
if (d_channel == 0)
|
||||
{
|
||||
// debug: Second counter in channel 0
|
||||
tmp_str_stream << "Current input signal time = " << d_last_seg << " [s]" << std::endl << std::flush;
|
||||
std::cout << tmp_str_stream.rdbuf() << std::flush;
|
||||
}
|
||||
}
|
||||
*d_Early = gr_complex(0,0);
|
||||
*d_Prompt = gr_complex(0,0);
|
||||
*d_Late = gr_complex(0,0);
|
||||
Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0]; //block output streams pointer
|
||||
// GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder
|
||||
d_acquisition_gnss_synchro->Flag_valid_pseudorange = false;
|
||||
*out[0] = *d_acquisition_gnss_synchro;
|
||||
}
|
||||
|
||||
if(d_dump)
|
||||
{
|
||||
// MULTIPLEXED FILE RECORDING - Record results to file
|
||||
float prompt_I;
|
||||
float prompt_Q;
|
||||
float tmp_E, tmp_P, tmp_L;
|
||||
float tmp_float;
|
||||
double tmp_double;
|
||||
prompt_I = (*d_Prompt).real();
|
||||
prompt_Q = (*d_Prompt).imag();
|
||||
tmp_E = std::abs<float>(*d_Early);
|
||||
tmp_P = std::abs<float>(*d_Prompt);
|
||||
tmp_L = std::abs<float>(*d_Late);
|
||||
try
|
||||
{
|
||||
// EPR
|
||||
d_dump_file.write((char*)&tmp_E, sizeof(float));
|
||||
d_dump_file.write((char*)&tmp_P, sizeof(float));
|
||||
d_dump_file.write((char*)&tmp_L, sizeof(float));
|
||||
// PROMPT I and Q (to analyze navigation symbols)
|
||||
d_dump_file.write((char*)&prompt_I, sizeof(float));
|
||||
d_dump_file.write((char*)&prompt_Q, sizeof(float));
|
||||
// PRN start sample stamp
|
||||
//tmp_float=(float)d_sample_counter;
|
||||
d_dump_file.write((char*)&d_sample_counter, sizeof(unsigned long int));
|
||||
// accumulated carrier phase
|
||||
tmp_float = d_acc_carrier_phase_rad;
|
||||
d_dump_file.write((char*)&tmp_float, sizeof(float));
|
||||
|
||||
// carrier and code frequency
|
||||
tmp_float = d_carrier_doppler_hz;
|
||||
d_dump_file.write((char*)&tmp_float, sizeof(float));
|
||||
tmp_float = d_code_freq_chips;
|
||||
d_dump_file.write((char*)&tmp_float, sizeof(float));
|
||||
|
||||
//PLL commands
|
||||
tmp_float = carr_error_hz;
|
||||
d_dump_file.write((char*)&tmp_float, sizeof(float));
|
||||
tmp_float = carr_error_filt_hz;
|
||||
d_dump_file.write((char*)&tmp_float, sizeof(float));
|
||||
|
||||
//DLL commands
|
||||
tmp_float = code_error_chips;
|
||||
d_dump_file.write((char*)&tmp_float, sizeof(float));
|
||||
tmp_float = code_error_filt_chips;
|
||||
d_dump_file.write((char*)&tmp_float, sizeof(float));
|
||||
|
||||
// CN0 and carrier lock test
|
||||
tmp_float = d_CN0_SNV_dB_Hz;
|
||||
d_dump_file.write((char*)&tmp_float, sizeof(float));
|
||||
tmp_float = d_carrier_lock_test;
|
||||
d_dump_file.write((char*)&tmp_float, sizeof(float));
|
||||
|
||||
// AUX vars (for debug purposes)
|
||||
tmp_float = d_rem_code_phase_samples;
|
||||
d_dump_file.write((char*)&tmp_float, sizeof(float));
|
||||
tmp_double = (double)(d_sample_counter + d_current_prn_length_samples);
|
||||
d_dump_file.write((char*)&tmp_double, sizeof(double));
|
||||
}
|
||||
catch (std::ifstream::failure& e)
|
||||
{
|
||||
LOG(WARNING) << "Exception writing trk dump file " << e.what();
|
||||
}
|
||||
}
|
||||
|
||||
consume_each(d_current_prn_length_samples); // this is necesary in gr_block derivates
|
||||
d_sample_counter += d_current_prn_length_samples; //count for the processed samples
|
||||
if((noutput_items == 0) || (ninput_items[0] == 0))
|
||||
{
|
||||
LOG(WARNING) << "noutput_items = 0";
|
||||
}
|
||||
return 1; //output tracking result ALWAYS even in the case of d_enable_tracking==false
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_channel(unsigned int channel)
|
||||
{
|
||||
d_channel = channel;
|
||||
LOG(INFO) << "Tracking Channel set to " << d_channel;
|
||||
// ############# ENABLE DATA FILE LOG #################
|
||||
if (d_dump == true)
|
||||
{
|
||||
if (d_dump_file.is_open() == false)
|
||||
{
|
||||
try
|
||||
{
|
||||
d_dump_filename.append(boost::lexical_cast<std::string>(d_channel));
|
||||
d_dump_filename.append(".dat");
|
||||
d_dump_file.exceptions (std::ifstream::failbit | std::ifstream::badbit);
|
||||
d_dump_file.open(d_dump_filename.c_str(), std::ios::out | std::ios::binary);
|
||||
LOG(INFO) << "Tracking dump enabled on channel " << d_channel << " Log file: " << d_dump_filename.c_str();
|
||||
}
|
||||
catch (std::ifstream::failure& e)
|
||||
{
|
||||
LOG(WARNING) << "channel " << d_channel << " Exception opening trk dump file " << e.what();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_channel_queue(concurrent_queue<int> *channel_internal_queue)
|
||||
{
|
||||
d_channel_internal_queue = channel_internal_queue;
|
||||
}
|
||||
|
||||
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_gnss_synchro(Gnss_Synchro* p_gnss_synchro)
|
||||
{
|
||||
d_acquisition_gnss_synchro = p_gnss_synchro;
|
||||
|
||||
}
|
@ -1,181 +0,0 @@
|
||||
/*!
|
||||
* \file gps_l1_ca_dll_pll_optim_tracking_cc.h
|
||||
* \brief Implementation of a code DLL + carrier PLL tracking block
|
||||
* \author Javier Arribas, 2012. jarribas(at)cttc.es
|
||||
* GPS L1 TRACKING MODULE OPTIMIZED FOR SPEED:
|
||||
* - Code Doppler is not compensated in the local replica
|
||||
* Code DLL + carrier PLL according to the algorithms described in:
|
||||
* [1] K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
|
||||
* A Software-Defined GPS and Galileo Receiver. A Single-Frequency
|
||||
* Approach, Birkha user, 2007
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*
|
||||
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
|
||||
*
|
||||
* GNSS-SDR is a software defined Global Navigation
|
||||
* Satellite Systems receiver
|
||||
*
|
||||
* This file is part of GNSS-SDR.
|
||||
*
|
||||
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* -------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_CC_H
|
||||
#define GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_CC_H
|
||||
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <gnuradio/block.h>
|
||||
#include <gnuradio/msg_queue.h>
|
||||
#include "concurrent_queue.h"
|
||||
#include "gnss_synchro.h"
|
||||
#include "tracking_2nd_DLL_filter.h"
|
||||
#include "tracking_2nd_PLL_filter.h"
|
||||
#include "correlator.h"
|
||||
|
||||
class Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc;
|
||||
|
||||
typedef boost::shared_ptr<Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc> gps_l1_ca_dll_pll_optim_tracking_cc_sptr;
|
||||
|
||||
gps_l1_ca_dll_pll_optim_tracking_cc_sptr gps_l1_ca_dll_pll_make_optim_tracking_cc(long if_freq,
|
||||
long fs_in,
|
||||
unsigned int vector_length,
|
||||
boost::shared_ptr<gr::msg_queue> queue,
|
||||
bool dump,
|
||||
std::string dump_filename,
|
||||
float pll_bw_hz,
|
||||
float dll_bw_hz,
|
||||
float early_late_space_chips);
|
||||
|
||||
|
||||
/*!
|
||||
* \brief This class implements a DLL + PLL tracking loop block
|
||||
*/
|
||||
class Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc: public gr::block
|
||||
{
|
||||
public:
|
||||
~Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc();
|
||||
|
||||
void set_channel(unsigned int channel);
|
||||
void set_gnss_synchro(Gnss_Synchro* p_gnss_synchro);
|
||||
void start_tracking();
|
||||
void set_channel_queue(concurrent_queue<int> *channel_internal_queue);
|
||||
|
||||
int general_work (int noutput_items, gr_vector_int &ninput_items,
|
||||
gr_vector_const_void_star &input_items, gr_vector_void_star &output_items);
|
||||
|
||||
void forecast (int noutput_items, gr_vector_int &ninput_items_required);
|
||||
private:
|
||||
friend gps_l1_ca_dll_pll_optim_tracking_cc_sptr
|
||||
gps_l1_ca_dll_pll_make_optim_tracking_cc(long if_freq,
|
||||
long fs_in, unsigned
|
||||
int vector_length,
|
||||
boost::shared_ptr<gr::msg_queue> queue,
|
||||
bool dump,
|
||||
std::string dump_filename,
|
||||
float pll_bw_hz,
|
||||
float dll_bw_hz,
|
||||
float early_late_space_chips);
|
||||
|
||||
Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc(long if_freq,
|
||||
long fs_in, unsigned
|
||||
int vector_length,
|
||||
boost::shared_ptr<gr::msg_queue> queue,
|
||||
bool dump,
|
||||
std::string dump_filename,
|
||||
float pll_bw_hz,
|
||||
float dll_bw_hz,
|
||||
float early_late_space_chips);
|
||||
void update_local_code();
|
||||
void update_local_carrier();
|
||||
|
||||
// tracking configuration vars
|
||||
boost::shared_ptr<gr::msg_queue> d_queue;
|
||||
concurrent_queue<int> *d_channel_internal_queue;
|
||||
unsigned int d_vector_length;
|
||||
bool d_dump;
|
||||
|
||||
Gnss_Synchro* d_acquisition_gnss_synchro;
|
||||
unsigned int d_channel;
|
||||
int d_last_seg;
|
||||
long d_if_freq;
|
||||
long d_fs_in;
|
||||
|
||||
double d_early_late_spc_chips;
|
||||
|
||||
gr_complex* d_ca_code;
|
||||
|
||||
gr_complex* d_early_code;
|
||||
gr_complex* d_late_code;
|
||||
gr_complex* d_prompt_code;
|
||||
gr_complex* d_carr_sign;
|
||||
|
||||
gr_complex *d_Early;
|
||||
gr_complex *d_Prompt;
|
||||
gr_complex *d_Late;
|
||||
|
||||
// remaining code phase and carrier phase between tracking loops
|
||||
double d_rem_code_phase_samples;
|
||||
double d_rem_carr_phase_rad;
|
||||
|
||||
// PLL and DLL filter library
|
||||
Tracking_2nd_DLL_filter d_code_loop_filter;
|
||||
Tracking_2nd_PLL_filter d_carrier_loop_filter;
|
||||
|
||||
// acquisition
|
||||
double d_acq_code_phase_samples;
|
||||
double d_acq_carrier_doppler_hz;
|
||||
// correlator
|
||||
Correlator d_correlator;
|
||||
|
||||
// tracking vars
|
||||
double d_code_freq_chips;
|
||||
double d_carrier_doppler_hz;
|
||||
double d_acc_carrier_phase_rad;
|
||||
double d_code_phase_samples;
|
||||
double d_acc_code_phase_secs;
|
||||
|
||||
//PRN period in samples
|
||||
int d_current_prn_length_samples;
|
||||
|
||||
//processing samples counters
|
||||
unsigned long int d_sample_counter;
|
||||
unsigned long int d_acq_sample_stamp;
|
||||
|
||||
// CN0 estimation and lock detector
|
||||
int d_cn0_estimation_counter;
|
||||
gr_complex* d_Prompt_buffer;
|
||||
double d_carrier_lock_test;
|
||||
double d_CN0_SNV_dB_Hz;
|
||||
double d_carrier_lock_threshold;
|
||||
int d_carrier_lock_fail_counter;
|
||||
|
||||
// control vars
|
||||
int d_gnuradio_forecast_samples;
|
||||
bool d_enable_tracking;
|
||||
bool d_pull_in;
|
||||
|
||||
// file dump
|
||||
std::string d_dump_filename;
|
||||
std::ofstream d_dump_file;
|
||||
|
||||
std::map<std::string, std::string> systemName;
|
||||
std::string sys;
|
||||
};
|
||||
|
||||
#endif //GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_CC_H
|
@ -80,7 +80,6 @@
|
||||
#include "galileo_e5a_noncoherent_iq_acquisition_caf.h"
|
||||
#include "gps_l1_ca_dll_pll_tracking.h"
|
||||
#include "gps_l1_ca_dll_pll_c_aid_tracking.h"
|
||||
#include "gps_l1_ca_dll_pll_optim_tracking.h"
|
||||
#include "gps_l1_ca_dll_fll_pll_tracking.h"
|
||||
#include "gps_l1_ca_tcp_connector_tracking.h"
|
||||
#include "galileo_e1_dll_pll_veml_tracking.h"
|
||||
@ -1324,12 +1323,6 @@ std::unique_ptr<GNSSBlockInterface> GNSSBlockFactory::GetBlock(
|
||||
out_streams, queue));
|
||||
block = std::move(block_);
|
||||
}
|
||||
else if (implementation.compare("GPS_L1_CA_DLL_PLL_Optim_Tracking") == 0)
|
||||
{
|
||||
std::unique_ptr<GNSSBlockInterface> block_(new GpsL1CaDllPllOptimTracking(configuration.get(), role, in_streams,
|
||||
out_streams, queue));
|
||||
block = std::move(block_);
|
||||
}
|
||||
else if (implementation.compare("GPS_L1_CA_DLL_FLL_PLL_Tracking") == 0)
|
||||
{
|
||||
std::unique_ptr<GNSSBlockInterface> block_(new GpsL1CaDllFllPllTracking(configuration.get(), role, in_streams,
|
||||
@ -1589,12 +1582,6 @@ std::unique_ptr<TrackingInterface> GNSSBlockFactory::GetTrkBlock(
|
||||
out_streams, queue));
|
||||
block = std::move(block_);
|
||||
}
|
||||
else if (implementation.compare("GPS_L1_CA_DLL_PLL_Optim_Tracking") == 0)
|
||||
{
|
||||
std::unique_ptr<TrackingInterface> block_(new GpsL1CaDllPllOptimTracking(configuration.get(), role, in_streams,
|
||||
out_streams, queue));
|
||||
block = std::move(block_);
|
||||
}
|
||||
else if (implementation.compare("GPS_L1_CA_DLL_FLL_PLL_Tracking") == 0)
|
||||
{
|
||||
std::unique_ptr<TrackingInterface> block_(new GpsL1CaDllFllPllTracking(configuration.get(), role, in_streams,
|
||||
|
Loading…
Reference in New Issue
Block a user