1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2024-07-02 01:43:20 +00:00
This commit is contained in:
Carles Fernandez 2016-03-18 20:44:33 +01:00
commit a7fbc46807
18 changed files with 1260 additions and 1179 deletions

View File

@ -170,6 +170,7 @@ install(FILES
${PROJECT_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_common.h
${PROJECT_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_avx_intrinsics.h
${PROJECT_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_sse3_intrinsics.h
${PROJECT_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_neon_intrinsics.h
${PROJECT_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr.h
${PROJECT_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_cpu.h
${PROJECT_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_config_fixed.h

View File

@ -0,0 +1,56 @@
/*!
* \file volk_gnsssdr_neon_intrinsics.h
* \author Carles Fernandez, 2016. carles.fernandez(at)gmail.com
* \brief Holds NEON intrinsics of intrinsics.
* They can be used in VOLK_GNSSSDR kernels to avoid copy-paste
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_VOLK_GNSSSDR_NEON_INTRINSICS_H_
#define INCLUDED_VOLK_GNSSSDR_NEON_INTRINSICS_H_
#include <arm_neon.h>
static inline float32x4_t vdivq_f32( float32x4_t num, float32x4_t den )
{
const float32x4_t q_inv0 = vrecpeq_f32( den );
const float32x4_t q_step0 = vrecpsq_f32( q_inv0, den );
const float32x4_t q_inv1 = vmulq_f32( q_step0, q_inv0 );
return vmulq_f32( num, q_inv1 );
}
static inline float32x4_t vsqrtq_f32( float32x4_t q_x )
{
const float32x4_t q_step_0 = vrsqrteq_f32( q_x );
// step
const float32x4_t q_step_parm0 = vmulq_f32( q_x, q_step_0 );
const float32x4_t q_step_result0 = vrsqrtsq_f32( q_step_parm0, q_step_0 );
// step
const float32x4_t q_step_1 = vmulq_f32( q_step_0, q_step_result0 );
const float32x4_t q_step_parm1 = vmulq_f32( q_x, q_step_1 );
const float32x4_t q_step_result1 = vrsqrtsq_f32( q_step_parm1, q_step_1 );
// take the res
const float32x4_t q_step_2 = vmulq_f32( q_step_1, q_step_result1 );
// mul by x to get sqrt, not rsqrt
return vmulq_f32( q_x, q_step_2 );
}
#endif /* INCLUDED_VOLK_GNSSSDR_NEON_INTRINSICS_H_ */

View File

@ -57,6 +57,22 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_generic(lv_16sc_t* outVe
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_GENERIC
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_generic_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
float rem_carrier_phase_in_rad = 0.345;
float phase_step_rad = 0.123;
lv_32fc_t phase[1];
phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), -sin(rem_carrier_phase_in_rad));
lv_32fc_t phase_inc[1];
phase_inc[0] = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad));
volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic_reload(outVector, inVector, phase_inc[0], phase, num_points);
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_SSE3
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_a_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
{
@ -73,6 +89,22 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_a_sse3(lv_16sc_t* outVec
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE3
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_a_sse3_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
float rem_carrier_phase_in_rad = 0.345;
float phase_step_rad = 0.123;
lv_32fc_t phase[1];
phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), -sin(rem_carrier_phase_in_rad));
lv_32fc_t phase_inc[1];
phase_inc[0] = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad));
volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(outVector, inVector, phase_inc[0], phase, num_points);
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE3
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_u_sse3(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
{
@ -89,6 +121,22 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_u_sse3(lv_16sc_t* outVec
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE3
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_u_sse3_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
float rem_carrier_phase_in_rad = 0.345;
float phase_step_rad = 0.123;
lv_32fc_t phase[1];
phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), -sin(rem_carrier_phase_in_rad));
lv_32fc_t phase_inc[1];
phase_inc[0] = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad));
volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(outVector, inVector, phase_inc[0], phase, num_points);
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_NEON
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
{
@ -105,4 +153,20 @@ static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon(lv_16sc_t* outVecto
#endif /* LV_HAVE_NEON */
#ifdef LV_HAVE_NEON
static inline void volk_gnsssdr_16ic_rotatorpuppet_16ic_neon_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
float rem_carrier_phase_in_rad = 0.345;
float phase_step_rad = 0.123;
lv_32fc_t phase[1];
phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), -sin(rem_carrier_phase_in_rad));
lv_32fc_t phase_inc[1];
phase_inc[0] = lv_cmake(cos(phase_step_rad), -sin(phase_step_rad));
volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(outVector, inVector, phase_inc[0], phase, num_points);
}
#endif /* LV_HAVE_NEON */
#endif /* INCLUDED_volk_gnsssdr_16ic_rotatorpuppet_16ic_H */

View File

@ -61,6 +61,7 @@
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <math.h>
//#include <stdio.h>
#ifdef LV_HAVE_GENERIC
@ -71,6 +72,56 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic(lv_16sc_t* ou
lv_16sc_t tmp16;
lv_32fc_t tmp32;
for(i = 0; i < (unsigned int)(num_points); ++i)
{
tmp16 = *inVector++;
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
*outVector++ = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
(*phase) *= phase_inc;
// Regenerate phase
if (i % 512 == 0)
{
//printf("Phase before regeneration %i: %f,%f Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase));
#ifdef __cplusplus
(*phase) /= std::abs((*phase));
#else
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
#endif
//printf("Phase after regeneration %i: %f,%f Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase));
}
}
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_GENERIC
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_generic_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{
unsigned int ROTATOR_RELOAD = 512;
unsigned int n = 0;
unsigned int j = 0;
lv_16sc_t tmp16;
lv_32fc_t tmp32;
for (; n < num_points / ROTATOR_RELOAD; n++)
{
for (j = 0; j < ROTATOR_RELOAD; j++)
{
tmp16 = *inVector++;
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
*outVector++ = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
(*phase) *= phase_inc;
}
// Regenerate phase
//printf("Phase before regeneration %i: %f,%f Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase));
#ifdef __cplusplus
(*phase) /= std::abs((*phase));
#else
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
#endif
//printf("Phase after regeneration %i: %f,%f Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase));
}
for (j = 0; j < num_points % ROTATOR_RELOAD; j++)
{
tmp16 = *inVector++;
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
@ -130,6 +181,168 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
//next two samples
_in += 2;
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
c2 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
// store four output samples
result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic
_mm_store_si128((__m128i*)_out, result);
// Regenerate phase
if ((number % 512) == 0)
{
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
tmp2 = _mm_hadd_ps(tmp1, tmp1);
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
tmp2 = _mm_sqrt_ps(tmp1);
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
}
//next two samples
_in += 2;
_out += 4;
}
_mm_storeu_ps((float*)two_phase_acc, two_phase_acc_reg);
(*phase) = two_phase_acc[0];
for (unsigned int i = sse_iters * 4; i < num_points; ++i)
{
tmp16 = *_in++;
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
*_out++ = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
(*phase) *= phase_inc;
}
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;
const unsigned int ROTATOR_RELOAD = 512;
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
__m128i c1, c2, result;
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
const lv_16sc_t* _in = inVector;
lv_16sc_t* _out = outVector;
__m128 yl, yh, tmp1, tmp2, tmp3;
lv_16sc_t tmp16;
lv_32fc_t tmp32;
for (unsigned int n = 0; n < sse_iters / ROTATOR_RELOAD; n++)
{
for (unsigned int j = 0; j < ROTATOR_RELOAD; j++)
{
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
c1 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
//next two samples
_in += 2;
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
c2 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
// store four output samples
result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic
_mm_store_si128((__m128i*)_out, result);
//next two samples
_in += 2;
_out += 4;
}
// Regenerate phase
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
tmp2 = _mm_hadd_ps(tmp1, tmp1);
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
tmp2 = _mm_sqrt_ps(tmp1);
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
}
for (unsigned int j = 0; j < sse_iters % ROTATOR_RELOAD; j++)
{
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
c1 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
//next two samples
_in += 2;
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
@ -171,6 +384,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_a_sse3(lv_16sc_t* out
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
@ -241,6 +455,16 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic
_mm_storeu_si128((__m128i*)_out, result);
// Regenerate phase
if ((number % 512) == 0)
{
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
tmp2 = _mm_hadd_ps(tmp1, tmp1);
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
tmp2 = _mm_sqrt_ps(tmp1);
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
}
//next two samples
_in += 2;
_out += 4;
@ -261,6 +485,156 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3(lv_16sc_t* out
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_u_sse3_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{
const unsigned int sse_iters = num_points / 4;
unsigned int ROTATOR_RELOAD = 512;
__m128 a, b, two_phase_acc_reg, two_phase_inc_reg;
__m128i c1, c2, result;
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*) two_phase_acc);
const lv_16sc_t* _in = inVector;
lv_16sc_t* _out = outVector;
__m128 yl, yh, tmp1, tmp2, tmp3;
lv_16sc_t tmp16;
lv_32fc_t tmp32;
for (unsigned int n = 0; n < sse_iters / ROTATOR_RELOAD; n++)
{
for (unsigned int j = 0; j < ROTATOR_RELOAD; j++)
{
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
c1 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
//next two samples
_in += 2;
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
c2 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
// store four output samples
result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic
_mm_storeu_si128((__m128i*)_out, result);
//next two samples
_in += 2;
_out += 4;
}
// Regenerate phase
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
tmp2 = _mm_hadd_ps(tmp1, tmp1);
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
tmp2 = _mm_sqrt_ps(tmp1);
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
}
for (unsigned int j = 0; j < sse_iters % ROTATOR_RELOAD; j++)
{
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
c1 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
//next two samples
_in += 2;
a = _mm_set_ps((float)(lv_cimag(_in[1])), (float)(lv_creal(_in[1])), (float)(lv_cimag(_in[0])), (float)(lv_creal(_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(a, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
a = _mm_shuffle_ps(a, a, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(a, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
b = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
c2 = _mm_cvtps_epi32(b); // convert from 32fc to 32ic
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
// store four output samples
result = _mm_packs_epi32(c1, c2);// convert from 32ic to 16ic
_mm_storeu_si128((__m128i*)_out, result);
//next two samples
_in += 2;
_out += 4;
}
_mm_storeu_ps((float*)two_phase_acc, two_phase_acc_reg);
(*phase) = two_phase_acc[0];
for (unsigned int i = sse_iters * 4; i < num_points; ++i)
{
tmp16 = *_in++;
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
*_out++ = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
(*phase) *= phase_inc;
}
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
@ -271,6 +645,10 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe
lv_16sc_t tmp16_;
lv_32fc_t tmp32_;
float arg_phase0 = cargf(*phase);
float arg_phase_inc = cargf(phase_inc);
float phase_est = 0.0;
const lv_16sc_t* _in = inVector;
lv_16sc_t* _out = outVector;
@ -351,6 +729,24 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe
/* store the four complex results */
vst2_s16((int16_t*)_out, tmp16);
_out += 4;
// Regenerate phase
if ((i % 512) == 0)
{
//printf("Computed phase: %f\n", cos(cargf(lv_cmake(_phase_real[0],_phase_imag[0]))));
phase_est = arg_phase0 + (i + 1) * 4 * arg_phase_inc;
//printf("Estimated phase: %f\n\n", cos(phase_est));
*phase = lv_cmake(cos(phase_est), sin(phase_est));
phase2 = (lv_32fc_t)(*phase) * phase_inc;
phase3 = phase2 * phase_inc;
phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
_phase_real = vld1q_f32(____phase_real);
_phase_imag = vld1q_f32(____phase_imag);
}
}
vst1q_f32((float32_t*)__phase_real, _phase_real);
vst1q_f32((float32_t*)__phase_imag, _phase_imag);
@ -368,4 +764,190 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon(lv_16sc_t* outVe
#endif /* LV_HAVE_NEON */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_neon_reload(lv_16sc_t* outVector, const lv_16sc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, unsigned int num_points)
{
unsigned int i = 0;
const unsigned int neon_iters = num_points / 4;
const unsigned int ROTATOR_RELOAD = 512;
lv_16sc_t tmp16_;
lv_32fc_t tmp32_;
float arg_phase0 = cargf(*phase);
float arg_phase_inc = cargf(phase_inc);
float phase_est = 0.0;
const lv_16sc_t* _in = inVector;
lv_16sc_t* _out = outVector;
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
float32x4_t _phase4_real = vld1q_f32(__phase4_real);
float32x4_t _phase4_imag = vld1q_f32(__phase4_imag);
lv_32fc_t phase2 = (lv_32fc_t)(*phase) * phase_inc;
lv_32fc_t phase3 = phase2 * phase_inc;
lv_32fc_t phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
float32x4_t _phase_real = vld1q_f32(__phase_real);
float32x4_t _phase_imag = vld1q_f32(__phase_imag);
float32x4_t half = vdupq_n_f32(0.5f);
int16x4x2_t tmp16;
int32x4x2_t tmp32i;
float32x4x2_t tmp32f, tmp_real, tmp_imag;
float32x4_t sign, PlusHalf, Round;
if (neon_iters > 0)
{
for (unsigned int n = 0; n < neon_iters / ROTATOR_RELOAD; n++)
{
for (unsigned int j = 0; j < ROTATOR_RELOAD; j++)
{
/* load 4 complex numbers (int 16 bits each component) */
tmp16 = vld2_s16((int16_t*)_in);
__builtin_prefetch(_in + 8);
_in += 4;
/* promote them to int 32 bits */
tmp32i.val[0] = vmovl_s16(tmp16.val[0]);
tmp32i.val[1] = vmovl_s16(tmp16.val[1]);
/* promote them to float 32 bits */
tmp32f.val[0] = vcvtq_f32_s32(tmp32i.val[0]);
tmp32f.val[1] = vcvtq_f32_s32(tmp32i.val[1]);
/* complex multiplication of four complex samples (float 32 bits each component) */
tmp_real.val[0] = vmulq_f32(tmp32f.val[0], _phase_real);
tmp_real.val[1] = vmulq_f32(tmp32f.val[1], _phase_imag);
tmp_imag.val[0] = vmulq_f32(tmp32f.val[0], _phase_imag);
tmp_imag.val[1] = vmulq_f32(tmp32f.val[1], _phase_real);
tmp32f.val[0] = vsubq_f32(tmp_real.val[0], tmp_real.val[1]);
tmp32f.val[1] = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]);
/* downcast results to int32 */
/* in __aarch64__ we can do that with vcvtaq_s32_f32(ret1); vcvtaq_s32_f32(ret2); */
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[0]), 31)));
PlusHalf = vaddq_f32(tmp32f.val[0], half);
Round = vsubq_f32(PlusHalf, sign);
tmp32i.val[0] = vcvtq_s32_f32(Round);
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[1]), 31)));
PlusHalf = vaddq_f32(tmp32f.val[1], half);
Round = vsubq_f32(PlusHalf, sign);
tmp32i.val[1] = vcvtq_s32_f32(Round);
/* downcast results to int16 */
tmp16.val[0] = vqmovn_s32(tmp32i.val[0]);
tmp16.val[1] = vqmovn_s32(tmp32i.val[1]);
/* compute next four phases */
tmp_real.val[0] = vmulq_f32(_phase_real, _phase4_real);
tmp_real.val[1] = vmulq_f32(_phase_imag, _phase4_imag);
tmp_imag.val[0] = vmulq_f32(_phase_real, _phase4_imag);
tmp_imag.val[1] = vmulq_f32(_phase_imag, _phase4_real);
_phase_real = vsubq_f32(tmp_real.val[0], tmp_real.val[1]);
_phase_imag = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]);
/* store the four complex results */
vst2_s16((int16_t*)_out, tmp16);
_out += 4;
}
// Regenerate phase
//printf("Computed phase: %f\n", cos(cargf(lv_cmake(_phase_real[0],_phase_imag[0]))));
phase_est = arg_phase0 + (n + 1) * ROTATOR_RELOAD * 4 * arg_phase_inc;
//printf("Estimated phase: %f\n\n", cos(phase_est));
*phase = lv_cmake(cos(phase_est), sin(phase_est));
phase2 = (lv_32fc_t)(*phase) * phase_inc;
phase3 = phase2 * phase_inc;
phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
_phase_real = vld1q_f32(____phase_real);
_phase_imag = vld1q_f32(____phase_imag);
}
for (unsigned int j = 0; j < neon_iters % ROTATOR_RELOAD; j++)
{
/* load 4 complex numbers (int 16 bits each component) */
tmp16 = vld2_s16((int16_t*)_in);
__builtin_prefetch(_in + 8);
_in += 4;
/* promote them to int 32 bits */
tmp32i.val[0] = vmovl_s16(tmp16.val[0]);
tmp32i.val[1] = vmovl_s16(tmp16.val[1]);
/* promote them to float 32 bits */
tmp32f.val[0] = vcvtq_f32_s32(tmp32i.val[0]);
tmp32f.val[1] = vcvtq_f32_s32(tmp32i.val[1]);
/* complex multiplication of four complex samples (float 32 bits each component) */
tmp_real.val[0] = vmulq_f32(tmp32f.val[0], _phase_real);
tmp_real.val[1] = vmulq_f32(tmp32f.val[1], _phase_imag);
tmp_imag.val[0] = vmulq_f32(tmp32f.val[0], _phase_imag);
tmp_imag.val[1] = vmulq_f32(tmp32f.val[1], _phase_real);
tmp32f.val[0] = vsubq_f32(tmp_real.val[0], tmp_real.val[1]);
tmp32f.val[1] = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]);
/* downcast results to int32 */
/* in __aarch64__ we can do that with vcvtaq_s32_f32(ret1); vcvtaq_s32_f32(ret2); */
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[0]), 31)));
PlusHalf = vaddq_f32(tmp32f.val[0], half);
Round = vsubq_f32(PlusHalf, sign);
tmp32i.val[0] = vcvtq_s32_f32(Round);
sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[1]), 31)));
PlusHalf = vaddq_f32(tmp32f.val[1], half);
Round = vsubq_f32(PlusHalf, sign);
tmp32i.val[1] = vcvtq_s32_f32(Round);
/* downcast results to int16 */
tmp16.val[0] = vqmovn_s32(tmp32i.val[0]);
tmp16.val[1] = vqmovn_s32(tmp32i.val[1]);
/* compute next four phases */
tmp_real.val[0] = vmulq_f32(_phase_real, _phase4_real);
tmp_real.val[1] = vmulq_f32(_phase_imag, _phase4_imag);
tmp_imag.val[0] = vmulq_f32(_phase_real, _phase4_imag);
tmp_imag.val[1] = vmulq_f32(_phase_imag, _phase4_real);
_phase_real = vsubq_f32(tmp_real.val[0], tmp_real.val[1]);
_phase_imag = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]);
/* store the four complex results */
vst2_s16((int16_t*)_out, tmp16);
_out += 4;
}
vst1q_f32((float32_t*)__phase_real, _phase_real);
vst1q_f32((float32_t*)__phase_imag, _phase_imag);
(*phase) = lv_cmake((float32_t)__phase_real[0], (float32_t)__phase_imag[0]);
}
for(i = 0; i < neon_iters % 4; ++i)
{
tmp16_ = *_in++;
tmp32_ = lv_cmake((float32_t)lv_creal(tmp16_), (float32_t)lv_cimag(tmp16_)) * (*phase);
*_out++ = lv_cmake((int16_t)rintf(lv_creal(tmp32_)), (int16_t)rintf(lv_cimag(tmp32_)));
(*phase) *= phase_inc;
}
}
#endif /* LV_HAVE_NEON */
#endif /* INCLUDED_volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_H */

View File

@ -94,7 +94,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
if (sse_iters > 0)
{
__m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc, result;
__m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl, realcacc, imagcacc;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
realcacc = _mm_setzero_si128();
@ -105,8 +105,6 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
for(unsigned int number = 0; number < sse_iters; number++)
{
//std::complex<T> memory structure: real part -> reinterpret_cast<cv T*>(a)[2*i]
//imaginery part -> reinterpret_cast<cv T*>(a)[2*i + 1]
// a[127:0]=[a3.i,a3.r,a2.i,a2.r,a1.i,a1.r,a0.i,a0.r]
a = _mm_load_si128((__m128i*)_in_a); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
__builtin_prefetch(_in_a + 8);
@ -115,7 +113,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
real = _mm_subs_epi16(c,c_sr);
real = _mm_subs_epi16(c, c_sr);
b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i ....
a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i ....
@ -123,7 +121,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
imag1 = _mm_mullo_epi16(a, b_sl); // a3.i*b3.r, ....
imag2 = _mm_mullo_epi16(b, a_sl); // b3.i*a3.r, ....
imag = _mm_adds_epi16(imag1, imag2); //with saturation aritmetic!
imag = _mm_adds_epi16(imag1, imag2); //with saturation arithmetic!
realcacc = _mm_adds_epi16(realcacc, real);
imagcacc = _mm_adds_epi16(imagcacc, imag);
@ -135,9 +133,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_a_sse2(lv_16sc_t* out, con
realcacc = _mm_and_si128(realcacc, mask_real);
imagcacc = _mm_and_si128(imagcacc, mask_imag);
result = _mm_or_si128(realcacc, imagcacc);
a = _mm_or_si128(realcacc, imagcacc);
_mm_store_si128((__m128i*)dotProductVector,result); // Store the results back into the dot product vector
_mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector
for (int i = 0; i < 4; ++i)
{
@ -202,7 +200,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_u_sse2(lv_16sc_t* out, con
imag1 = _mm_mullo_epi16(a, b_sl); // a3.i*b3.r, ....
imag2 = _mm_mullo_epi16(b, a_sl); // b3.i*a3.r, ....
imag = _mm_adds_epi16(imag1, imag2); //with saturation aritmetic!
imag = _mm_adds_epi16(imag1, imag2); //with saturation arithmetic!
realcacc = _mm_adds_epi16(realcacc, real);
imagcacc = _mm_adds_epi16(imagcacc, imag);
@ -245,46 +243,57 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon(lv_16sc_t* out, const
lv_16sc_t* a_ptr = (lv_16sc_t*) in_a;
lv_16sc_t* b_ptr = (lv_16sc_t*) in_b;
// for 2-lane vectors, 1st lane holds the real part,
// 2nd lane holds the imaginary part
int16x4x2_t a_val, b_val, c_val, accumulator;
int16x4x2_t tmp_real, tmp_imag;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4];
accumulator.val[0] = vdup_n_s16(0);
accumulator.val[1] = vdup_n_s16(0);
*out = lv_cmake((int16_t)0, (int16_t)0);
for(number = 0; number < quarter_points; ++number)
if (quarter_points > 0)
{
a_val = vld2_s16((int16_t*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i
b_val = vld2_s16((int16_t*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i
__builtin_prefetch(a_ptr + 8);
__builtin_prefetch(b_ptr + 8);
// for 2-lane vectors, 1st lane holds the real part,
// 2nd lane holds the imaginary part
int16x4x2_t a_val, b_val, c_val, accumulator;
int16x4x2_t tmp_real, tmp_imag;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4];
accumulator.val[0] = vdup_n_s16(0);
accumulator.val[1] = vdup_n_s16(0);
lv_16sc_t dotProduct = lv_cmake((int16_t)0, (int16_t)0);
// multiply the real*real and imag*imag to get real result
// a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r
tmp_real.val[0] = vmul_s16(a_val.val[0], b_val.val[0]);
// a0i*b0i|a1i*b1i|a2i*b2i|a3i*b3i
tmp_real.val[1] = vmul_s16(a_val.val[1], b_val.val[1]);
for(number = 0; number < quarter_points; ++number)
{
a_val = vld2_s16((int16_t*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i
b_val = vld2_s16((int16_t*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i
__builtin_prefetch(a_ptr + 8);
__builtin_prefetch(b_ptr + 8);
// Multiply cross terms to get the imaginary result
// a0r*b0i|a1r*b1i|a2r*b2i|a3r*b3i
tmp_imag.val[0] = vmul_s16(a_val.val[0], b_val.val[1]);
// a0i*b0r|a1i*b1r|a2i*b2r|a3i*b3r
tmp_imag.val[1] = vmul_s16(a_val.val[1], b_val.val[0]);
// multiply the real*real and imag*imag to get real result
// a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r
tmp_real.val[0] = vmul_s16(a_val.val[0], b_val.val[0]);
// a0i*b0i|a1i*b1i|a2i*b2i|a3i*b3i
tmp_real.val[1] = vmul_s16(a_val.val[1], b_val.val[1]);
c_val.val[0] = vsub_s16(tmp_real.val[0], tmp_real.val[1]);
c_val.val[1] = vadd_s16(tmp_imag.val[0], tmp_imag.val[1]);
// Multiply cross terms to get the imaginary result
// a0r*b0i|a1r*b1i|a2r*b2i|a3r*b3i
tmp_imag.val[0] = vmul_s16(a_val.val[0], b_val.val[1]);
// a0i*b0r|a1i*b1r|a2i*b2r|a3i*b3r
tmp_imag.val[1] = vmul_s16(a_val.val[1], b_val.val[0]);
accumulator.val[0] = vadd_s16(accumulator.val[0], c_val.val[0]);
accumulator.val[1] = vadd_s16(accumulator.val[1], c_val.val[1]);
c_val.val[0] = vqsub_s16(tmp_real.val[0], tmp_real.val[1]);
c_val.val[1] = vqadd_s16(tmp_imag.val[0], tmp_imag.val[1]);
a_ptr += 4;
b_ptr += 4;
accumulator.val[0] = vqadd_s16(accumulator.val[0], c_val.val[0]);
accumulator.val[1] = vqadd_s16(accumulator.val[1], c_val.val[1]);
a_ptr += 4;
b_ptr += 4;
}
vst2_s16((int16_t*)accum_result, accumulator);
for (unsigned int i = 0; i < 4; ++i)
{
dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(accum_result[i])), sat_adds16i(lv_cimag(dotProduct), lv_cimag(accum_result[i])));
}
*out = dotProduct;
}
vst2_s16((int16_t*)accum_result, accumulator);
*out = accum_result[0] + accum_result[1] + accum_result[2] + accum_result[3];
// tail case
for(number = quarter_points * 4; number < num_points; ++number)
{

View File

@ -84,6 +84,25 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic(lv_16sc_t* resu
#endif /*LV_HAVE_GENERIC*/
#ifdef LV_HAVE_GENERIC
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic_sat(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
result[n_vec] = lv_cmake(0,0);
for (unsigned int n = 0; n < num_points; n++)
{
lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(in_common[n]), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(in_common[n]), lv_cimag(in_a[n_vec][n]))),
sat_adds16i(sat_muls16i(lv_creal(in_common[n]), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(in_common[n]), lv_creal(in_a[n_vec][n]))));
result[n_vec] = lv_cmake(sat_adds16i(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[n_vec]), lv_cimag(tmp)));
}
}
}
#endif /*LV_HAVE_GENERIC*/
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
@ -398,8 +417,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(lv_16sc_t* res
tmp.val[0] = vmls_s16(tmp.val[0], a_val.val[1], b_val.val[1]);
tmp.val[1] = vmla_s16(tmp.val[1], a_val.val[0], b_val.val[1]);
accumulator[n_vec].val[0] = vadd_s16(accumulator[n_vec].val[0], tmp.val[0]);
accumulator[n_vec].val[1] = vadd_s16(accumulator[n_vec].val[1], tmp.val[1]);
accumulator[n_vec].val[0] = vqadd_s16(accumulator[n_vec].val[0], tmp.val[0]);
accumulator[n_vec].val[1] = vqadd_s16(accumulator[n_vec].val[1], tmp.val[1]);
}
_in_common += 4;
}

View File

@ -63,6 +63,30 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_generic(lv_16sc_t*
#endif /* Generic */
#ifdef LV_HAVE_GENERIC
static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_generic_sat(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
int num_a_vectors = 3;
lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
for(unsigned int n = 0; n < num_a_vectors; n++)
{
in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points);
}
volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_generic_sat(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points);
for(unsigned int n = 0; n < num_a_vectors; n++)
{
volk_gnsssdr_free(in_a[n]);
}
volk_gnsssdr_free(in_a);
}
#endif /* Generic */
#ifdef LV_HAVE_SSE2
static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_a_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{

View File

@ -43,6 +43,7 @@
*
* Rotates and multiplies the reference complex vector with an arbitrary number of other complex vectors,
* accumulates the results and stores them in the output vector.
* The rotation is done at a fixed rate per sample, from an initial \p phase offset.
* This function can be used for Doppler wipe-off and multiple correlator.
*
* <b>Dispatcher Prototype</b>
@ -71,7 +72,7 @@
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <volk_gnsssdr/saturation_arithmetic.h>
#include <math.h>
//#include <stdio.h>
#include <stdio.h>
#ifdef LV_HAVE_GENERIC
@ -88,6 +89,19 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc
tmp16 = *in_common++; //if(n<10 || n >= 8108) printf("generic phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase));
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
// Regenerate phase
if (n % 256 == 0)
{
//printf("Phase before regeneration %i: %f,%f Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase));
#ifdef __cplusplus
(*phase) /= std::abs((*phase));
#else
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
#endif
//printf("Phase after regeneration %i: %f,%f Modulus: %f\n", n,lv_creal(*phase),lv_cimag(*phase), cabsf(*phase));
}
(*phase) *= phase_inc;
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
@ -101,6 +115,60 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic(lv_16sc
#endif /*LV_HAVE_GENERIC*/
#ifdef LV_HAVE_GENERIC
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic_reload(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
lv_16sc_t tmp16;
lv_32fc_t tmp32;
const unsigned int ROTATOR_RELOAD = 256;
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
result[n_vec] = lv_cmake(0,0);
}
for (unsigned int n = 0; n < num_points / ROTATOR_RELOAD; n++)
{
for (unsigned int j = 0; j < ROTATOR_RELOAD; j++)
{
tmp16 = *in_common++; //if(n<10 || n >= 8108) printf("generic phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase));
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
(*phase) *= phase_inc;
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
lv_16sc_t tmp = tmp16 * in_a[n_vec][n * ROTATOR_RELOAD + j];
//lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(tmp16), lv_cimag(in_a[n_vec][n]))) , sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(tmp16), lv_creal(in_a[n_vec][n]))));
result[n_vec] = lv_cmake(sat_adds16i(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[n_vec]), lv_cimag(tmp)));
}
}
/* Regenerate phase */
#ifdef __cplusplus
(*phase) /= std::abs((*phase));
#else
//(*phase) /= cabsf((*phase));
(*phase) /= hypotf(lv_creal(*phase), lv_cimag(*phase));
#endif
}
for (unsigned int j = 0; j < num_points % ROTATOR_RELOAD; j++)
{
tmp16 = *in_common++; //if(n<10 || n >= 8108) printf("generic phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase));
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
(*phase) *= phase_inc;
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
lv_16sc_t tmp = tmp16 * in_a[n_vec][ (num_points / ROTATOR_RELOAD) * ROTATOR_RELOAD + j ];
//lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(tmp16), lv_cimag(in_a[n_vec][n]))) , sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(tmp16), lv_creal(in_a[n_vec][n]))));
result[n_vec] = lv_cmake(sat_adds16i(lv_creal(result[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(result[n_vec]), lv_cimag(tmp)));
}
}
}
#endif /*LV_HAVE_GENERIC*/
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
@ -169,6 +237,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
//next two samples
_in_common += 2;
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in_common + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
@ -212,6 +281,15 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real);
imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag);
}
// Regenerate phase
if ((number % 128) == 0)
{
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
tmp2 = _mm_hadd_ps(tmp1, tmp1);
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
tmp2 = _mm_sqrt_ps(tmp1);
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
}
}
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
@ -233,6 +311,254 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_
free(realcacc);
free(imagcacc);
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
tmp2 = _mm_hadd_ps(tmp1, tmp1);
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
tmp2 = _mm_sqrt_ps(tmp1);
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
_mm_store_ps((float*)two_phase_acc, two_phase_acc_reg);
//(*phase) = lv_cmake((float*)two_phase_acc[0], (float*)two_phase_acc[1]);
(*phase) = two_phase_acc[0];
for(unsigned int n = sse_iters * 4; n < num_points; n++)
{
tmp16 = in_common[n]; //printf("a_sse phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase));
tmp32 = lv_cmake((float)lv_creal(tmp16), (float)lv_cimag(tmp16)) * (*phase);
tmp16 = lv_cmake((int16_t)rintf(lv_creal(tmp32)), (int16_t)rintf(lv_cimag(tmp32)));
(*phase) *= phase_inc;
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
lv_16sc_t tmp = tmp16 * in_a[n_vec][n];
//lv_16sc_t tmp = lv_cmake(sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_creal(in_a[n_vec][n])), - sat_muls16i(lv_cimag(tmp16), lv_cimag(in_a[n_vec][n]))) , sat_adds16i(sat_muls16i(lv_creal(tmp16), lv_cimag(in_a[n_vec][n])), sat_muls16i(lv_cimag(tmp16), lv_creal(in_a[n_vec][n]))));
_out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)),
sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp)));
}
}
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
lv_16sc_t dotProduct = lv_cmake(0,0);
const unsigned int sse_iters = num_points / 4;
const unsigned int ROTATOR_RELOAD = 128;
const lv_16sc_t** _in_a = in_a;
const lv_16sc_t* _in_common = in_common;
lv_16sc_t* _out = result;
__VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4];
//todo dyn mem reg
__m128i* realcacc;
__m128i* imagcacc;
realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0
__m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl;
mask_imag = _mm_set_epi8(255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0);
mask_real = _mm_set_epi8(0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255);
// phase rotation registers
__m128 pa, pb, two_phase_acc_reg, two_phase_inc_reg;
__m128i pc1, pc2;
__attribute__((aligned(16))) lv_32fc_t two_phase_inc[2];
two_phase_inc[0] = phase_inc * phase_inc;
two_phase_inc[1] = phase_inc * phase_inc;
two_phase_inc_reg = _mm_load_ps((float*) two_phase_inc);
__attribute__((aligned(16))) lv_32fc_t two_phase_acc[2];
two_phase_acc[0] = (*phase);
two_phase_acc[1] = (*phase) * phase_inc;
two_phase_acc_reg = _mm_load_ps((float*)two_phase_acc);
__m128 yl, yh, tmp1, tmp2, tmp3;
lv_16sc_t tmp16;
lv_32fc_t tmp32;
for (unsigned int number = 0; number < sse_iters / ROTATOR_RELOAD; ++number)
{
for (unsigned int j = 0; j < ROTATOR_RELOAD; j++)
{
// Phase rotation on operand in_common starts here:
//printf("generic phase %i: %f,%f\n", n*4,lv_creal(*phase),lv_cimag(*phase));
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(pa, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
pa = _mm_shuffle_ps(pa, pa, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(pa, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
pb = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
pc1 = _mm_cvtps_epi32(pb); // convert from 32fc to 32ic
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
//next two samples
_in_common += 2;
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in_common + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(pa, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
pa = _mm_shuffle_ps(pa, pa, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(pa, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
pb = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
pc2 = _mm_cvtps_epi32(pb); // convert from 32fc to 32ic
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
// store four rotated in_common samples in the register b
b = _mm_packs_epi32(pc1, pc2);// convert from 32ic to 16ic
//next two samples
_in_common += 2;
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
a = _mm_load_si128((__m128i*)&(_in_a[n_vec][(number * ROTATOR_RELOAD + j) * 4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
real = _mm_subs_epi16(c, c_sr);
b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i ....
a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i ....
imag1 = _mm_mullo_epi16(a, b_sl); // a3.i*b3.r, ....
imag2 = _mm_mullo_epi16(b, a_sl); // b3.i*a3.r, ....
imag = _mm_adds_epi16(imag1, imag2);
realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real);
imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag);
}
}
// regenerate phase
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
tmp2 = _mm_hadd_ps(tmp1, tmp1);
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
tmp2 = _mm_sqrt_ps(tmp1);
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
}
for (unsigned int j = 0; j < sse_iters % ROTATOR_RELOAD; j++)
{
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(pa, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
pa = _mm_shuffle_ps(pa, pa, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(pa, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
pb = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
pc1 = _mm_cvtps_epi32(pb); // convert from 32fc to 32ic
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
//next two samples
_in_common += 2;
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in_common + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(pa, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
pa = _mm_shuffle_ps(pa, pa, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(pa, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
pb = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
pc2 = _mm_cvtps_epi32(pb); // convert from 32fc to 32ic
//complex 32fc multiplication two_phase_acc_reg=two_phase_acc_reg*two_phase_inc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(two_phase_inc_reg, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
tmp3 = _mm_shuffle_ps(two_phase_inc_reg, two_phase_inc_reg, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(tmp3, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
two_phase_acc_reg = _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
// store four rotated in_common samples in the register b
b = _mm_packs_epi32(pc1, pc2);// convert from 32ic to 16ic
//next two samples
_in_common += 2;
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
a = _mm_load_si128((__m128i*)&(_in_a[n_vec][((sse_iters / ROTATOR_RELOAD) * ROTATOR_RELOAD + j) * 4])); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg
c = _mm_mullo_epi16(a, b); // a3.i*b3.i, a3.r*b3.r, ....
c_sr = _mm_srli_si128(c, 2); // Shift a right by imm8 bytes while shifting in zeros, and store the results in dst.
real = _mm_subs_epi16(c, c_sr);
b_sl = _mm_slli_si128(b, 2); // b3.r, b2.i ....
a_sl = _mm_slli_si128(a, 2); // a3.r, a2.i ....
imag1 = _mm_mullo_epi16(a, b_sl); // a3.i*b3.r, ....
imag2 = _mm_mullo_epi16(b, a_sl); // b3.i*a3.r, ....
imag = _mm_adds_epi16(imag1, imag2);
realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real);
imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag);
}
}
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
{
realcacc[n_vec] = _mm_and_si128(realcacc[n_vec], mask_real);
imagcacc[n_vec] = _mm_and_si128(imagcacc[n_vec], mask_imag);
a = _mm_or_si128(realcacc[n_vec], imagcacc[n_vec]);
_mm_store_si128((__m128i*)dotProductVector, a); // Store the results back into the dot product vector
dotProduct = lv_cmake(0,0);
for (int i = 0; i < 4; ++i)
{
dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])),
sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i])));
}
_out[n_vec] = dotProduct;
}
free(realcacc);
free(imagcacc);
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
tmp2 = _mm_hadd_ps(tmp1, tmp1);
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
tmp2 = _mm_sqrt_ps(tmp1);
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
_mm_store_ps((float*)two_phase_acc, two_phase_acc_reg);
//(*phase) = lv_cmake((float*)two_phase_acc[0], (float*)two_phase_acc[1]);
(*phase) = two_phase_acc[0];
@ -303,7 +629,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
for(unsigned int number = 0; number < sse_iters; number++)
{
// Phase rotation on operand in_common starts here:
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in_common + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
@ -326,6 +651,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
//next two samples
_in_common += 2;
pa = _mm_set_ps((float)(lv_cimag(_in_common[1])), (float)(lv_creal(_in_common[1])), (float)(lv_cimag(_in_common[0])), (float)(lv_creal(_in_common[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
__builtin_prefetch(_in_common + 8);
//complex 32fc multiplication b=a*two_phase_acc_reg
yl = _mm_moveldup_ps(two_phase_acc_reg); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(two_phase_acc_reg); // Load yh with ci,ci,di,di
@ -369,6 +695,15 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real);
imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag);
}
// Regenerate phase
if ((number % 256) == 0)
{
tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg);
tmp2 = _mm_hadd_ps(tmp1, tmp1);
tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
tmp2 = _mm_sqrt_ps(tmp1);
two_phase_acc_reg = _mm_div_ps(two_phase_acc_reg, tmp2);
}
}
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
@ -393,7 +728,6 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_
_mm_storeu_ps((float*)two_phase_acc, two_phase_acc_reg);
(*phase) = two_phase_acc[0];
for(unsigned int n = sse_iters * 4; n < num_points; n++)
{
tmp16 = in_common[n];
@ -428,6 +762,9 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
if (neon_iters > 0)
{
lv_16sc_t dotProduct = lv_cmake(0,0);
float arg_phase0 = cargf(*phase);
float arg_phase_inc = cargf(phase_inc);
float phase_est;
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
@ -538,6 +875,22 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
accumulator[n_vec].val[0] = vqadd_s16(accumulator[n_vec].val[0], c_val.val[0]);
accumulator[n_vec].val[1] = vqadd_s16(accumulator[n_vec].val[1], c_val.val[1]);
}
// Regenerate phase
if ((number % 256) == 0)
{
phase_est = arg_phase0 + (number + 1) * 4 * arg_phase_inc;
*phase = lv_cmake(cos(phase_est), sin(phase_est));
phase2 = (lv_32fc_t)(*phase) * phase_inc;
phase3 = phase2 * phase_inc;
phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
_phase_real = vld1q_f32(____phase_real);
_phase_imag = vld1q_f32(____phase_imag);
}
}
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
@ -577,6 +930,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t*
#ifdef LV_HAVE_NEON
#include <arm_neon.h>
#include <volk_gnsssdr/volk_gnsssdr_neon_intrinsics.h>
static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
{
@ -592,7 +946,10 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
if (neon_iters > 0)
{
lv_16sc_t dotProduct = lv_cmake(0,0);
float arg_phase0 = cargf(*phase);
float arg_phase_inc = cargf(phase_inc);
float phase_est;
//printf("arg phase0: %f", arg_phase0);
lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) };
@ -677,6 +1034,37 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
_phase_real = vsubq_f32(tmp32_real.val[0], tmp32_real.val[1]);
_phase_imag = vaddq_f32(tmp32_imag.val[0], tmp32_imag.val[1]);
// Regenerate phase
if ((number % 256) == 0)
{
//printf("computed phase: %f\n", cos(cargf(lv_cmake(_phase_real[0],_phase_imag[0]))));
phase_est = arg_phase0 + (number + 1) * 4 * arg_phase_inc;
//printf("Estimated phase: %f\n\n", cos(phase_est));
*phase = lv_cmake(cos(phase_est), sin(phase_est));
phase2 = (lv_32fc_t)(*phase) * phase_inc;
phase3 = phase2 * phase_inc;
phase4 = phase3 * phase_inc;
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) };
__VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) };
_phase_real = vld1q_f32(____phase_real);
_phase_imag = vld1q_f32(____phase_imag);
// Round = vmulq_f32(_phase_real, _phase_real);
// Round = vmlaq_f32(Round, _phase_imag, _phase_imag);
// Round = vsqrtq_f32(Round);//printf("sqrt: %f \n", Round[0]);
//Round = vrsqrteq_f32(Round);printf("1/sqtr: %f \n",Round[0]);
//Round = vrecpeq_f32((Round);
// _phase_real = vdivq_f32(_phase_real, Round);
// _phase_imag = vdivq_f32(_phase_imag, Round);
//_phase_real = vmulq_f32(_phase_real, Round);
//_phase_imag = vmulq_f32(_phase_imag, Round);
//printf("After %i: %f,%f, %f\n\n", number, _phase_real[0], _phase_imag[0], sqrt(_phase_real[0]*_phase_real[0]+_phase_imag[0]*_phase_imag[0]));
}
vst1q_f32((float32_t*)__phase_real, _phase_real);
vst1q_f32((float32_t*)__phase_imag, _phase_imag);
@ -708,6 +1096,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
_out[n_vec] = dotProduct;
}
free(accumulator);
vst1q_f32((float32_t*)__phase_real, _phase_real);
vst1q_f32((float32_t*)__phase_imag, _phase_imag);
@ -731,3 +1120,4 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s
#endif /* LV_HAVE_NEON */
#endif /*INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_H*/

View File

@ -70,6 +70,36 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_generic(lv_
#endif // Generic
#ifdef LV_HAVE_GENERIC
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_generic_reload(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
float rem_carrier_phase_in_rad = 0.345;
float phase_step_rad = 0.1;
lv_32fc_t phase[1];
phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
lv_32fc_t phase_inc[1];
phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
int num_a_vectors = 3;
lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
for(unsigned int n = 0; n < num_a_vectors; n++)
{
in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points);
}
volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_generic_reload(result, local_code, phase_inc[0], phase,(const lv_16sc_t**) in_a, num_a_vectors, num_points);
for(unsigned int n = 0; n < num_a_vectors; n++)
{
volk_gnsssdr_free(in_a[n]);
}
volk_gnsssdr_free(in_a);
}
#endif // Generic
#ifdef LV_HAVE_SSE3
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
@ -101,6 +131,37 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3(lv_1
#endif // SSE3
#ifdef LV_HAVE_SSE3
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_a_sse3_reload(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{
// phases must be normalized. Phase rotator expects a complex exponential input!
float rem_carrier_phase_in_rad = 0.345;
float phase_step_rad = 0.1;
lv_32fc_t phase[1];
phase[0] = lv_cmake(cos(rem_carrier_phase_in_rad), sin(rem_carrier_phase_in_rad));
lv_32fc_t phase_inc[1];
phase_inc[0] = lv_cmake(cos(phase_step_rad), sin(phase_step_rad));
int num_a_vectors = 3;
lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment());
for(unsigned int n = 0; n < num_a_vectors; n++)
{
in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t) * num_points, volk_gnsssdr_get_alignment());
memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t) * num_points);
}
volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(result, local_code, phase_inc[0], phase, (const lv_16sc_t**) in_a, num_a_vectors, num_points);
for(unsigned int n = 0; n < num_a_vectors; n++)
{
volk_gnsssdr_free(in_a[n]);
}
volk_gnsssdr_free(in_a);
}
#endif // SSE3
#ifdef LV_HAVE_SSE3
static inline void volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic_u_sse3(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points)
{

View File

@ -58,6 +58,9 @@ std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t
// some others need more iterations ***** ADDED BY GNSS-SDR
volk_gnsssdr_test_params_t test_params_more_iters = volk_gnsssdr_test_params_t(test_params.tol(), test_params.scalar(),
test_params.vlen(), 100000, test_params.benchmark_mode(), test_params.kernel_regex());
// ... or more tolerance ***** ADDED BY GNSS-SDR
volk_gnsssdr_test_params_t test_params_int16 = volk_gnsssdr_test_params_t(16, test_params.scalar(),
test_params.vlen(), test_params.iter(), test_params.benchmark_mode(), test_params.kernel_regex());
std::vector<volk_gnsssdr_test_case_t> test_cases = boost::assign::list_of
@ -77,11 +80,11 @@ std::vector<volk_gnsssdr_test_case_t> init_test_list(volk_gnsssdr_test_params_t
(VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_dot_prod_16ic, test_params))
(VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_multiply_16ic, test_params_more_iters))
(VOLK_INIT_TEST(volk_gnsssdr_16ic_convert_32fc, test_params_more_iters))
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_rotatorpuppet_16ic, volk_gnsssdr_16ic_s32fc_x2_rotator_16ic, test_params))
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_rotatorpuppet_16ic, volk_gnsssdr_16ic_s32fc_x2_rotator_16ic, test_params_int1))
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_resamplerpuppet_16ic, volk_gnsssdr_16ic_resampler_16ic, test_params))
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_resamplerxnpuppet_16ic, volk_gnsssdr_16ic_xn_resampler_16ic_xn, test_params))
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_dot_prod_16ic_xn, test_params))
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn, test_params))
(VOLK_INIT_PUPP(volk_gnsssdr_16ic_x2_rotator_dotprodxnpuppet_16ic, volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn, test_params_int16))
;
return test_cases;

View File

@ -76,10 +76,10 @@ void load_random_data(void *data, volk_gnsssdr_type_t type, unsigned int n)
else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
break;
case 2:
// 16 bits dot product saturates very fast even with moderate length vectors
// we produce here only 4 bits input range
if(type.is_signed) ((int16_t *)data)[i] = (int16_t)((int16_t) scaled_rand % 16);
else ((uint16_t *)data)[i] = (uint16_t) (int16_t)((int16_t) scaled_rand % 16);
// 16 bit multiplication saturates very fast
// we produce here only 3 bits input range
if(type.is_signed) ((int16_t *)data)[i] = (int16_t)((int16_t) scaled_rand % 8);
else ((uint16_t *)data)[i] = (uint16_t) (int16_t)((int16_t) scaled_rand % 8);
break;
case 1:
if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand;

View File

@ -25,7 +25,6 @@ set(TRACKING_ADAPTER_SOURCES
galileo_e1_dll_pll_veml_tracking.cc
galileo_e1_tcp_connector_tracking.cc
gps_l1_ca_dll_fll_pll_tracking.cc
gps_l1_ca_dll_pll_optim_tracking.cc
gps_l1_ca_dll_pll_tracking.cc
gps_l1_ca_dll_pll_c_aid_tracking.cc
gps_l1_ca_tcp_connector_tracking.cc

View File

@ -1,159 +0,0 @@
/*!
* \file gps_l1_ca_dll_pll_optim_tracking.cc
* \brief Interface of an adapter of a speed optimized DLL+PLL tracking loop block
* for GPS L1 C/A to a TrackingInterface
* \author Javier Arribas, 2012. jarribas(at)cttc.es
*
* GPS L1 TRACKING MODULE OPTIMIZED FOR SPEED:
* - Code Doppler is not compensated in the local replica
* Code DLL + carrier PLL according to the algorithms described in:
* K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
* A Software-Defined GPS and Galileo Receiver. A Single-Frequency
* Approach, Birkha user, 2007
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#include "gps_l1_ca_dll_pll_optim_tracking.h"
#include <cmath>
#include <glog/logging.h>
#include "GPS_L1_CA.h"
#include "configuration_interface.h"
using google::LogMessage;
GpsL1CaDllPllOptimTracking::GpsL1CaDllPllOptimTracking(
ConfigurationInterface* configuration, std::string role,
unsigned int in_streams, unsigned int out_streams,
boost::shared_ptr<gr::msg_queue> queue) :
role_(role), in_streams_(in_streams), out_streams_(out_streams),
queue_(queue)
{
DLOG(INFO) << "role " << role;
//################# CONFIGURATION PARAMETERS ########################
int fs_in;
int vector_length;
int f_if;
bool dump;
std::string dump_filename;
std::string item_type;
std::string default_item_type = "gr_complex";
float pll_bw_hz;
float dll_bw_hz;
float early_late_space_chips;
item_type = configuration->property(role + ".item_type",default_item_type);
//vector_length = configuration->property(role + ".vector_length", 2048);
fs_in = configuration->property("GNSS-SDR.internal_fs_hz", 2048000);
f_if = configuration->property(role + ".if", 0);
dump = configuration->property(role + ".dump", false);
pll_bw_hz = configuration->property(role + ".pll_bw_hz", 50.0);
dll_bw_hz = configuration->property(role + ".dll_bw_hz", 2.0);
early_late_space_chips = configuration->property(role + ".early_late_space_chips", 0.5);
std::string default_dump_filename = "./track_ch";
dump_filename = configuration->property(role + ".dump_filename", default_dump_filename); //unused!
vector_length = round(fs_in / (GPS_L1_CA_CODE_RATE_HZ / GPS_L1_CA_CODE_LENGTH_CHIPS));
//################# MAKE TRACKING GNURadio object ###################
if (item_type.compare("gr_complex") == 0)
{
item_size_ = sizeof(gr_complex);
tracking_ = gps_l1_ca_dll_pll_make_optim_tracking_cc(
f_if,
fs_in,
vector_length,
queue_,
dump,
dump_filename,
pll_bw_hz,
dll_bw_hz,
early_late_space_chips);
}
else
{
item_size_ = sizeof(gr_complex);
LOG(WARNING) << item_type << " unknown tracking item type.";
}
channel_ = 0;
channel_internal_queue_ = 0;
DLOG(INFO) << "tracking(" << tracking_->unique_id() << ")";
}
GpsL1CaDllPllOptimTracking::~GpsL1CaDllPllOptimTracking()
{}
void GpsL1CaDllPllOptimTracking::start_tracking()
{
tracking_->start_tracking();
}
/*
* Set tracking channel unique ID
*/
void GpsL1CaDllPllOptimTracking::set_channel(unsigned int channel)
{
channel_ = channel;
tracking_->set_channel(channel);
}
/*
* Set tracking channel internal queue
*/
void GpsL1CaDllPllOptimTracking::set_channel_queue(
concurrent_queue<int> *channel_internal_queue)
{
channel_internal_queue_ = channel_internal_queue;
tracking_->set_channel_queue(channel_internal_queue_);
}
void GpsL1CaDllPllOptimTracking::set_gnss_synchro(Gnss_Synchro* p_gnss_synchro)
{
tracking_->set_gnss_synchro(p_gnss_synchro);
}
void GpsL1CaDllPllOptimTracking::connect(gr::top_block_sptr top_block)
{
if(top_block) { /* top_block is not null */};
//nothing to connect, now the tracking uses gr_sync_decimator
}
void GpsL1CaDllPllOptimTracking::disconnect(gr::top_block_sptr top_block)
{
if(top_block) { /* top_block is not null */};
//nothing to disconnect, now the tracking uses gr_sync_decimator
}
gr::basic_block_sptr GpsL1CaDllPllOptimTracking::get_left_block()
{
return tracking_;
}
gr::basic_block_sptr GpsL1CaDllPllOptimTracking::get_right_block()
{
return tracking_;
}

View File

@ -1,119 +0,0 @@
/*!
* \file gps_l1_ca_dll_pll_optim_tracking.h
* \brief Interface of an adapter of a speed optimized DLL+PLL tracking loop block
* for GPS L1 C/A to a TrackingInterface
* \author Javier Arribas, 2012. jarribas(at)cttc.es
*
* GPS L1 TRACKING MODULE OPTIMIZED FOR SPEED:
* - Code Doppler is not compensated in the local replica
* Code DLL + carrier PLL according to the algorithms described in:
* K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
* A Software-Defined GPS and Galileo Receiver. A Single-Frequency
* Approach, Birkha user, 2007
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_H_
#define GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_H_
#include <string>
#include <gnuradio/msg_queue.h>
#include "tracking_interface.h"
#include "gps_l1_ca_dll_pll_optim_tracking_cc.h"
class ConfigurationInterface;
/*!
* \brief This class implements a code DLL + carrier PLL tracking loop
*/
class GpsL1CaDllPllOptimTracking : public TrackingInterface
{
public:
GpsL1CaDllPllOptimTracking(ConfigurationInterface* configuration,
std::string role,
unsigned int in_streams,
unsigned int out_streams,
boost::shared_ptr<gr::msg_queue> queue);
virtual ~GpsL1CaDllPllOptimTracking();
std::string role()
{
return role_;
}
//! Returns "GPS_L1_CA_DLL_PLL_Optim_Tracking"
std::string implementation()
{
return "GPS_L1_CA_DLL_PLL_Optim_Tracking";
}
size_t item_size()
{
return item_size_;
}
void connect(gr::top_block_sptr top_block);
void disconnect(gr::top_block_sptr top_block);
gr::basic_block_sptr get_left_block();
gr::basic_block_sptr get_right_block();
/*!
* \brief Set tracking channel unique ID
*/
void set_channel(unsigned int channel);
/*!
* \brief Set acquisition/tracking common Gnss_Synchro object pointer
* to efficiently exchange synchronization data between acquisition and tracking blocks
*/
void set_gnss_synchro(Gnss_Synchro* p_gnss_synchro);
/*!
* \brief Set tracking channel internal queue
*/
void set_channel_queue(concurrent_queue<int> *channel_internal_queue);
void start_tracking();
private:
gps_l1_ca_dll_pll_optim_tracking_cc_sptr tracking_;
size_t item_size_;
unsigned int channel_;
std::string role_;
unsigned int in_streams_;
unsigned int out_streams_;
boost::shared_ptr<gr::msg_queue> queue_;
concurrent_queue<int> *channel_internal_queue_;
};
#endif // GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_H_

View File

@ -26,7 +26,6 @@ set(TRACKING_GR_BLOCKS_SOURCES
galileo_e1_dll_pll_veml_tracking_cc.cc
galileo_e1_tcp_connector_tracking_cc.cc
gps_l1_ca_dll_fll_pll_tracking_cc.cc
gps_l1_ca_dll_pll_optim_tracking_cc.cc
gps_l1_ca_dll_pll_tracking_cc.cc
gps_l1_ca_tcp_connector_tracking_cc.cc
galileo_e5a_dll_pll_tracking_cc.cc

View File

@ -1,654 +0,0 @@
/*!
* \file gps_l1_ca_dll_pll_optim_tracking_cc.cc
* \brief Implementation of a code DLL + carrier PLL tracking block
* \author Javier Arribas, 2012. jarribas(at)cttc.es
* GPS L1 TRACKING MODULE OPTIMIZED FOR SPEED:
* - Code Doppler is not compensated in the local replica
* Code DLL + carrier PLL according to the algorithms described in:
* [1] K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
* A Software-Defined GPS and Galileo Receiver. A Single-Frequency
* Approach, Birkha user, 2007
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#include "gps_l1_ca_dll_pll_optim_tracking_cc.h"
#include <iostream>
#include <memory>
#include <sstream>
#include <boost/lexical_cast.hpp>
#include <gnuradio/io_signature.h>
#include <glog/logging.h>
#include <volk/volk.h>
#include "gps_sdr_signal_processing.h"
#include "tracking_discriminators.h"
#include "lock_detectors.h"
#include "GPS_L1_CA.h"
#include "nco_lib.h"
#include "control_message_factory.h"
/*!
* \todo Include in definition header file
*/
#define CN0_ESTIMATION_SAMPLES 20
#define MINIMUM_VALID_CN0 25
#define MAXIMUM_LOCK_FAIL_COUNTER 50
#define CARRIER_LOCK_THRESHOLD 0.85
using google::LogMessage;
gps_l1_ca_dll_pll_optim_tracking_cc_sptr
gps_l1_ca_dll_pll_make_optim_tracking_cc(
long if_freq,
long fs_in,
unsigned int vector_length,
boost::shared_ptr<gr::msg_queue> queue,
bool dump,
std::string dump_filename,
float pll_bw_hz,
float dll_bw_hz,
float early_late_space_chips)
{
return gps_l1_ca_dll_pll_optim_tracking_cc_sptr(new Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc(if_freq,
fs_in, vector_length, queue, dump, dump_filename, pll_bw_hz, dll_bw_hz, early_late_space_chips));
}
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::forecast (int noutput_items,
gr_vector_int &ninput_items_required)
{
if (noutput_items != 0)
{
ninput_items_required[0] = d_gnuradio_forecast_samples; //set the required available samples in each call
}
}
Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc(
long if_freq,
long fs_in,
unsigned int vector_length,
boost::shared_ptr<gr::msg_queue> queue,
bool dump,
std::string dump_filename,
float pll_bw_hz,
float dll_bw_hz,
float early_late_space_chips) :
gr::block("Gps_L1_Ca_Dll_Pll_Tracking_cc",
gr::io_signature::make(1, 1, sizeof(gr_complex)),
gr::io_signature::make(1, 1, sizeof(Gnss_Synchro)))
{
// initialize internal vars
d_queue = queue;
d_dump = dump;
d_if_freq = if_freq;
d_fs_in = fs_in;
d_vector_length = vector_length;
d_gnuradio_forecast_samples = static_cast<int>(d_vector_length) * 2;
d_dump_filename = dump_filename;
// Initialize tracking ==========================================
d_code_loop_filter.set_DLL_BW(dll_bw_hz);
d_carrier_loop_filter.set_PLL_BW(pll_bw_hz);
//--- DLL variables --------------------------------------------------------
d_early_late_spc_chips = early_late_space_chips; // Define early-late offset (in chips)
// Initialization of local code replica
// Get space for a vector with the C/A code replica sampled 1x/chip
d_ca_code = static_cast<gr_complex*>(volk_malloc((GPS_L1_CA_CODE_LENGTH_CHIPS + 2) * sizeof(gr_complex), volk_get_alignment()));
// Get space for the resampled early / prompt / late local replicas
d_early_code = static_cast<gr_complex*>(volk_malloc(2 * d_vector_length * sizeof(gr_complex), volk_get_alignment()));
d_prompt_code = static_cast<gr_complex*>(volk_malloc(2 * d_vector_length * sizeof(gr_complex), volk_get_alignment()));
d_late_code = static_cast<gr_complex*>(volk_malloc(2 * d_vector_length * sizeof(gr_complex), volk_get_alignment()));
// space for carrier wipeoff and signal baseband vectors
d_carr_sign = static_cast<gr_complex*>(volk_malloc(2*d_vector_length * sizeof(gr_complex), volk_get_alignment()));
// correlator outputs (scalar)
d_Early = static_cast<gr_complex*>(volk_malloc(sizeof(gr_complex), volk_get_alignment()));
d_Prompt = static_cast<gr_complex*>(volk_malloc(sizeof(gr_complex), volk_get_alignment()));
d_Late = static_cast<gr_complex*>(volk_malloc(sizeof(gr_complex), volk_get_alignment()));
//--- Perform initializations ------------------------------
// define initial code frequency basis of NCO
d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ;
// define residual code phase (in chips)
d_rem_code_phase_samples = 0.0;
// define residual carrier phase
d_rem_carr_phase_rad = 0.0;
// sample synchronization
d_sample_counter = 0;
//d_sample_counter_seconds = 0;
d_acq_sample_stamp = 0;
d_enable_tracking = false;
d_pull_in = false;
d_last_seg = 0;
d_current_prn_length_samples = static_cast<int>(d_vector_length);
// CN0 estimation and lock detector buffers
d_cn0_estimation_counter = 0;
d_Prompt_buffer = new gr_complex[CN0_ESTIMATION_SAMPLES];
d_carrier_lock_test = 1;
d_CN0_SNV_dB_Hz = 0;
d_carrier_lock_fail_counter = 0;
d_carrier_lock_threshold = CARRIER_LOCK_THRESHOLD;
systemName["G"] = std::string("GPS");
d_channel_internal_queue = 0;
d_acquisition_gnss_synchro = 0;
d_channel = 0;
d_acq_code_phase_samples = 0.0;
d_acq_carrier_doppler_hz = 0.0;
d_carrier_doppler_hz = 0.0;
d_acc_carrier_phase_rad = 0.0;
d_code_phase_samples = 0.0;
d_acc_code_phase_secs = 0.0;
}
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::start_tracking()
{
// correct the code phase according to the delay between acq and trk
d_acq_code_phase_samples = d_acquisition_gnss_synchro->Acq_delay_samples;
d_acq_carrier_doppler_hz = d_acquisition_gnss_synchro->Acq_doppler_hz;
d_acq_sample_stamp = d_acquisition_gnss_synchro->Acq_samplestamp_samples;
long int acq_trk_diff_samples;
double acq_trk_diff_seconds;
acq_trk_diff_samples = static_cast<long int>(d_sample_counter) - static_cast<long int>(d_acq_sample_stamp); //-d_vector_length;
LOG(INFO) << "Number of samples between Acquisition and Tracking =" << acq_trk_diff_samples;
acq_trk_diff_seconds = static_cast<float>(acq_trk_diff_samples) / static_cast<float>(d_fs_in);
//doppler effect
// Fd=(C/(C+Vr))*F
double radial_velocity;
radial_velocity = (GPS_L1_FREQ_HZ + d_acq_carrier_doppler_hz) / GPS_L1_FREQ_HZ;
// new chip and prn sequence periods based on acq Doppler
double T_chip_mod_seconds;
double T_prn_mod_seconds;
double T_prn_mod_samples;
d_code_freq_chips = radial_velocity * GPS_L1_CA_CODE_RATE_HZ;
T_chip_mod_seconds = 1/d_code_freq_chips;
T_prn_mod_seconds = T_chip_mod_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
T_prn_mod_samples = T_prn_mod_seconds * static_cast<float>(d_fs_in);
d_current_prn_length_samples = round(T_prn_mod_samples);
double T_prn_true_seconds = GPS_L1_CA_CODE_LENGTH_CHIPS / GPS_L1_CA_CODE_RATE_HZ;
double T_prn_true_samples = T_prn_true_seconds * static_cast<float>(d_fs_in);
double T_prn_diff_seconds = T_prn_true_seconds - T_prn_mod_seconds;
double N_prn_diff = acq_trk_diff_seconds / T_prn_true_seconds;
double corrected_acq_phase_samples, delay_correction_samples;
corrected_acq_phase_samples = fmod((d_acq_code_phase_samples + T_prn_diff_seconds * N_prn_diff * static_cast<float>(d_fs_in)), T_prn_true_samples);
if (corrected_acq_phase_samples < 0)
{
corrected_acq_phase_samples = T_prn_mod_samples + corrected_acq_phase_samples;
}
delay_correction_samples = d_acq_code_phase_samples - corrected_acq_phase_samples;
d_acq_code_phase_samples = corrected_acq_phase_samples;
d_carrier_doppler_hz = d_acq_carrier_doppler_hz;
// DLL/PLL filter initialization
d_carrier_loop_filter.initialize(); //initialize the carrier filter
d_code_loop_filter.initialize(); //initialize the code filter
// generate local reference ALWAYS starting at chip 1 (1 sample per chip)
gps_l1_ca_code_gen_complex(&d_ca_code[1], d_acquisition_gnss_synchro->PRN, 0);
d_ca_code[0] = d_ca_code[static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS)];
d_ca_code[static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS) + 1] = d_ca_code[1];
//******************************************************************************
// Experimental: pre-sampled local signal replica at nominal code frequency.
// No code doppler correction
double tcode_chips;
int associated_chip_index;
int code_length_chips = static_cast<float>(GPS_L1_CA_CODE_LENGTH_CHIPS);
double code_phase_step_chips;
int early_late_spc_samples;
int epl_loop_length_samples;
// unified loop for E, P, L code vectors
code_phase_step_chips = (static_cast<double>(d_code_freq_chips)) / (static_cast<double>(d_fs_in));
tcode_chips = 0;
// Alternative EPL code generation (40% of speed improvement!)
early_late_spc_samples = round(d_early_late_spc_chips / code_phase_step_chips);
epl_loop_length_samples = d_current_prn_length_samples +early_late_spc_samples*2;
for (int i = 0; i < epl_loop_length_samples; i++)
{
associated_chip_index = 1 + round(fmod(tcode_chips - d_early_late_spc_chips, code_length_chips));
d_early_code[i] = d_ca_code[associated_chip_index];
tcode_chips = tcode_chips + code_phase_step_chips;
}
memcpy(d_prompt_code, &d_early_code[early_late_spc_samples], d_current_prn_length_samples* sizeof(gr_complex));
memcpy(d_late_code, &d_early_code[early_late_spc_samples*2], d_current_prn_length_samples* sizeof(gr_complex));
//******************************************************************************
d_carrier_lock_fail_counter = 0;
d_rem_code_phase_samples = 0;
d_rem_carr_phase_rad = 0;
d_acc_carrier_phase_rad = 0;
d_code_phase_samples = d_acq_code_phase_samples;
std::string sys_ = &d_acquisition_gnss_synchro->System;
sys = sys_.substr(0,1);
// DEBUG OUTPUT
std::cout << "Tracking start on channel " << d_channel << " for satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN) << std::endl;
LOG(INFO) << "Starting tracking of satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN) << " on channel " << d_channel;
// enable tracking
d_pull_in = true;
d_enable_tracking = true;
LOG(INFO) << "PULL-IN Doppler [Hz]=" << d_carrier_doppler_hz
<< " Code Phase correction [samples]=" << delay_correction_samples
<< " PULL-IN Code Phase [samples]=" << d_acq_code_phase_samples << std::endl;
}
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::update_local_code()
{
double tcode_chips;
double rem_code_phase_chips;
int associated_chip_index;
int code_length_chips = static_cast<int>(GPS_L1_CA_CODE_LENGTH_CHIPS);
double code_phase_step_chips;
int early_late_spc_samples;
int epl_loop_length_samples;
// unified loop for E, P, L code vectors
code_phase_step_chips = (static_cast<double>(d_code_freq_chips)) / (static_cast<double>(d_fs_in));
rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in);
tcode_chips = -rem_code_phase_chips;
//EPL code generation
early_late_spc_samples = round(d_early_late_spc_chips / code_phase_step_chips);
epl_loop_length_samples = d_current_prn_length_samples + early_late_spc_samples*2;
for (int i = 0; i < epl_loop_length_samples; i++)
{
associated_chip_index = 1 + round(fmod(tcode_chips - d_early_late_spc_chips, code_length_chips));
d_early_code[i] = d_ca_code[associated_chip_index];
tcode_chips = tcode_chips + code_phase_step_chips;
}
memcpy(d_prompt_code, &d_early_code[early_late_spc_samples], d_current_prn_length_samples* sizeof(gr_complex));
memcpy(d_late_code, &d_early_code[early_late_spc_samples*2], d_current_prn_length_samples* sizeof(gr_complex));
}
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::update_local_carrier()
{
float phase_step_rad;
phase_step_rad = static_cast<float>(GPS_TWO_PI) * d_carrier_doppler_hz / static_cast<float>(d_fs_in);
fxp_nco(d_carr_sign, d_current_prn_length_samples, d_rem_carr_phase_rad, phase_step_rad);
}
Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::~Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc()
{
d_dump_file.close();
volk_free(d_prompt_code);
volk_free(d_late_code);
volk_free(d_early_code);
volk_free(d_carr_sign);
volk_free(d_Early);
volk_free(d_Prompt);
volk_free(d_Late);
delete[] d_ca_code;
delete[] d_Prompt_buffer;
}
// Tracking signal processing
int Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::general_work (int noutput_items, gr_vector_int &ninput_items,
gr_vector_const_void_star &input_items, gr_vector_void_star &output_items)
{
// stream to collect cout calls to improve thread safety
std::stringstream tmp_str_stream;
double carr_error_hz = 0.0;
double carr_error_filt_hz = 0.0;
double code_error_chips = 0.0;
double code_error_filt_chips = 0.0;
if (d_enable_tracking == true)
{
/*
* Receiver signal alignment
*/
if (d_pull_in == true)
{
int samples_offset;
float acq_trk_shif_correction_samples;
int acq_to_trk_delay_samples;
acq_to_trk_delay_samples = d_sample_counter - d_acq_sample_stamp;
acq_trk_shif_correction_samples = d_current_prn_length_samples - fmod(static_cast<float>(acq_to_trk_delay_samples), static_cast<float>(d_current_prn_length_samples));
samples_offset = round(d_acq_code_phase_samples + acq_trk_shif_correction_samples);
d_sample_counter = d_sample_counter + samples_offset; //count for the processed samples
d_pull_in = false;
consume_each(samples_offset); //shift input to perform alignment with local replica
return 1;
}
// GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder
Gnss_Synchro current_synchro_data;
// Fill the acquisition data
current_synchro_data = *d_acquisition_gnss_synchro;
// Block input data and block output stream pointers
const gr_complex* in = (gr_complex*) input_items[0]; //PRN start block alignment
Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0];
// Generate local code and carrier replicas (using \hat{f}_d(k-1))
//update_local_code(); //disabled in the speed optimized tracking!
update_local_carrier();
// perform Early, Prompt and Late correlation
#if USING_VOLK_CW_EPL_CORR_CUSTOM
d_correlator.Carrier_wipeoff_and_EPL_volk_custom(d_current_prn_length_samples,
in,
d_carr_sign,
d_early_code,
d_prompt_code,
d_late_code,
d_Early,
d_Prompt,
d_Late);
#else
d_correlator.Carrier_wipeoff_and_EPL_volk(d_current_prn_length_samples,
in,
d_carr_sign,
d_early_code,
d_prompt_code,
d_late_code,
d_Early,
d_Prompt,
d_Late);
#endif
// ################## PLL ##########################################################
// PLL discriminator
carr_error_hz = pll_cloop_two_quadrant_atan(*d_Prompt) / GPS_TWO_PI;
// Carrier discriminator filter
carr_error_filt_hz = d_carrier_loop_filter.get_carrier_nco(carr_error_hz);
// New carrier Doppler frequency estimation
d_carrier_doppler_hz = d_acq_carrier_doppler_hz + carr_error_filt_hz;
// New code Doppler frequency estimation
d_code_freq_chips = GPS_L1_CA_CODE_RATE_HZ + ((d_carrier_doppler_hz * GPS_L1_CA_CODE_RATE_HZ) / GPS_L1_FREQ_HZ);
//carrier phase accumulator for (K) doppler estimation
d_acc_carrier_phase_rad -= GPS_TWO_PI * d_carrier_doppler_hz * GPS_L1_CA_CODE_PERIOD;
//remnant carrier phase to prevent overflow in the code NCO
d_rem_carr_phase_rad = d_rem_carr_phase_rad + GPS_TWO_PI * d_carrier_doppler_hz * GPS_L1_CA_CODE_PERIOD;
d_rem_carr_phase_rad = fmod(d_rem_carr_phase_rad, GPS_TWO_PI);
// ################## DLL ##########################################################
// DLL discriminator
code_error_chips = dll_nc_e_minus_l_normalized(*d_Early, *d_Late); //[chips/Ti]
// Code discriminator filter
code_error_filt_chips = d_code_loop_filter.get_code_nco(code_error_chips); //[chips/second]
//Code phase accumulator
double code_error_filt_secs;
code_error_filt_secs = (GPS_L1_CA_CODE_PERIOD * code_error_filt_chips) / GPS_L1_CA_CODE_RATE_HZ; //[seconds]
d_acc_code_phase_secs = d_acc_code_phase_secs + code_error_filt_secs;
// ################## CARRIER AND CODE NCO BUFFER ALIGNEMENT #######################
// keep alignment parameters for the next input buffer
double T_chip_seconds;
double T_prn_seconds;
double T_prn_samples;
double K_blk_samples;
// Compute the next buffer length based in the new period of the PRN sequence and the code phase error estimation
T_chip_seconds = 1.0 / d_code_freq_chips;
T_prn_seconds = T_chip_seconds * GPS_L1_CA_CODE_LENGTH_CHIPS;
T_prn_samples = T_prn_seconds * static_cast<double>(d_fs_in);
K_blk_samples = T_prn_samples + d_rem_code_phase_samples + code_error_filt_secs * static_cast<double>(d_fs_in);
d_current_prn_length_samples = round(K_blk_samples); //round to a discrete samples
//d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
// ####### CN0 ESTIMATION AND LOCK DETECTORS ######
if (d_cn0_estimation_counter < CN0_ESTIMATION_SAMPLES)
{
// fill buffer with prompt correlator output values
d_Prompt_buffer[d_cn0_estimation_counter] = *d_Prompt;
d_cn0_estimation_counter++;
}
else
{
d_cn0_estimation_counter = 0;
// Code lock indicator
d_CN0_SNV_dB_Hz = cn0_svn_estimator(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES, d_fs_in, GPS_L1_CA_CODE_LENGTH_CHIPS);
// Carrier lock indicator
d_carrier_lock_test = carrier_lock_detector(d_Prompt_buffer, CN0_ESTIMATION_SAMPLES);
// Loss of lock detection
if (d_carrier_lock_test < d_carrier_lock_threshold or d_CN0_SNV_dB_Hz < MINIMUM_VALID_CN0)
{
d_carrier_lock_fail_counter++;
}
else
{
if (d_carrier_lock_fail_counter > 0) d_carrier_lock_fail_counter--;
}
if (d_carrier_lock_fail_counter > MAXIMUM_LOCK_FAIL_COUNTER)
{
std::cout << "Loss of lock in channel " << d_channel << "!" << std::endl;
LOG(INFO) << "Loss of lock in channel " << d_channel << "!";
std::unique_ptr<ControlMessageFactory> cmf(new ControlMessageFactory());
if (d_queue != gr::msg_queue::sptr())
{
d_queue->handle(cmf->GetQueueMessage(d_channel, 2));
}
d_carrier_lock_fail_counter = 0;
d_enable_tracking = false; // TODO: check if disabling tracking is consistent with the channel state machine
}
}
// ########### Output the tracking data to navigation and PVT ##########
current_synchro_data.Prompt_I = static_cast<double>((*d_Prompt).real());
current_synchro_data.Prompt_Q = static_cast<double>((*d_Prompt).imag());
// Tracking_timestamp_secs is aligned with the PRN start sample
//current_synchro_data.Tracking_timestamp_secs = ((double)d_sample_counter + (double)d_current_prn_length_samples + (double)d_rem_code_phase_samples) / static_cast<double>(d_fs_in);
current_synchro_data.Tracking_timestamp_secs = (static_cast<double>(d_sample_counter) + static_cast<double>(d_rem_code_phase_samples)) / static_cast<double>(d_fs_in);
d_rem_code_phase_samples = K_blk_samples - d_current_prn_length_samples; //rounding error < 1 sample
// This tracking block aligns the Tracking_timestamp_secs with the start sample of the PRN, thus, Code_phase_secs=0
current_synchro_data.Code_phase_secs = 0;
current_synchro_data.Carrier_phase_rads = static_cast<double>(d_acc_carrier_phase_rad);
current_synchro_data.Carrier_Doppler_hz = static_cast<double>(d_carrier_doppler_hz);
current_synchro_data.CN0_dB_hz = static_cast<double>(d_CN0_SNV_dB_Hz);
current_synchro_data.Flag_valid_pseudorange = false;
*out[0] = current_synchro_data;
// ########## DEBUG OUTPUT
/*!
* \todo The stop timer has to be moved to the signal source!
*/
if (floor(d_sample_counter / d_fs_in) != d_last_seg)
{
d_last_seg = floor(d_sample_counter / d_fs_in);
if (d_channel == 0)
{
// debug: Second counter in channel 0
tmp_str_stream << "Current input signal time = " << d_last_seg << " [s]" << std::endl;
std::cout << tmp_str_stream.rdbuf();
LOG(INFO) << tmp_str_stream.rdbuf();
}
tmp_str_stream << "Tracking CH " << d_channel << ": Satellite " << Gnss_Satellite(systemName[sys], d_acquisition_gnss_synchro->PRN)
<< ", Doppler=" << d_carrier_doppler_hz << " [Hz] CN0 = " << d_CN0_SNV_dB_Hz << " [dB-Hz]" << std::endl;
//std::cout << tmp_str_stream.rdbuf() << std::flush;
LOG(INFO) << tmp_str_stream.rdbuf();
//if (d_channel == 0 || d_last_seg==5) d_carrier_lock_fail_counter=500; //DEBUG: force unlock!
}
}
else
{
// ########## DEBUG OUTPUT (TIME ONLY for channel 0 when tracking is disabled)
/*!
* \todo The stop timer has to be moved to the signal source!
*/
// stream to collect cout calls to improve thread safety
std::stringstream tmp_str_stream;
if (floor(d_sample_counter / d_fs_in) != d_last_seg)
{
d_last_seg = floor(d_sample_counter / d_fs_in);
if (d_channel == 0)
{
// debug: Second counter in channel 0
tmp_str_stream << "Current input signal time = " << d_last_seg << " [s]" << std::endl << std::flush;
std::cout << tmp_str_stream.rdbuf() << std::flush;
}
}
*d_Early = gr_complex(0,0);
*d_Prompt = gr_complex(0,0);
*d_Late = gr_complex(0,0);
Gnss_Synchro **out = (Gnss_Synchro **) &output_items[0]; //block output streams pointer
// GNSS_SYNCHRO OBJECT to interchange data between tracking->telemetry_decoder
d_acquisition_gnss_synchro->Flag_valid_pseudorange = false;
*out[0] = *d_acquisition_gnss_synchro;
}
if(d_dump)
{
// MULTIPLEXED FILE RECORDING - Record results to file
float prompt_I;
float prompt_Q;
float tmp_E, tmp_P, tmp_L;
float tmp_float;
double tmp_double;
prompt_I = (*d_Prompt).real();
prompt_Q = (*d_Prompt).imag();
tmp_E = std::abs<float>(*d_Early);
tmp_P = std::abs<float>(*d_Prompt);
tmp_L = std::abs<float>(*d_Late);
try
{
// EPR
d_dump_file.write((char*)&tmp_E, sizeof(float));
d_dump_file.write((char*)&tmp_P, sizeof(float));
d_dump_file.write((char*)&tmp_L, sizeof(float));
// PROMPT I and Q (to analyze navigation symbols)
d_dump_file.write((char*)&prompt_I, sizeof(float));
d_dump_file.write((char*)&prompt_Q, sizeof(float));
// PRN start sample stamp
//tmp_float=(float)d_sample_counter;
d_dump_file.write((char*)&d_sample_counter, sizeof(unsigned long int));
// accumulated carrier phase
tmp_float = d_acc_carrier_phase_rad;
d_dump_file.write((char*)&tmp_float, sizeof(float));
// carrier and code frequency
tmp_float = d_carrier_doppler_hz;
d_dump_file.write((char*)&tmp_float, sizeof(float));
tmp_float = d_code_freq_chips;
d_dump_file.write((char*)&tmp_float, sizeof(float));
//PLL commands
tmp_float = carr_error_hz;
d_dump_file.write((char*)&tmp_float, sizeof(float));
tmp_float = carr_error_filt_hz;
d_dump_file.write((char*)&tmp_float, sizeof(float));
//DLL commands
tmp_float = code_error_chips;
d_dump_file.write((char*)&tmp_float, sizeof(float));
tmp_float = code_error_filt_chips;
d_dump_file.write((char*)&tmp_float, sizeof(float));
// CN0 and carrier lock test
tmp_float = d_CN0_SNV_dB_Hz;
d_dump_file.write((char*)&tmp_float, sizeof(float));
tmp_float = d_carrier_lock_test;
d_dump_file.write((char*)&tmp_float, sizeof(float));
// AUX vars (for debug purposes)
tmp_float = d_rem_code_phase_samples;
d_dump_file.write((char*)&tmp_float, sizeof(float));
tmp_double = (double)(d_sample_counter + d_current_prn_length_samples);
d_dump_file.write((char*)&tmp_double, sizeof(double));
}
catch (std::ifstream::failure& e)
{
LOG(WARNING) << "Exception writing trk dump file " << e.what();
}
}
consume_each(d_current_prn_length_samples); // this is necesary in gr_block derivates
d_sample_counter += d_current_prn_length_samples; //count for the processed samples
if((noutput_items == 0) || (ninput_items[0] == 0))
{
LOG(WARNING) << "noutput_items = 0";
}
return 1; //output tracking result ALWAYS even in the case of d_enable_tracking==false
}
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_channel(unsigned int channel)
{
d_channel = channel;
LOG(INFO) << "Tracking Channel set to " << d_channel;
// ############# ENABLE DATA FILE LOG #################
if (d_dump == true)
{
if (d_dump_file.is_open() == false)
{
try
{
d_dump_filename.append(boost::lexical_cast<std::string>(d_channel));
d_dump_filename.append(".dat");
d_dump_file.exceptions (std::ifstream::failbit | std::ifstream::badbit);
d_dump_file.open(d_dump_filename.c_str(), std::ios::out | std::ios::binary);
LOG(INFO) << "Tracking dump enabled on channel " << d_channel << " Log file: " << d_dump_filename.c_str();
}
catch (std::ifstream::failure& e)
{
LOG(WARNING) << "channel " << d_channel << " Exception opening trk dump file " << e.what();
}
}
}
}
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_channel_queue(concurrent_queue<int> *channel_internal_queue)
{
d_channel_internal_queue = channel_internal_queue;
}
void Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc::set_gnss_synchro(Gnss_Synchro* p_gnss_synchro)
{
d_acquisition_gnss_synchro = p_gnss_synchro;
}

View File

@ -1,181 +0,0 @@
/*!
* \file gps_l1_ca_dll_pll_optim_tracking_cc.h
* \brief Implementation of a code DLL + carrier PLL tracking block
* \author Javier Arribas, 2012. jarribas(at)cttc.es
* GPS L1 TRACKING MODULE OPTIMIZED FOR SPEED:
* - Code Doppler is not compensated in the local replica
* Code DLL + carrier PLL according to the algorithms described in:
* [1] K.Borre, D.M.Akos, N.Bertelsen, P.Rinder, and S.H.Jensen,
* A Software-Defined GPS and Galileo Receiver. A Single-Frequency
* Approach, Birkha user, 2007
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_CC_H
#define GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_CC_H
#include <fstream>
#include <map>
#include <string>
#include <gnuradio/block.h>
#include <gnuradio/msg_queue.h>
#include "concurrent_queue.h"
#include "gnss_synchro.h"
#include "tracking_2nd_DLL_filter.h"
#include "tracking_2nd_PLL_filter.h"
#include "correlator.h"
class Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc;
typedef boost::shared_ptr<Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc> gps_l1_ca_dll_pll_optim_tracking_cc_sptr;
gps_l1_ca_dll_pll_optim_tracking_cc_sptr gps_l1_ca_dll_pll_make_optim_tracking_cc(long if_freq,
long fs_in,
unsigned int vector_length,
boost::shared_ptr<gr::msg_queue> queue,
bool dump,
std::string dump_filename,
float pll_bw_hz,
float dll_bw_hz,
float early_late_space_chips);
/*!
* \brief This class implements a DLL + PLL tracking loop block
*/
class Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc: public gr::block
{
public:
~Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc();
void set_channel(unsigned int channel);
void set_gnss_synchro(Gnss_Synchro* p_gnss_synchro);
void start_tracking();
void set_channel_queue(concurrent_queue<int> *channel_internal_queue);
int general_work (int noutput_items, gr_vector_int &ninput_items,
gr_vector_const_void_star &input_items, gr_vector_void_star &output_items);
void forecast (int noutput_items, gr_vector_int &ninput_items_required);
private:
friend gps_l1_ca_dll_pll_optim_tracking_cc_sptr
gps_l1_ca_dll_pll_make_optim_tracking_cc(long if_freq,
long fs_in, unsigned
int vector_length,
boost::shared_ptr<gr::msg_queue> queue,
bool dump,
std::string dump_filename,
float pll_bw_hz,
float dll_bw_hz,
float early_late_space_chips);
Gps_L1_Ca_Dll_Pll_Optim_Tracking_cc(long if_freq,
long fs_in, unsigned
int vector_length,
boost::shared_ptr<gr::msg_queue> queue,
bool dump,
std::string dump_filename,
float pll_bw_hz,
float dll_bw_hz,
float early_late_space_chips);
void update_local_code();
void update_local_carrier();
// tracking configuration vars
boost::shared_ptr<gr::msg_queue> d_queue;
concurrent_queue<int> *d_channel_internal_queue;
unsigned int d_vector_length;
bool d_dump;
Gnss_Synchro* d_acquisition_gnss_synchro;
unsigned int d_channel;
int d_last_seg;
long d_if_freq;
long d_fs_in;
double d_early_late_spc_chips;
gr_complex* d_ca_code;
gr_complex* d_early_code;
gr_complex* d_late_code;
gr_complex* d_prompt_code;
gr_complex* d_carr_sign;
gr_complex *d_Early;
gr_complex *d_Prompt;
gr_complex *d_Late;
// remaining code phase and carrier phase between tracking loops
double d_rem_code_phase_samples;
double d_rem_carr_phase_rad;
// PLL and DLL filter library
Tracking_2nd_DLL_filter d_code_loop_filter;
Tracking_2nd_PLL_filter d_carrier_loop_filter;
// acquisition
double d_acq_code_phase_samples;
double d_acq_carrier_doppler_hz;
// correlator
Correlator d_correlator;
// tracking vars
double d_code_freq_chips;
double d_carrier_doppler_hz;
double d_acc_carrier_phase_rad;
double d_code_phase_samples;
double d_acc_code_phase_secs;
//PRN period in samples
int d_current_prn_length_samples;
//processing samples counters
unsigned long int d_sample_counter;
unsigned long int d_acq_sample_stamp;
// CN0 estimation and lock detector
int d_cn0_estimation_counter;
gr_complex* d_Prompt_buffer;
double d_carrier_lock_test;
double d_CN0_SNV_dB_Hz;
double d_carrier_lock_threshold;
int d_carrier_lock_fail_counter;
// control vars
int d_gnuradio_forecast_samples;
bool d_enable_tracking;
bool d_pull_in;
// file dump
std::string d_dump_filename;
std::ofstream d_dump_file;
std::map<std::string, std::string> systemName;
std::string sys;
};
#endif //GNSS_SDR_GPS_L1_CA_DLL_PLL_OPTIM_TRACKING_CC_H

View File

@ -80,7 +80,6 @@
#include "galileo_e5a_noncoherent_iq_acquisition_caf.h"
#include "gps_l1_ca_dll_pll_tracking.h"
#include "gps_l1_ca_dll_pll_c_aid_tracking.h"
#include "gps_l1_ca_dll_pll_optim_tracking.h"
#include "gps_l1_ca_dll_fll_pll_tracking.h"
#include "gps_l1_ca_tcp_connector_tracking.h"
#include "galileo_e1_dll_pll_veml_tracking.h"
@ -1324,12 +1323,6 @@ std::unique_ptr<GNSSBlockInterface> GNSSBlockFactory::GetBlock(
out_streams, queue));
block = std::move(block_);
}
else if (implementation.compare("GPS_L1_CA_DLL_PLL_Optim_Tracking") == 0)
{
std::unique_ptr<GNSSBlockInterface> block_(new GpsL1CaDllPllOptimTracking(configuration.get(), role, in_streams,
out_streams, queue));
block = std::move(block_);
}
else if (implementation.compare("GPS_L1_CA_DLL_FLL_PLL_Tracking") == 0)
{
std::unique_ptr<GNSSBlockInterface> block_(new GpsL1CaDllFllPllTracking(configuration.get(), role, in_streams,
@ -1589,12 +1582,6 @@ std::unique_ptr<TrackingInterface> GNSSBlockFactory::GetTrkBlock(
out_streams, queue));
block = std::move(block_);
}
else if (implementation.compare("GPS_L1_CA_DLL_PLL_Optim_Tracking") == 0)
{
std::unique_ptr<TrackingInterface> block_(new GpsL1CaDllPllOptimTracking(configuration.get(), role, in_streams,
out_streams, queue));
block = std::move(block_);
}
else if (implementation.compare("GPS_L1_CA_DLL_FLL_PLL_Tracking") == 0)
{
std::unique_ptr<TrackingInterface> block_(new GpsL1CaDllFllPllTracking(configuration.get(), role, in_streams,