mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2024-11-04 17:16:26 +00:00
volk_gnsssdr_s32f_x2_update_local_carrier_32fc
Added new protokernel volk_gnsssdr_s32f_x2_update_local_carrier_32fc, with volk implementation 4x quicker than generic implementation.
This commit is contained in:
parent
da4288d502
commit
313cb1c544
@ -107,6 +107,9 @@ int main(int argc, char *argv[]) {
|
|||||||
//lv_32fc_t sfv = lv_cmake((float)1, (float)2);
|
//lv_32fc_t sfv = lv_cmake((float)1, (float)2);
|
||||||
//example: VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, sfv, 204602, 1000, &results, benchmark_mode, kernel_regex);
|
//example: VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, sfv, 204602, 1000, &results, benchmark_mode, kernel_regex);
|
||||||
|
|
||||||
|
|
||||||
|
VOLK_PROFILE(volk_gnsssdr_s32f_x2_update_local_carrier_32fc, 1e-4, 0, 7, 1, &results, benchmark_mode, kernel_regex);
|
||||||
|
|
||||||
VOLK_PROFILE(volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc, 1e-4, 0, 7, 1, &results, benchmark_mode, kernel_regex);
|
VOLK_PROFILE(volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc, 1e-4, 0, 7, 1, &results, benchmark_mode, kernel_regex);
|
||||||
|
|
||||||
VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
|
VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
|
||||||
|
@ -1,3 +1,37 @@
|
|||||||
|
/*!
|
||||||
|
* \file volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc
|
||||||
|
* \brief Volk protokernel: replaces the tracking function for update_local_code
|
||||||
|
* \authors <ul>
|
||||||
|
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* Volk protokernel that replaces the tracking function for update_local_code
|
||||||
|
*
|
||||||
|
* -------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||||
|
*
|
||||||
|
* GNSS-SDR is a software defined Global Navigation
|
||||||
|
* Satellite Systems receiver
|
||||||
|
*
|
||||||
|
* This file is part of GNSS-SDR.
|
||||||
|
*
|
||||||
|
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* at your option) any later version.
|
||||||
|
*
|
||||||
|
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* -------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_H
|
#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_H
|
||||||
#define INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_H
|
#define INCLUDED_volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_u_H
|
||||||
|
|
||||||
|
@ -0,0 +1,437 @@
|
|||||||
|
/*!
|
||||||
|
* \file volk_gnsssdr_32fc_s32f_x2_update_local_carrier_32fc
|
||||||
|
* \brief Volk protokernel: replaces the tracking function for update_local_carrier. Algorithm by Julien Pommier, modified by Andrés Cecilia.
|
||||||
|
* \authors <ul>
|
||||||
|
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* Volk protokernel that replaces the tracking function for update_local_carrier. Algorithm by Julien Pommier, modified by Andrés Cecilia.
|
||||||
|
*
|
||||||
|
* -------------------------------------------------------------------------
|
||||||
|
* Copyright (C) 2007 Julien Pommier
|
||||||
|
*
|
||||||
|
* This software is provided 'as-is', without any express or implied
|
||||||
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
|
* arising from the use of this software.
|
||||||
|
*
|
||||||
|
* Permission is granted to anyone to use this software for any purpose,
|
||||||
|
* including commercial applications, and to alter it and redistribute it
|
||||||
|
* freely, subject to the following restrictions:
|
||||||
|
*
|
||||||
|
* 1. The origin of this software must not be misrepresented; you must not
|
||||||
|
* claim that you wrote the original software. If you use this software
|
||||||
|
* in a product, an acknowledgment in the product documentation would be
|
||||||
|
* appreciated but is not required.
|
||||||
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
* misrepresented as being the original software.
|
||||||
|
* 3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*
|
||||||
|
*(this is the zlib license)
|
||||||
|
* -------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
|
||||||
|
*
|
||||||
|
* GNSS-SDR is a software defined Global Navigation
|
||||||
|
* Satellite Systems receiver
|
||||||
|
*
|
||||||
|
* This file is part of GNSS-SDR.
|
||||||
|
*
|
||||||
|
* GNSS-SDR is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* at your option) any later version.
|
||||||
|
*
|
||||||
|
* GNSS-SDR is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* -------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_x2_update_local_carrier_32fc_u_H
|
||||||
|
#define INCLUDED_volk_gnsssdr_32fc_s32f_x2_update_local_carrier_32fc_u_H
|
||||||
|
|
||||||
|
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#ifdef LV_HAVE_SSE2
|
||||||
|
#include <emmintrin.h>
|
||||||
|
/*!
|
||||||
|
\brief Accumulates the values in the input buffer
|
||||||
|
\param result The accumulated result
|
||||||
|
\param inputBuffer The buffer of data to be accumulated
|
||||||
|
\param num_points The number of values in inputBuffer to be accumulated
|
||||||
|
*/
|
||||||
|
static inline void volk_gnsssdr_s32f_x2_update_local_carrier_32fc_u_sse2(lv_32fc_t* d_carr_sign, const float phase_rad_init, const float phase_step_rad, unsigned int num_points){
|
||||||
|
|
||||||
|
// float* pointer1 = (float*)&phase_rad_init;
|
||||||
|
// *pointer1 = 0;
|
||||||
|
// float* pointer2 = (float*)&phase_step_rad;
|
||||||
|
// *pointer2 = 0.5;
|
||||||
|
|
||||||
|
const unsigned int sse_iters = num_points / 4;
|
||||||
|
|
||||||
|
__m128 _ps_minus_cephes_DP1 = _mm_set1_ps(-0.78515625f);
|
||||||
|
__m128 _ps_minus_cephes_DP2 = _mm_set1_ps(-2.4187564849853515625e-4f);
|
||||||
|
__m128 _ps_minus_cephes_DP3 = _mm_set1_ps(-3.77489497744594108e-8f);
|
||||||
|
__m128 _ps_sign_mask = _mm_set1_ps(-0.f);
|
||||||
|
__m128i _pi32_1 = _mm_set1_epi32(1);
|
||||||
|
__m128i _pi32_inv1 = _mm_set1_epi32(~1);
|
||||||
|
__m128i _pi32_2 = _mm_set1_epi32(2);
|
||||||
|
__m128i _pi32_4 = _mm_set1_epi32(4);
|
||||||
|
__m128 _ps_cephes_FOPI = _mm_set1_ps(1.27323954473516f); // 4 / PI
|
||||||
|
__m128 _ps_sincof_p0 = _mm_set1_ps(-1.9515295891E-4f);
|
||||||
|
__m128 _ps_sincof_p1 = _mm_set1_ps( 8.3321608736E-3f);
|
||||||
|
__m128 _ps_sincof_p2 = _mm_set1_ps(-1.6666654611E-1f);
|
||||||
|
__m128 _ps_coscof_p0 = _mm_set1_ps( 2.443315711809948E-005f);
|
||||||
|
__m128 _ps_coscof_p1 = _mm_set1_ps(-1.388731625493765E-003f);
|
||||||
|
__m128 _ps_coscof_p2 = _mm_set1_ps( 4.166664568298827E-002f);
|
||||||
|
__m128 _ps_1 = _mm_set1_ps(1.f);
|
||||||
|
__m128 _ps_0p5 = _mm_set1_ps(0.5f);
|
||||||
|
|
||||||
|
__m128 phase_step_rad_array = _mm_set1_ps(4*phase_step_rad);
|
||||||
|
|
||||||
|
__m128 phase_rad_array, x, s, c, swap_sign_bit_sin, sign_bit_cos, poly_mask, z, tmp, y, y2, ysin1, ysin2;
|
||||||
|
__m128 xmm1, xmm2, xmm3, sign_bit_sin;
|
||||||
|
__m128i emm0, emm2, emm4;
|
||||||
|
__VOLK_ATTR_ALIGNED(16) float sin_value[4];
|
||||||
|
__VOLK_ATTR_ALIGNED(16) float cos_value[4];
|
||||||
|
|
||||||
|
phase_rad_array = _mm_set_ps (phase_rad_init+3*phase_step_rad, phase_rad_init+2*phase_step_rad, phase_rad_init+phase_step_rad, phase_rad_init);
|
||||||
|
|
||||||
|
for(int i = 0; i < sse_iters; i++)
|
||||||
|
{
|
||||||
|
x = phase_rad_array;
|
||||||
|
|
||||||
|
/* extract the sign bit (upper one) */
|
||||||
|
sign_bit_sin = _mm_and_ps(x, _ps_sign_mask);
|
||||||
|
|
||||||
|
/* take the absolute value */
|
||||||
|
x = _mm_xor_ps(x, sign_bit_sin);
|
||||||
|
|
||||||
|
/* scale by 4/Pi */
|
||||||
|
y = _mm_mul_ps(x, _ps_cephes_FOPI);
|
||||||
|
|
||||||
|
/* store the integer part of y in emm2 */
|
||||||
|
emm2 = _mm_cvttps_epi32(y);
|
||||||
|
|
||||||
|
/* j=(j+1) & (~1) (see the cephes sources) */
|
||||||
|
emm2 = _mm_add_epi32(emm2, _pi32_1);
|
||||||
|
emm2 = _mm_and_si128(emm2, _pi32_inv1);
|
||||||
|
y = _mm_cvtepi32_ps(emm2);
|
||||||
|
|
||||||
|
emm4 = emm2;
|
||||||
|
|
||||||
|
/* get the swap sign flag for the sine */
|
||||||
|
emm0 = _mm_and_si128(emm2, _pi32_4);
|
||||||
|
emm0 = _mm_slli_epi32(emm0, 29);
|
||||||
|
swap_sign_bit_sin = _mm_castsi128_ps(emm0);
|
||||||
|
|
||||||
|
/* get the polynom selection mask for the sine*/
|
||||||
|
emm2 = _mm_and_si128(emm2, _pi32_2);
|
||||||
|
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
|
||||||
|
poly_mask = _mm_castsi128_ps(emm2);
|
||||||
|
|
||||||
|
/* The magic pass: "Extended precision modular arithmetic"
|
||||||
|
x = ((x - y * DP1) - y * DP2) - y * DP3; */
|
||||||
|
xmm1 = _mm_mul_ps(y, _ps_minus_cephes_DP1);
|
||||||
|
xmm2 = _mm_mul_ps(y, _ps_minus_cephes_DP2);
|
||||||
|
xmm3 = _mm_mul_ps(y, _ps_minus_cephes_DP3);
|
||||||
|
x = _mm_add_ps(_mm_add_ps(x, xmm1), _mm_add_ps(xmm2, xmm3));
|
||||||
|
|
||||||
|
emm4 = _mm_sub_epi32(emm4, _pi32_2);
|
||||||
|
emm4 = _mm_andnot_si128(emm4, _pi32_4);
|
||||||
|
emm4 = _mm_slli_epi32(emm4, 29);
|
||||||
|
sign_bit_cos = _mm_castsi128_ps(emm4);
|
||||||
|
|
||||||
|
sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin);
|
||||||
|
|
||||||
|
/* Evaluate the first polynom (0 <= x <= Pi/4) */
|
||||||
|
z = _mm_mul_ps(x,x);
|
||||||
|
y = _ps_coscof_p0;
|
||||||
|
y = _mm_mul_ps(y, z);
|
||||||
|
y = _mm_add_ps(y, _ps_coscof_p1);
|
||||||
|
y = _mm_mul_ps(y, z);
|
||||||
|
y = _mm_add_ps(y, _ps_coscof_p2);
|
||||||
|
y = _mm_mul_ps(y, _mm_mul_ps(z, z));
|
||||||
|
tmp = _mm_mul_ps(z, _ps_0p5);
|
||||||
|
y = _mm_sub_ps(y, tmp);
|
||||||
|
y = _mm_add_ps(y, _ps_1);
|
||||||
|
|
||||||
|
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
|
||||||
|
y2 = _ps_sincof_p0;
|
||||||
|
y2 = _mm_mul_ps(y2, z);
|
||||||
|
y2 = _mm_add_ps(y2, _ps_sincof_p1);
|
||||||
|
y2 = _mm_mul_ps(y2, z);
|
||||||
|
y2 = _mm_add_ps(y2, _ps_sincof_p2);
|
||||||
|
y2 = _mm_mul_ps(y2, _mm_mul_ps(z, x));
|
||||||
|
y2 = _mm_add_ps(y2, x);
|
||||||
|
|
||||||
|
/* select the correct result from the two polynoms */
|
||||||
|
xmm3 = poly_mask;
|
||||||
|
ysin2 = _mm_and_ps(xmm3, y2);
|
||||||
|
ysin1 = _mm_andnot_ps(xmm3, y);
|
||||||
|
y2 = _mm_sub_ps(y2,ysin2);
|
||||||
|
y = _mm_sub_ps(y, ysin1);
|
||||||
|
|
||||||
|
xmm1 = _mm_add_ps(ysin1,ysin2);
|
||||||
|
xmm2 = _mm_add_ps(y,y2);
|
||||||
|
|
||||||
|
/* update the sign */
|
||||||
|
s = _mm_xor_ps(xmm1, sign_bit_sin);
|
||||||
|
c = _mm_xor_ps(xmm2, sign_bit_cos);
|
||||||
|
|
||||||
|
//GNSS-SDR needs to return -sin
|
||||||
|
s = _mm_xor_ps(s, _ps_sign_mask);
|
||||||
|
|
||||||
|
_mm_storeu_ps ((float*)sin_value, s);
|
||||||
|
_mm_storeu_ps ((float*)cos_value, c);
|
||||||
|
|
||||||
|
for(int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
d_carr_sign[i] = lv_cmake(cos_value[i], sin_value[i]);
|
||||||
|
}
|
||||||
|
d_carr_sign += 4;
|
||||||
|
|
||||||
|
phase_rad_array = _mm_add_ps (phase_rad_array, phase_step_rad_array);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (num_points%4!=0)
|
||||||
|
{
|
||||||
|
__VOLK_ATTR_ALIGNED(16) float phase_rad_store[4];
|
||||||
|
_mm_storeu_si128 ((__m128i*)phase_rad_store, phase_rad_array);
|
||||||
|
|
||||||
|
float phase_rad = phase_rad_store[0];
|
||||||
|
|
||||||
|
for(int i = 0; i < num_points%4; i++)
|
||||||
|
{
|
||||||
|
*d_carr_sign = lv_cmake(cos(phase_rad), -sin(phase_rad));
|
||||||
|
d_carr_sign++;
|
||||||
|
phase_rad += phase_step_rad;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* LV_HAVE_SSE2 */
|
||||||
|
|
||||||
|
#ifdef LV_HAVE_GENERIC
|
||||||
|
/*!
|
||||||
|
\brief Accumulates the values in the input buffer
|
||||||
|
\param result The accumulated result
|
||||||
|
\param inputBuffer The buffer of data to be accumulated
|
||||||
|
\param num_points The number of values in inputBuffer to be accumulated
|
||||||
|
*/
|
||||||
|
static inline void volk_gnsssdr_s32f_x2_update_local_carrier_32fc_generic(lv_32fc_t* d_carr_sign, const float phase_rad_init, const float phase_step_rad, unsigned int num_points){
|
||||||
|
|
||||||
|
// float* pointer1 = (float*)&phase_rad_init;
|
||||||
|
// *pointer1 = 0;
|
||||||
|
// float* pointer2 = (float*)&phase_step_rad;
|
||||||
|
// *pointer2 = 0.5;
|
||||||
|
|
||||||
|
float phase_rad = phase_rad_init;
|
||||||
|
for(int i = 0; i < num_points; i++)
|
||||||
|
{
|
||||||
|
*d_carr_sign = lv_cmake(cos(phase_rad), -sin(phase_rad));
|
||||||
|
d_carr_sign++;
|
||||||
|
phase_rad += phase_step_rad;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* LV_HAVE_GENERIC */
|
||||||
|
#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_x2_update_local_carrier_32fc_u_H */
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_x2_update_local_carrier_32fc_a_H
|
||||||
|
#define INCLUDED_volk_gnsssdr_32fc_s32f_x2_update_local_carrier_32fc_a_H
|
||||||
|
|
||||||
|
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#ifdef LV_HAVE_SSE2
|
||||||
|
#include <emmintrin.h>
|
||||||
|
/*!
|
||||||
|
\brief Accumulates the values in the input buffer
|
||||||
|
\param result The accumulated result
|
||||||
|
\param inputBuffer The buffer of data to be accumulated
|
||||||
|
\param num_points The number of values in inputBuffer to be accumulated
|
||||||
|
*/
|
||||||
|
static inline void volk_gnsssdr_s32f_x2_update_local_carrier_32fc_a_sse2(lv_32fc_t* d_carr_sign, const float phase_rad_init, const float phase_step_rad, unsigned int num_points){
|
||||||
|
|
||||||
|
// float* pointer1 = (float*)&phase_rad_init;
|
||||||
|
// *pointer1 = 0;
|
||||||
|
// float* pointer2 = (float*)&phase_step_rad;
|
||||||
|
// *pointer2 = 0.5;
|
||||||
|
|
||||||
|
const unsigned int sse_iters = num_points / 4;
|
||||||
|
|
||||||
|
__m128 _ps_minus_cephes_DP1 = _mm_set1_ps(-0.78515625f);
|
||||||
|
__m128 _ps_minus_cephes_DP2 = _mm_set1_ps(-2.4187564849853515625e-4f);
|
||||||
|
__m128 _ps_minus_cephes_DP3 = _mm_set1_ps(-3.77489497744594108e-8f);
|
||||||
|
__m128 _ps_sign_mask = _mm_set1_ps(-0.f);
|
||||||
|
__m128i _pi32_1 = _mm_set1_epi32(1);
|
||||||
|
__m128i _pi32_inv1 = _mm_set1_epi32(~1);
|
||||||
|
__m128i _pi32_2 = _mm_set1_epi32(2);
|
||||||
|
__m128i _pi32_4 = _mm_set1_epi32(4);
|
||||||
|
__m128 _ps_cephes_FOPI = _mm_set1_ps(1.27323954473516f); // 4 / PI
|
||||||
|
__m128 _ps_sincof_p0 = _mm_set1_ps(-1.9515295891E-4f);
|
||||||
|
__m128 _ps_sincof_p1 = _mm_set1_ps( 8.3321608736E-3f);
|
||||||
|
__m128 _ps_sincof_p2 = _mm_set1_ps(-1.6666654611E-1f);
|
||||||
|
__m128 _ps_coscof_p0 = _mm_set1_ps( 2.443315711809948E-005f);
|
||||||
|
__m128 _ps_coscof_p1 = _mm_set1_ps(-1.388731625493765E-003f);
|
||||||
|
__m128 _ps_coscof_p2 = _mm_set1_ps( 4.166664568298827E-002f);
|
||||||
|
__m128 _ps_1 = _mm_set1_ps(1.f);
|
||||||
|
__m128 _ps_0p5 = _mm_set1_ps(0.5f);
|
||||||
|
|
||||||
|
__m128 phase_step_rad_array = _mm_set1_ps(4*phase_step_rad);
|
||||||
|
|
||||||
|
__m128 phase_rad_array, x, s, c, swap_sign_bit_sin, sign_bit_cos, poly_mask, z, tmp, y, y2, ysin1, ysin2;
|
||||||
|
__m128 xmm1, xmm2, xmm3, sign_bit_sin;
|
||||||
|
__m128i emm0, emm2, emm4;
|
||||||
|
__VOLK_ATTR_ALIGNED(16) float sin_value[4];
|
||||||
|
__VOLK_ATTR_ALIGNED(16) float cos_value[4];
|
||||||
|
|
||||||
|
phase_rad_array = _mm_set_ps (phase_rad_init+3*phase_step_rad, phase_rad_init+2*phase_step_rad, phase_rad_init+phase_step_rad, phase_rad_init);
|
||||||
|
|
||||||
|
for(int i = 0; i < sse_iters; i++)
|
||||||
|
{
|
||||||
|
x = phase_rad_array;
|
||||||
|
|
||||||
|
/* extract the sign bit (upper one) */
|
||||||
|
sign_bit_sin = _mm_and_ps(x, _ps_sign_mask);
|
||||||
|
|
||||||
|
/* take the absolute value */
|
||||||
|
x = _mm_xor_ps(x, sign_bit_sin);
|
||||||
|
|
||||||
|
/* scale by 4/Pi */
|
||||||
|
y = _mm_mul_ps(x, _ps_cephes_FOPI);
|
||||||
|
|
||||||
|
/* store the integer part of y in emm2 */
|
||||||
|
emm2 = _mm_cvttps_epi32(y);
|
||||||
|
|
||||||
|
/* j=(j+1) & (~1) (see the cephes sources) */
|
||||||
|
emm2 = _mm_add_epi32(emm2, _pi32_1);
|
||||||
|
emm2 = _mm_and_si128(emm2, _pi32_inv1);
|
||||||
|
y = _mm_cvtepi32_ps(emm2);
|
||||||
|
|
||||||
|
emm4 = emm2;
|
||||||
|
|
||||||
|
/* get the swap sign flag for the sine */
|
||||||
|
emm0 = _mm_and_si128(emm2, _pi32_4);
|
||||||
|
emm0 = _mm_slli_epi32(emm0, 29);
|
||||||
|
swap_sign_bit_sin = _mm_castsi128_ps(emm0);
|
||||||
|
|
||||||
|
/* get the polynom selection mask for the sine*/
|
||||||
|
emm2 = _mm_and_si128(emm2, _pi32_2);
|
||||||
|
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
|
||||||
|
poly_mask = _mm_castsi128_ps(emm2);
|
||||||
|
|
||||||
|
/* The magic pass: "Extended precision modular arithmetic"
|
||||||
|
x = ((x - y * DP1) - y * DP2) - y * DP3; */
|
||||||
|
xmm1 = _mm_mul_ps(y, _ps_minus_cephes_DP1);
|
||||||
|
xmm2 = _mm_mul_ps(y, _ps_minus_cephes_DP2);
|
||||||
|
xmm3 = _mm_mul_ps(y, _ps_minus_cephes_DP3);
|
||||||
|
x = _mm_add_ps(_mm_add_ps(x, xmm1), _mm_add_ps(xmm2, xmm3));
|
||||||
|
|
||||||
|
emm4 = _mm_sub_epi32(emm4, _pi32_2);
|
||||||
|
emm4 = _mm_andnot_si128(emm4, _pi32_4);
|
||||||
|
emm4 = _mm_slli_epi32(emm4, 29);
|
||||||
|
sign_bit_cos = _mm_castsi128_ps(emm4);
|
||||||
|
|
||||||
|
sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin);
|
||||||
|
|
||||||
|
/* Evaluate the first polynom (0 <= x <= Pi/4) */
|
||||||
|
z = _mm_mul_ps(x,x);
|
||||||
|
y = _ps_coscof_p0;
|
||||||
|
y = _mm_mul_ps(y, z);
|
||||||
|
y = _mm_add_ps(y, _ps_coscof_p1);
|
||||||
|
y = _mm_mul_ps(y, z);
|
||||||
|
y = _mm_add_ps(y, _ps_coscof_p2);
|
||||||
|
y = _mm_mul_ps(y, _mm_mul_ps(z, z));
|
||||||
|
tmp = _mm_mul_ps(z, _ps_0p5);
|
||||||
|
y = _mm_sub_ps(y, tmp);
|
||||||
|
y = _mm_add_ps(y, _ps_1);
|
||||||
|
|
||||||
|
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
|
||||||
|
y2 = _ps_sincof_p0;
|
||||||
|
y2 = _mm_mul_ps(y2, z);
|
||||||
|
y2 = _mm_add_ps(y2, _ps_sincof_p1);
|
||||||
|
y2 = _mm_mul_ps(y2, z);
|
||||||
|
y2 = _mm_add_ps(y2, _ps_sincof_p2);
|
||||||
|
y2 = _mm_mul_ps(y2, _mm_mul_ps(z, x));
|
||||||
|
y2 = _mm_add_ps(y2, x);
|
||||||
|
|
||||||
|
/* select the correct result from the two polynoms */
|
||||||
|
xmm3 = poly_mask;
|
||||||
|
ysin2 = _mm_and_ps(xmm3, y2);
|
||||||
|
ysin1 = _mm_andnot_ps(xmm3, y);
|
||||||
|
y2 = _mm_sub_ps(y2,ysin2);
|
||||||
|
y = _mm_sub_ps(y, ysin1);
|
||||||
|
|
||||||
|
xmm1 = _mm_add_ps(ysin1,ysin2);
|
||||||
|
xmm2 = _mm_add_ps(y,y2);
|
||||||
|
|
||||||
|
/* update the sign */
|
||||||
|
s = _mm_xor_ps(xmm1, sign_bit_sin);
|
||||||
|
c = _mm_xor_ps(xmm2, sign_bit_cos);
|
||||||
|
|
||||||
|
//GNSS-SDR needs to return -sin
|
||||||
|
s = _mm_xor_ps(s, _ps_sign_mask);
|
||||||
|
|
||||||
|
_mm_store_ps ((float*)sin_value, s);
|
||||||
|
_mm_store_ps ((float*)cos_value, c);
|
||||||
|
|
||||||
|
for(int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
|
d_carr_sign[i] = lv_cmake(cos_value[i], sin_value[i]);
|
||||||
|
}
|
||||||
|
d_carr_sign += 4;
|
||||||
|
|
||||||
|
phase_rad_array = _mm_add_ps (phase_rad_array, phase_step_rad_array);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (num_points%4!=0)
|
||||||
|
{
|
||||||
|
__VOLK_ATTR_ALIGNED(16) float phase_rad_store[4];
|
||||||
|
_mm_store_si128 ((__m128i*)phase_rad_store, phase_rad_array);
|
||||||
|
|
||||||
|
float phase_rad = phase_rad_store[0];
|
||||||
|
|
||||||
|
for(int i = 0; i < num_points%4; i++)
|
||||||
|
{
|
||||||
|
*d_carr_sign = lv_cmake(cos(phase_rad), -sin(phase_rad));
|
||||||
|
d_carr_sign++;
|
||||||
|
phase_rad += phase_step_rad;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* LV_HAVE_SSE2 */
|
||||||
|
|
||||||
|
#ifdef LV_HAVE_GENERIC
|
||||||
|
/*!
|
||||||
|
\brief Accumulates the values in the input buffer
|
||||||
|
\param result The accumulated result
|
||||||
|
\param inputBuffer The buffer of data to be accumulated
|
||||||
|
\param num_points The number of values in inputBuffer to be accumulated
|
||||||
|
*/
|
||||||
|
static inline void volk_gnsssdr_s32f_x2_update_local_carrier_32fc_a_generic(lv_32fc_t* d_carr_sign, const float phase_rad_init, const float phase_step_rad, unsigned int num_points){
|
||||||
|
|
||||||
|
// float* pointer1 = (float*)&phase_rad_init;
|
||||||
|
// *pointer1 = 0;
|
||||||
|
// float* pointer2 = (float*)&phase_step_rad;
|
||||||
|
// *pointer2 = 0.5;
|
||||||
|
|
||||||
|
float phase_rad = phase_rad_init;
|
||||||
|
for(int i = 0; i < num_points; i++)
|
||||||
|
{
|
||||||
|
*d_carr_sign = lv_cmake(cos(phase_rad), -sin(phase_rad));
|
||||||
|
d_carr_sign++;
|
||||||
|
phase_rad += phase_step_rad;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* LV_HAVE_GENERIC */
|
||||||
|
#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_x2_update_local_carrier_32fc_a_H */
|
||||||
|
|
@ -68,6 +68,9 @@ VOLK_RUN_TESTS(volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 20462, 1);
|
|||||||
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x7_cw_vepl_corr_TEST_32fc_x5, 1e-4, 0, 20462, 1);
|
VOLK_RUN_TESTS(volk_gnsssdr_8ic_x7_cw_vepl_corr_TEST_32fc_x5, 1e-4, 0, 20462, 1);
|
||||||
|
|
||||||
VOLK_RUN_TESTS(volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc, 1e-4, 0, 20462, 1);
|
VOLK_RUN_TESTS(volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc, 1e-4, 0, 20462, 1);
|
||||||
|
VOLK_RUN_TESTS(volk_gnsssdr_s32f_x2_update_local_carrier_32fc, 1e-4, 0, 20462, 1);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -272,7 +272,7 @@ void galileo_e1_dll_pll_veml_tracking_cc::update_local_code()
|
|||||||
|
|
||||||
epl_loop_length_samples = d_current_prn_length_samples + very_early_late_spc_samples*2;
|
epl_loop_length_samples = d_current_prn_length_samples + very_early_late_spc_samples*2;
|
||||||
|
|
||||||
volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_manual(d_very_early_code, (float) d_very_early_late_spc_chips, (float) code_length_half_chips, (float) code_phase_step_half_chips, (float) tcode_half_chips, d_ca_code, epl_loop_length_samples, "generic");
|
//volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_manual(d_very_early_code, (float) d_very_early_late_spc_chips, (float) code_length_half_chips, (float) code_phase_step_half_chips, (float) tcode_half_chips, d_ca_code, epl_loop_length_samples, "generic");
|
||||||
|
|
||||||
volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_manual(d_very_early_code, (float) d_very_early_late_spc_chips, (float) code_length_half_chips, (float) code_phase_step_half_chips, (float) tcode_half_chips, d_ca_code, epl_loop_length_samples, "u_sse4_1");
|
volk_gnsssdr_32fc_s32f_x4_update_local_code_32fc_manual(d_very_early_code, (float) d_very_early_late_spc_chips, (float) code_length_half_chips, (float) code_phase_step_half_chips, (float) tcode_half_chips, d_ca_code, epl_loop_length_samples, "u_sse4_1");
|
||||||
|
|
||||||
@ -297,11 +297,16 @@ void galileo_e1_dll_pll_veml_tracking_cc::update_local_carrier()
|
|||||||
phase_step_rad = (float)GPS_TWO_PI*d_carrier_doppler_hz / (float)d_fs_in;
|
phase_step_rad = (float)GPS_TWO_PI*d_carrier_doppler_hz / (float)d_fs_in;
|
||||||
// Initialize the carrier phase with the remanent carrier phase of the K-2 loop
|
// Initialize the carrier phase with the remanent carrier phase of the K-2 loop
|
||||||
phase_rad = d_rem_carr_phase_rad;
|
phase_rad = d_rem_carr_phase_rad;
|
||||||
for(int i = 0; i < d_current_prn_length_samples; i++)
|
|
||||||
{
|
//volk_gnsssdr_s32f_x2_update_local_carrier_32fc_manual(d_carr_sign, phase_rad, phase_step_rad, d_current_prn_length_samples, "generic");
|
||||||
d_carr_sign[i] = gr_complex(cos(phase_rad), -sin(phase_rad));
|
|
||||||
phase_rad += phase_step_rad;
|
volk_gnsssdr_s32f_x2_update_local_carrier_32fc_manual(d_carr_sign, phase_rad, phase_step_rad, d_current_prn_length_samples, "u_sse2");
|
||||||
}
|
|
||||||
|
// for(int i = 0; i < d_current_prn_length_samples; i++)
|
||||||
|
// {
|
||||||
|
// d_carr_sign[i] = gr_complex(cos(phase_rad), -sin(phase_rad));
|
||||||
|
// phase_rad += phase_step_rad;
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
galileo_e1_dll_pll_veml_tracking_cc::~galileo_e1_dll_pll_veml_tracking_cc()
|
galileo_e1_dll_pll_veml_tracking_cc::~galileo_e1_dll_pll_veml_tracking_cc()
|
||||||
|
Loading…
Reference in New Issue
Block a user