1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2024-12-14 04:00:34 +00:00

Modified tracking in order to test the protokernel

Modified tracking in order to test the new 8 bits protokernels’
performance.
This commit is contained in:
Andrés Cecilia Luque 2014-09-27 01:44:49 +02:00
parent 6aa338e582
commit b2dab27cdb
5 changed files with 258 additions and 20 deletions

View File

@ -106,25 +106,27 @@ int main(int argc, char *argv[]) {
//GNSS-SDR PROTO-KERNELS
//lv_32fc_t sfv = lv_cmake((float)1, (float)2);
//example: VOLK_PROFILE(volk_gnsssdr_8ic_s8ic_multiply_8ic, 1e-4, sfv, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_convert_8ic, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_convert_8ic, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_s32f_convert_8ic, 1e-4, 5, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_TEST_32fc_x5, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_TEST_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_16ic_x5_cw_epl_corr_TEST_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_x7_cw_vepl_corr_32fc_x5, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_16ic_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_x5_cw_epl_corr_32fc_x3, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_convert_16ic, 1e-4, 0, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32fc_convert_16ic, 1e-4, 0, 16000, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32f_accumulator_s32f, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
/*VOLK_PROFILE(volk_gnsssdr_32f_accumulator_s32f, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8i_accumulator_s8i, 1e-4, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32f_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_8i_index_max_16u, 3, 0, 204602, 5000, &results, benchmark_mode, kernel_regex);
@ -144,7 +146,7 @@ int main(int argc, char *argv[]) {
VOLK_PROFILE(volk_gnsssdr_8u_x2_multiply_8u, 1e-4, 0, 204602, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_64f_accumulator_64f, 1e-4, 0, 16000, 1000, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_32f_s32f_convert_16i, 1e-4, 1, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_16i_s32f_convert_32f, 1e-4, 1, 204602, 250, &results, benchmark_mode, kernel_regex);
VOLK_PROFILE(volk_gnsssdr_16i_s32f_convert_32f, 1e-4, 1, 204602, 250, &results, benchmark_mode, kernel_regex);*/
if(store_results) {
char path[1024];
volk_gnsssdr_get_config_path(path);

View File

@ -0,0 +1,231 @@
/*!
* \file volk_gnsssdr_32fc_s32f_convert_8ic.h
* \brief Volk protokernel: converts float32 complex values to 8 integer complex values taking care of overflow
* \authors <ul>
* <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
* </ul>
*
* -------------------------------------------------------------------------
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* GNSS-SDR is a software defined Global Navigation
* Satellite Systems receiver
*
* This file is part of GNSS-SDR.
*
* GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_u_H
#define INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_u_H
#include <inttypes.h>
#include <stdio.h>
#include <math.h>
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_s32f_convert_8ic_u_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points){
const unsigned int sse_iters = num_points/8;
float* inputVectorPtr = (float*)inputVector;
int8_t* outputVectorPtr = (int8_t*)outputVector;
__m128 invScalar = _mm_set_ps1(1.0/scalar);
float min_val = -128;
float max_val = 127;
__m128 inputVal1, inputVal2, inputVal3, inputVal4;
__m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
__m128i int8InputVal;
__m128 ret1, ret2, ret3, ret4;
__m128 vmin_val = _mm_set_ps1(min_val);
__m128 vmax_val = _mm_set_ps1(max_val);
for(unsigned int i = 0;i < sse_iters; i++){
inputVal1 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal3 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal4 = _mm_loadu_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal1 = _mm_mul_ps(inputVal1, invScalar);
inputVal2 = _mm_mul_ps(inputVal2, invScalar);
inputVal3 = _mm_mul_ps(inputVal3, invScalar);
inputVal4 = _mm_mul_ps(inputVal4, invScalar);
// Clip
ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
ret3 = _mm_max_ps(_mm_min_ps(inputVal3, vmax_val), vmin_val);
ret4 = _mm_max_ps(_mm_min_ps(inputVal4, vmax_val), vmin_val);
intInputVal1 = _mm_cvtps_epi32(ret1);
intInputVal2 = _mm_cvtps_epi32(ret2);
intInputVal3 = _mm_cvtps_epi32(ret3);
intInputVal4 = _mm_cvtps_epi32(ret4);
intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
intInputVal2 = _mm_packs_epi32(intInputVal3, intInputVal4);
int8InputVal = _mm_packs_epi16(intInputVal1, intInputVal2);
_mm_storeu_si128((__m128i*)outputVectorPtr, int8InputVal);
outputVectorPtr += 16;
}
float scaled = 0;
for(unsigned int i = 0; i < (num_points%4)*4; i++){
scaled = inputVectorPtr[i]/scalar;
if(scaled > max_val)
scaled = max_val;
else if(scaled < min_val)
scaled = min_val;
outputVectorPtr[i] = (int8_t)rintf(scaled);
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_s32f_convert_8ic_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points){
float* inputVectorPtr = (float*)inputVector;
int8_t* outputVectorPtr = (int8_t*)outputVector;
float scaled = 0;
float min_val = -128;
float max_val = 127;
for(unsigned int i = 0; i < num_points*2; i++){
scaled = (inputVectorPtr[i])/scalar;
if(scaled > max_val)
scaled = max_val;
else if(scaled < min_val)
scaled = min_val;
outputVectorPtr[i] = (int8_t)rintf(scaled);
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_u_H */
#ifndef INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_a_H
#define INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_a_H
#include <volk/volk_common.h>
#include <inttypes.h>
#include <stdio.h>
#include <math.h>
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_s32f_convert_8ic_a_sse2(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points){
const unsigned int sse_iters = num_points/8;
float* inputVectorPtr = (float*)inputVector;
int8_t* outputVectorPtr = (int8_t*)outputVector;
__m128 invScalar = _mm_set_ps1(1.0/scalar);
float min_val = -128;
float max_val = 127;
__m128 inputVal1, inputVal2, inputVal3, inputVal4;
__m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
__m128i int8InputVal;
__m128 ret1, ret2, ret3, ret4;
__m128 vmin_val = _mm_set_ps1(min_val);
__m128 vmax_val = _mm_set_ps1(max_val);
for(unsigned int i = 0;i < sse_iters; i++){
inputVal1 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal2 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal3 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal4 = _mm_load_ps((float*)inputVectorPtr); inputVectorPtr += 4;
inputVal1 = _mm_mul_ps(inputVal1, invScalar);
inputVal2 = _mm_mul_ps(inputVal2, invScalar);
inputVal3 = _mm_mul_ps(inputVal3, invScalar);
inputVal4 = _mm_mul_ps(inputVal4, invScalar);
// Clip
ret1 = _mm_max_ps(_mm_min_ps(inputVal1, vmax_val), vmin_val);
ret2 = _mm_max_ps(_mm_min_ps(inputVal2, vmax_val), vmin_val);
ret3 = _mm_max_ps(_mm_min_ps(inputVal3, vmax_val), vmin_val);
ret4 = _mm_max_ps(_mm_min_ps(inputVal4, vmax_val), vmin_val);
intInputVal1 = _mm_cvtps_epi32(ret1);
intInputVal2 = _mm_cvtps_epi32(ret2);
intInputVal3 = _mm_cvtps_epi32(ret3);
intInputVal4 = _mm_cvtps_epi32(ret4);
intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
intInputVal2 = _mm_packs_epi32(intInputVal3, intInputVal4);
int8InputVal = _mm_packs_epi16(intInputVal1, intInputVal2);
_mm_store_si128((__m128i*)outputVectorPtr, int8InputVal);
outputVectorPtr += 16;
}
float scaled = 0;
for(unsigned int i = 0; i < (num_points%4)*4; i++){
scaled = inputVectorPtr[i]/scalar;
if(scaled > max_val)
scaled = max_val;
else if(scaled < min_val)
scaled = min_val;
outputVectorPtr[i] = (int8_t)rintf(scaled);
}
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
\brief Converts a float vector of 64 bits (32 bits each part) into a 16 integer vector (8 bits each part)
\param inputVector The floating point input data buffer
\param outputVector The 16 bit output data buffer
\param num_points The number of data values to be converted
*/
static inline void volk_gnsssdr_32fc_s32f_convert_8ic_a_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, const float scalar, unsigned int num_points){
float* inputVectorPtr = (float*)inputVector;
int8_t* outputVectorPtr = (int8_t*)outputVector;
float scaled = 0;
float min_val = -128;
float max_val = 127;
for(unsigned int i = 0; i < num_points*2; i++){
scaled = inputVectorPtr[i]/scalar;
if(scaled > max_val)
scaled = max_val;
else if(scaled < min_val)
scaled = min_val;
outputVectorPtr[i] = (int8_t)rintf(scaled);
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_s32f_convert_8ic_a_H */

View File

@ -50,6 +50,7 @@ VOLK_RUN_TESTS(volk_gnsssdr_8i_max_s8i, 3, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_64f_accumulator_64f, 3, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_convert_16ic, 3, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_s32f_convert_8ic, 3, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_32fc_convert_8ic, 3, 0, 20462, 1);
VOLK_RUN_TESTS(volk_gnsssdr_16i_s32f_convert_32f, 3, 0, 20462, 1);

View File

@ -328,7 +328,7 @@ galileo_e1_dll_pll_veml_tracking_cc::~galileo_e1_dll_pll_veml_tracking_cc()
volk_free(d_late_code8);
volk_free(d_very_late_code8);
volk_free(d_carr_sign8);
volk_free(in16);
volk_free(in8);
delete[] d_ca_code;
delete[] d_Prompt_buffer;
@ -376,7 +376,7 @@ int galileo_e1_dll_pll_veml_tracking_cc::general_work (int noutput_items,gr_vect
update_local_code();
update_local_carrier();
// perform carrier wipe-off and compute Very Early, Early, Prompt, Late and Very Late correlation
//perform carrier wipe-off and compute Very Early, Early, Prompt, Late and Very Late correlation
d_correlator.Carrier_wipeoff_and_VEPL_volk(d_current_prn_length_samples,
in,
d_carr_sign,
@ -402,17 +402,21 @@ int galileo_e1_dll_pll_veml_tracking_cc::general_work (int noutput_items,gr_vect
volk_gnsssdr_32fc_convert_16ic(in16, in, d_current_prn_length_samples);
volk_gnsssdr_32fc_convert_16ic(d_carr_sign16, d_carr_sign, d_current_prn_length_samples);
volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5(d_Very_Early, d_Early, d_Prompt, d_Late, d_Very_Late, in16, d_carr_sign16, d_very_early_code16, d_early_code16, d_prompt_code16, d_late_code16, d_very_late_code16, d_current_prn_length_samples);
volk_gnsssdr_32fc_convert_8ic(d_very_early_code8, d_very_early_code, d_current_prn_length_samples);
volk_gnsssdr_32fc_convert_8ic(d_early_code8, d_early_code, d_current_prn_length_samples);
volk_gnsssdr_32fc_convert_8ic(d_prompt_code8, d_prompt_code, d_current_prn_length_samples);
volk_gnsssdr_32fc_convert_8ic(d_late_code8, d_late_code, d_current_prn_length_samples);
volk_gnsssdr_32fc_convert_8ic(d_very_late_code8, d_very_late_code, d_current_prn_length_samples);
volk_gnsssdr_32fc_convert_8ic(in8, in, d_current_prn_length_samples);
volk_gnsssdr_32fc_convert_8ic(d_carr_sign8, d_carr_sign, d_current_prn_length_samples);
volk_gnsssdr_16ic_x7_cw_vepl_corr_32fc_x5(d_Very_Early, d_Early, d_Prompt, d_Late, d_Very_Late, in16, d_carr_sign16, d_very_early_code16, d_early_code16, d_prompt_code16, d_late_code16, d_very_late_code16, d_current_prn_length_samples);
volk_gnsssdr_32fc_s32f_convert_8ic(in8, in, 4, d_current_prn_length_samples);
volk_gnsssdr_8ic_x7_cw_vepl_corr_32fc_x5(d_Very_Early, d_Early, d_Prompt, d_Late, d_Very_Late, in8, d_carr_sign8, d_very_early_code8, d_early_code8, d_prompt_code8, d_late_code8, d_very_late_code8, d_current_prn_length_samples);
volk_gnsssdr_8ic_x7_cw_vepl_corr_unsafe_32fc_x5(d_Very_Early, d_Early, d_Prompt, d_Late, d_Very_Late, in8, d_carr_sign8, d_very_early_code8, d_early_code8, d_prompt_code8, d_late_code8, d_very_late_code8, d_current_prn_length_samples);
volk_gnsssdr_8ic_x7_cw_vepl_corr_safe_32fc_x5(d_Very_Early, d_Early, d_Prompt, d_Late, d_Very_Late, in8, d_carr_sign8, d_very_early_code8, d_early_code8, d_prompt_code8, d_late_code8, d_very_late_code8, d_current_prn_length_samples);
// ################## PLL ##########################################################