1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2025-01-27 17:34:53 +00:00
This commit is contained in:
Carles Fernandez 2014-11-23 21:34:41 +01:00
commit b43863bea8
39 changed files with 1493 additions and 1335 deletions

View File

@ -146,7 +146,7 @@ arma::vec galileo_e1_ls_pvt::leastSquarePos(arma::mat satpos, arma::vec obs, arm
arma::vec Rot_X; arma::vec Rot_X;
double rho2; double rho2;
double traveltime; double traveltime;
double trop; double trop = 0.0;
double dlambda; double dlambda;
double dphi; double dphi;
double h; double h;

View File

@ -148,7 +148,7 @@ arma::vec gps_l1_ca_ls_pvt::leastSquarePos(arma::mat satpos, arma::vec obs, arma
arma::vec Rot_X; arma::vec Rot_X;
double rho2; double rho2;
double traveltime; double traveltime;
double trop; double trop = 0.0;
double dlambda; double dlambda;
double dphi; double dphi;
double h; double h;

View File

@ -148,7 +148,7 @@ arma::vec hybrid_ls_pvt::leastSquarePos(arma::mat satpos, arma::vec obs, arma::m
arma::vec Rot_X; arma::vec Rot_X;
double rho2; double rho2;
double traveltime; double traveltime;
double trop; double trop = 0.0;
double dlambda; double dlambda;
double dphi; double dphi;
double h; double h;

View File

@ -0,0 +1,20 @@
########################################################################
# Adding proto-kernels to the module
########################################################################
1) Add your proto-kernels inside the kernels/ folder, and the ORC implementations inside the orc/ folder. Add the macro implementations inside the /kernels/CommonMacros folder (those folders are found in the root of the volk_gnsssdr module).
2) Add one profiling line for each of the proto-kernels inside the /apps/volk_gnsssdr_profile.cc file.
3) Add one test line for each of the proto-kernels inside the /lib/testqa.cc file.

########################################################################
# Modifications to allow profiling of some proto-kernels with special parameters
########################################################################
Some of the proto-kernels that GNSS-SDR needs are not supported by the profiling environment of the volk_gnsssdr module. In order to profile them, some modifications need to be made to two files:
1) src/algorithms/libs/volk_gnsssdr/lib/qa_utils.cc
The first part of this file defines the parameters supported by the environment. The number after run_cast_test indicates the total number of parameters passed to the proto-kernel (input + output parameters). The other part indicates the type of the data passed. Inside func(....) you will need to add the same number of buffs[ ] as the one specified after run_cast_test.
2) src/algorithms/libs/volk_gnsssdr/lib/qa_utils.h
In the header you will need to add typedefs for the new definitions made in the .cc file. Take care: you will need to add the same number of void * as the one specified after run_cast_test.
3) To be able to use volk_gnsssdr and default volk functions at the same time in the same file, it is required to add the template files that volk_gnsssdr module uses at build time to generate some headers.
The files are found inside tmpl/:
volk_gnsssdr.tmpl.h
volk_gnsssdr_typedefs.tmpl.h
volk_gnsssdr_machines.tmpl.h
volk_gnsssdr_cpu.tmpl.h
volk_gnsssdr_config_fixed.tmpl.h

View File

@ -27,7 +27,7 @@ Processors providing SIMD instruction sets compute with multiple processing elem
\.TP \.TP
Check http://gnss-sdr.org for more information. Check http://gnss-sdr.org for more information.
.SH HISTORY .SH HISTORY
This library was originally developed by Andres Cecilia Luque in the framework of the Summer of Code in Space program (SOCIS 2014) by the European Space Agency (ESA), and then integrated into \fBgnss-sdr\fR. This library was originally developed by Andres Cecilia Luque in the framework of the Summer of Code in Space program (SOCIS 2014) by the European Space Agency (ESA), and then integrated into \fBgnss-sdr\fR. This software is based on the VOLK library available at https://gnuradio.org/redmine/projects/gnuradio/wiki/Volk
.SH BUGS .SH BUGS
No known bugs. No known bugs.
.SH AUTHOR .SH AUTHOR

View File

@ -1,5 +1,9 @@
/* -*- c++ -*- */ /*!
/* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) * \file constants.h
* \brief volk_gnsssdr constants
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
* *
* This file is part of GNSS-SDR. * This file is part of GNSS-SDR.
* *
@ -17,8 +21,8 @@
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef INCLUDED_VOLK_CONSTANTS_H #ifndef GNSS_SDR_VOLK_GNSSSDR_CONSTANTS_H
#define INCLUDED_VOLK_CONSTANTS_H #define GNSS_SDR_VOLK_GNSSSDR_CONSTANTS_H
#include <volk_gnsssdr/volk_gnsssdr_common.h> #include <volk_gnsssdr/volk_gnsssdr_common.h>
@ -33,4 +37,4 @@ VOLK_API char* volk_gnsssdr_available_machines();
__VOLK_DECL_END __VOLK_DECL_END
#endif /* INCLUDED_VOLK_CONSTANTS_H */ #endif /* GNSS_SDR_VOLK_GNSSSDR_CONSTANTS_H */

View File

@ -1,4 +1,9 @@
/* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) /*!
* \file volk_gnsssdr_common.h
* \brief Cross-platform attribute macros
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
* *
* This file is part of GNSS-SDR. * This file is part of GNSS-SDR.
* *
@ -111,4 +116,4 @@ union bit128{
#define bit128_p(x) ((union bit128 *)(x)) #define bit128_p(x) ((union bit128 *)(x))
#endif /*INCLUDED_LIBVOLK_COMMON_H*/ #endif /* INCLUDED_LIBVOLK_COMMON_H */

View File

@ -1,4 +1,9 @@
/* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) /*!
* \file volk_gnsssdr_complex.h
* \brief Provide typedefs and operators for all complex types in C and C++.
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
* *
* This file is part of GNSS-SDR. * This file is part of GNSS-SDR.
* *

View File

@ -1,5 +1,10 @@
/* -*- c -*- */ /*!
/* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) * \file volk_gnsssdr_malloc.h
* \brief The volk_gnsssdr_malloc function behaves like malloc in that it
* returns a pointer to the allocated memory.
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
* *
* This file is part of GNSS-SDR. * This file is part of GNSS-SDR.
* *
@ -34,7 +39,7 @@ __VOLK_DECL_BEGIN
* memory that are guaranteed to be on an alignment, VOLK handles this * memory that are guaranteed to be on an alignment, VOLK handles this
* itself. The volk_gnsssdr_malloc function behaves like malloc in that it * itself. The volk_gnsssdr_malloc function behaves like malloc in that it
* returns a pointer to the allocated memory. However, it also takes * returns a pointer to the allocated memory. However, it also takes
* in an alignment specfication, which is usually something like 16 or * in an alignment specification, which is usually something like 16 or
* 32 to ensure that the aligned memory is located on a particular * 32 to ensure that the aligned memory is located on a particular
* byte boundary for use with SIMD. * byte boundary for use with SIMD.
* *
@ -55,7 +60,7 @@ VOLK_API void *volk_gnsssdr_malloc(size_t size, size_t alignment);
/*! /*!
* \brief Free's memory allocated by volk_gnsssdr_malloc. * \brief Free's memory allocated by volk_gnsssdr_malloc.
* \param aptr The aligned pointer allocaed by volk_gnsssdr_malloc. * \param aptr The aligned pointer allocated by volk_gnsssdr_malloc.
*/ */
VOLK_API void volk_gnsssdr_free(void *aptr); VOLK_API void volk_gnsssdr_free(void *aptr);

View File

@ -1,4 +1,10 @@
/* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) /*!
* \file volk_gnsssdr_prefs.h
* \brief Gets path to volk_gnsssdr_config profiling info and loads
* prefs into global prefs struct
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
* *
* This file is part of GNSS-SDR. * This file is part of GNSS-SDR.
* *
@ -31,16 +37,16 @@ typedef struct volk_gnsssdr_arch_pref
char impl_u[128]; //best unaligned impl char impl_u[128]; //best unaligned impl
} volk_gnsssdr_arch_pref_t; } volk_gnsssdr_arch_pref_t;
//////////////////////////////////////////////////////////////////////// /*!
// get path to volk_gnsssdr_config profiling info * \brief get path to volk_gnsssdr_config profiling info
//////////////////////////////////////////////////////////////////////// */
VOLK_API void volk_gnsssdr_get_config_path(char *); VOLK_API void volk_gnsssdr_get_config_path(char *);
//////////////////////////////////////////////////////////////////////// /*!
// load prefs into global prefs struct * \brief load prefs into global prefs struct
//////////////////////////////////////////////////////////////////////// */
VOLK_API size_t volk_gnsssdr_load_preferences(volk_gnsssdr_arch_pref_t **); VOLK_API size_t volk_gnsssdr_load_preferences(volk_gnsssdr_arch_pref_t **);
__VOLK_DECL_END __VOLK_DECL_END
#endif //INCLUDED_VOLK_PREFS_H #endif /* INCLUDED_VOLK_PREFS_H */

View File

@ -9,13 +9,13 @@ Inline functions have been tested, and they introduce a really small time penalt
Syntax Syntax
#################################################################### ####################################################################
In order to allow better understanding of the code I created the macros with an specific syntax. In order to allow better understanding of the code we created the macros with a specific syntax:
1) Inside CommonMacros.h you will find macros for common operations. I will explain the syntax with an example: 1) Inside CommonMacros.h you will find macros for common operations. We will explain the syntax with an example:
example: CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output) example: CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)
First of all, you find the characters “CM”, which means CommonMacros. After that the type and the amount of inputs is placed: “_16IC_X4” (16 bits complex integers, four inputs). The syntax for type is the same as the one used with volk protokernels, refer to GNURadio documentation for more help. The it comes the name of the macro (“_SCALAR_PRODUCT”), and after that the type and the amount of outputs (“_16IC_X2”). Finally it is placed the SSE minimum version needed to run (“_U_SSE2”). In the arguments you will find (from left to right) the inputs (four inputs: realx, imagx, realy, imagy), some variables that the macro needs to work (realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy) and finally the outputs (two outputs: real_output, imag_output). First of all, you find the characters "CM", which means CommonMacros. After that the type and the amount of inputs is placed: "_16IC_X4" (16 bits complex integers, four inputs). The syntax for type is the same as the one used with volk protokernels, refer to GNURadio documentation for more help. Then comes the name of the macro ("_SCALAR_PRODUCT"), and after that the type and the amount of outputs ("_16IC_X2"). Finally, the minimum SSE version needed to run is given ("_U_SSE2"). In the arguments you will find (from left to right) the inputs (four inputs: realx, imagx, realy, imagy), some variables that the macro needs to work (realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy) and finally the outputs (two outputs: real_output, imag_output).
The variables that the macro needs are specified when calling it in order to avoid after-compile problems: if you want to use a macro you will need to declare all the variables it needs before, or you will not be able to compile. The variables that the macro needs are specified when calling it in order to avoid after-compile problems: if you want to use a macro you will need to declare all the variables it needs before, or you will not be able to compile.
2) Inside all the other headers, CommonMacros_XXXXXX.h you will find macros for a specific group of proto-kernels. The syntax is the same as the CommonMacros.h 2) Inside all the other headers, CommonMacros_XXXXXX.h you will find macros for a specific group of proto-kernels. The syntax is the same as the CommonMacros.h
@ -24,11 +24,11 @@ The variables that the macro needs are specified when calling it in order to avo
Workflow Workflow
#################################################################### ####################################################################
In order to use the macros easily, I usually test the code without macros inside a testing proto-kernel, where you are able to test it, debug it and use breakpoints. In order to use the macros easily, we usually test the code without macros inside a testing proto-kernel, where you are able to test it, debug it and use breakpoints.
When it works I place code inside a macro an I test it again. When it works we place the code inside a macro and test it again.
#################################################################### ####################################################################
Why macros Why macros
#################################################################### ####################################################################
1) They are the only way I could find for sharing code between proto-kernels without performance penalty. 1) They are the only way we could find for sharing code between proto-kernels without performance penalty.
2) It is true that they are really difficult to debug, but if you work with them responsibly it is not so hard. Volk_gnsssdr checks all the SSE proto-kernels implementations results against the generic implementation results, so if your macro is not working you will appreciate it after profiling it. 2) It is true that they are really difficult to debug, but if you work with them responsibly it is not so hard. Volk_gnsssdr checks all the SSE proto-kernels implementations results against the generic implementation results, so if your macro is not working you will appreciate it after profiling it.

View File

@ -25,83 +25,87 @@
#ifndef LV_HAVE_SSE2 #ifndef LV_HAVE_SSE2
void qa_16s_add_quad_aligned16::t1() { void qa_16s_add_quad_aligned16::t1()
printf("sse2 not available... no test performed\n"); {
printf("sse2 not available... no test performed\n");
} }
#else #else
void qa_16s_add_quad_aligned16::t1()
{
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
const int vlen = 3200;
const int ITERS = 100000;
__VOLK_ATTR_ALIGNED(16) short input0[vlen];
__VOLK_ATTR_ALIGNED(16) short input1[vlen];
__VOLK_ATTR_ALIGNED(16) short input2[vlen];
__VOLK_ATTR_ALIGNED(16) short input3[vlen];
__VOLK_ATTR_ALIGNED(16) short input4[vlen];
__VOLK_ATTR_ALIGNED(16) short output0[vlen];
__VOLK_ATTR_ALIGNED(16) short output1[vlen];
__VOLK_ATTR_ALIGNED(16) short output2[vlen];
__VOLK_ATTR_ALIGNED(16) short output3[vlen];
__VOLK_ATTR_ALIGNED(16) short output01[vlen];
__VOLK_ATTR_ALIGNED(16) short output11[vlen];
__VOLK_ATTR_ALIGNED(16) short output21[vlen];
__VOLK_ATTR_ALIGNED(16) short output31[vlen];
void qa_16s_add_quad_aligned16::t1() { for(int i = 0; i < vlen; ++i)
{
short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus1 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus1 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus2 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus2 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
volk_gnsssdr_environment_init(); input0[i] = plus0 - minus0;
clock_t start, end; input1[i] = plus1 - minus1;
double total; input2[i] = plus2 - minus2;
const int vlen = 3200; input3[i] = plus3 - minus3;
const int ITERS = 100000; input4[i] = plus4 - minus4;
__VOLK_ATTR_ALIGNED(16) short input0[vlen];
__VOLK_ATTR_ALIGNED(16) short input1[vlen];
__VOLK_ATTR_ALIGNED(16) short input2[vlen];
__VOLK_ATTR_ALIGNED(16) short input3[vlen];
__VOLK_ATTR_ALIGNED(16) short input4[vlen];
__VOLK_ATTR_ALIGNED(16) short output0[vlen]; }
__VOLK_ATTR_ALIGNED(16) short output1[vlen]; printf("16s_add_quad_aligned\n");
__VOLK_ATTR_ALIGNED(16) short output2[vlen];
__VOLK_ATTR_ALIGNED(16) short output3[vlen];
__VOLK_ATTR_ALIGNED(16) short output01[vlen];
__VOLK_ATTR_ALIGNED(16) short output11[vlen];
__VOLK_ATTR_ALIGNED(16) short output21[vlen];
__VOLK_ATTR_ALIGNED(16) short output31[vlen];
for(int i = 0; i < vlen; ++i) { start = clock();
short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; for(int count = 0; count < ITERS; ++count)
short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2; {
short plus1 = ((short) (rand() - (RAND_MAX/2))) >> 2; volk_gnsssdr_16s_add_quad_aligned16_manual(output0, output1, output2, output3, input0, input1, input2, input3, input4, vlen << 1 , "generic");
short minus1 = ((short) (rand() - (RAND_MAX/2))) >> 2; }
short plus2 = ((short) (rand() - (RAND_MAX/2))) >> 2; end = clock();
short minus2 = ((short) (rand() - (RAND_MAX/2))) >> 2; total = (double)(end-start)/(double)CLOCKS_PER_SEC;
short plus3 = ((short) (rand() - (RAND_MAX/2))) >> 2; printf("generic_time: %f\n", total);
short minus3 = ((short) (rand() - (RAND_MAX/2))) >> 2; start = clock();
short plus4 = ((short) (rand() - (RAND_MAX/2))) >> 2; for(int count = 0; count < ITERS; ++count)
short minus4 = ((short) (rand() - (RAND_MAX/2))) >> 2; {
volk_gnsssdr_16s_add_quad_aligned16_manual(output01, output11, output21, output31, input0, input1, input2, input3, input4, vlen << 1 , "sse2");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse2_time: %f\n", total);
for(int i = 0; i < 1; ++i)
{
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
}
input0[i] = plus0 - minus0; for(int i = 0; i < vlen; ++i)
input1[i] = plus1 - minus1; {
input2[i] = plus2 - minus2; //printf("%d...%d\n", output0[i], output01[i]);
input3[i] = plus3 - minus3; CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
input4[i] = plus4 - minus4; CPPUNIT_ASSERT_EQUAL(output1[i], output11[i]);
CPPUNIT_ASSERT_EQUAL(output2[i], output21[i]);
} CPPUNIT_ASSERT_EQUAL(output3[i], output31[i]);
printf("16s_add_quad_aligned\n"); }
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_gnsssdr_16s_add_quad_aligned16_manual(output0, output1, output2, output3, input0, input1, input2, input3, input4, vlen << 1 , "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_gnsssdr_16s_add_quad_aligned16_manual(output01, output11, output21, output31, input0, input1, input2, input3, input4, vlen << 1 , "sse2");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse2_time: %f\n", total);
for(int i = 0; i < 1; ++i) {
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
}
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output1[i], output11[i]);
CPPUNIT_ASSERT_EQUAL(output2[i], output21[i]);
CPPUNIT_ASSERT_EQUAL(output3[i], output31[i]);
}
} }
#endif #endif

View File

@ -22,14 +22,14 @@
#include <cppunit/extensions/HelperMacros.h> #include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h> #include <cppunit/TestCase.h>
class qa_16s_add_quad_aligned16 : public CppUnit::TestCase { class qa_16s_add_quad_aligned16 : public CppUnit::TestCase
{
CPPUNIT_TEST_SUITE (qa_16s_add_quad_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
CPPUNIT_TEST_SUITE (qa_16s_add_quad_aligned16); private:
CPPUNIT_TEST (t1); void t1 ();
CPPUNIT_TEST_SUITE_END ();
private:
void t1 ();
}; };

View File

@ -25,99 +25,98 @@
#ifndef LV_HAVE_SSSE3 #ifndef LV_HAVE_SSSE3
void qa_16s_branch_4_state_8_aligned16::t1() { void qa_16s_branch_4_state_8_aligned16::t1()
printf("ssse3 not available... no test performed\n"); {
printf("ssse3 not available... no test performed\n");
} }
#else #else
void qa_16s_branch_4_state_8_aligned16::t1() { void qa_16s_branch_4_state_8_aligned16::t1()
const int num_iters = 1000000; {
const int vlen = 32; const int num_iters = 1000000;
const int vlen = 32;
static char permute0[16]__attribute__((aligned(16))) = {0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01, 0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03}; static char permute0[16]__attribute__((aligned(16))) = {0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01, 0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03};
static char permute1[16]__attribute__((aligned(16))) = {0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03, 0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01}; static char permute1[16]__attribute__((aligned(16))) = {0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03, 0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01};
static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f}; static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f};
static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d}; static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d};
static char* permuters[4] = {permute0, permute1, permute2, permute3}; static char* permuters[4] = {permute0, permute1, permute2, permute3};
unsigned int num_bytes = vlen << 1; unsigned int num_bytes = vlen << 1;
volk_gnsssdr_environment_init(); volk_gnsssdr_environment_init();
clock_t start, end; clock_t start, end;
double total; double total;
__VOLK_ATTR_ALIGNED(16) short target[vlen]; __VOLK_ATTR_ALIGNED(16) short target[vlen];
__VOLK_ATTR_ALIGNED(16) short target2[vlen]; __VOLK_ATTR_ALIGNED(16) short target2[vlen];
__VOLK_ATTR_ALIGNED(16) short target3[vlen]; __VOLK_ATTR_ALIGNED(16) short target3[vlen];
__VOLK_ATTR_ALIGNED(16) short src0[vlen]; __VOLK_ATTR_ALIGNED(16) short src0[vlen];
__VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = { __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = {
7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 }; 7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 };
__VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = { __VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = {
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
__VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = { __VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = {
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
__VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = { __VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = {
0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 }; 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 };
__VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = { __VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = {
0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff }; 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff };
__VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
for(int i = 0; i < vlen; ++i)
{
src0[i] = i;
}
printf("16s_branch_4_state_8_aligned\n");
for(int i = 0; i < vlen; ++i) { start = clock();
src0[i] = i; for(int i = 0; i < num_iters; ++i)
{
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
}
end = clock();
} total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("permute_and_scalar_add_time: %f\n", total);
printf("16s_branch_4_state_8_aligned\n"); start = clock();
for(int i = 0; i < num_iters; ++i)
{
volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target2, src0, permuters, cntl2, cntl3, scalars, "ssse3");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
start = clock(); printf("branch_4_state_8_time, ssse3: %f\n", total);
for(int i = 0; i < num_iters; ++i) {
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC; start = clock();
for(int i = 0; i < num_iters; ++i)
{
volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic");
}
end = clock();
printf("permute_and_scalar_add_time: %f\n", total); total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("permute_and_scalar_add_time, generic: %f\n", total);
for(int i = 0; i < vlen; ++i)
{
printf("psa... %d, b4s8... %d\n", target[i], target3[i]);
}
start = clock(); for(int i = 0; i < vlen; ++i)
for(int i = 0; i < num_iters; ++i) { {
volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target2, src0, permuters, cntl2, cntl3, scalars, "ssse3");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC; CPPUNIT_ASSERT(target[i] == target2[i]);
CPPUNIT_ASSERT(target[i] == target3[i]);
printf("branch_4_state_8_time, ssse3: %f\n", total); }
start = clock();
for(int i = 0; i < num_iters; ++i) {
volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("permute_and_scalar_add_time, generic: %f\n", total);
for(int i = 0; i < vlen; ++i) {
printf("psa... %d, b4s8... %d\n", target[i], target3[i]);
}
for(int i = 0; i < vlen; ++i) {
CPPUNIT_ASSERT(target[i] == target2[i]);
CPPUNIT_ASSERT(target[i] == target3[i]);
}
} }

View File

@ -27,71 +27,75 @@
#ifndef LV_HAVE_SSE2 #ifndef LV_HAVE_SSE2
void qa_16s_permute_and_scalar_add_aligned16::t1() { void qa_16s_permute_and_scalar_add_aligned16::t1()
printf("sse2 not available... no test performed\n"); {
printf("sse2 not available... no test performed\n");
} }
#else #else
void qa_16s_permute_and_scalar_add_aligned16::t1() { void qa_16s_permute_and_scalar_add_aligned16::t1()
const int vlen = 64; {
const int vlen = 64;
unsigned int num_bytes = vlen << 1; unsigned int num_bytes = vlen << 1;
volk_gnsssdr_environment_init(); volk_gnsssdr_environment_init();
clock_t start, end; clock_t start, end;
double total; double total;
__VOLK_ATTR_ALIGNED(16) short target[vlen]; __VOLK_ATTR_ALIGNED(16) short target[vlen];
__VOLK_ATTR_ALIGNED(16) short target2[vlen]; __VOLK_ATTR_ALIGNED(16) short target2[vlen];
__VOLK_ATTR_ALIGNED(16) short src0[vlen]; __VOLK_ATTR_ALIGNED(16) short src0[vlen];
__VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen]; __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl0[vlen]; __VOLK_ATTR_ALIGNED(16) short cntl0[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl1[vlen]; __VOLK_ATTR_ALIGNED(16) short cntl1[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl2[vlen]; __VOLK_ATTR_ALIGNED(16) short cntl2[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl3[vlen]; __VOLK_ATTR_ALIGNED(16) short cntl3[vlen];
__VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4}; __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
for(int i = 0; i < vlen; ++i) { for(int i = 0; i < vlen; ++i)
src0[i] = i; {
permute_indexes[i] = (3 * i)%vlen; src0[i] = i;
cntl0[i] = 0xff; permute_indexes[i] = (3 * i)%vlen;
cntl1[i] = 0xff * (i%2); cntl0[i] = 0xff;
cntl2[i] = 0xff * ((i>>1)%2); cntl1[i] = 0xff * (i%2);
cntl3[i] = 0xff * ((i%4) == 3); cntl2[i] = 0xff * ((i>>1)%2);
} cntl3[i] = 0xff * ((i%4) == 3);
}
printf("16s_permute_and_scalar_add_aligned\n"); printf("16s_permute_and_scalar_add_aligned\n");
start = clock(); start = clock();
for(int i = 0; i < 100000; ++i) { for(int i = 0; i < 100000; ++i)
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "generic"); {
} volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "generic");
end = clock(); }
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC; total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total); printf("generic_time: %f\n", total);
start = clock(); start = clock();
for(int i = 0; i < 100000; ++i) { for(int i = 0; i < 100000; ++i)
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target2, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2"); {
} volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target2, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
end = clock(); }
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC; total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse2_time: %f\n", total); printf("sse2_time: %f\n", total);
//for(int i = 0; i < vlen; ++i) {
for(int i = 0; i < vlen; ++i) {
//printf("generic... %d, sse2... %d\n", target[i], target2[i]); //printf("generic... %d, sse2... %d\n", target[i], target2[i]);
} //}
for(int i = 0; i < vlen; ++i) { for(int i = 0; i < vlen; ++i)
{
CPPUNIT_ASSERT(target[i] == target2[i]); CPPUNIT_ASSERT(target[i] == target2[i]);
} }
} }
#endif #endif

View File

@ -26,53 +26,57 @@
#ifndef LV_HAVE_SSE2 #ifndef LV_HAVE_SSE2
void qa_16s_quad_max_star_aligned16::t1() { void qa_16s_quad_max_star_aligned16::t1()
printf("sse2 not available... no test performed\n"); {
printf("sse2 not available... no test performed\n");
} }
#else #else
void qa_16s_quad_max_star_aligned16::t1() { void qa_16s_quad_max_star_aligned16::t1()
const int vlen = 34; {
const int vlen = 34;
__VOLK_ATTR_ALIGNED(16) short input0[vlen]; __VOLK_ATTR_ALIGNED(16) short input0[vlen];
__VOLK_ATTR_ALIGNED(16) short input1[vlen]; __VOLK_ATTR_ALIGNED(16) short input1[vlen];
__VOLK_ATTR_ALIGNED(16) short input2[vlen]; __VOLK_ATTR_ALIGNED(16) short input2[vlen];
__VOLK_ATTR_ALIGNED(16) short input3[vlen]; __VOLK_ATTR_ALIGNED(16) short input3[vlen];
__VOLK_ATTR_ALIGNED(16) short output0[vlen]; __VOLK_ATTR_ALIGNED(16) short output0[vlen];
__VOLK_ATTR_ALIGNED(16) short output1[vlen]; __VOLK_ATTR_ALIGNED(16) short output1[vlen];
for(int i = 0; i < vlen; ++i) { for(int i = 0; i < vlen; ++i)
short plus0 = (short) (rand() - (RAND_MAX/2)); {
short plus1 = (short) (rand() - (RAND_MAX/2)); short plus0 = (short) (rand() - (RAND_MAX/2));
short plus2 = (short) (rand() - (RAND_MAX/2)); short plus1 = (short) (rand() - (RAND_MAX/2));
short plus3 = (short) (rand() - (RAND_MAX/2)); short plus2 = (short) (rand() - (RAND_MAX/2));
short plus3 = (short) (rand() - (RAND_MAX/2));
short minus0 = (short) (rand() - (RAND_MAX/2)); short minus0 = (short) (rand() - (RAND_MAX/2));
short minus1 = (short) (rand() - (RAND_MAX/2)); short minus1 = (short) (rand() - (RAND_MAX/2));
short minus2 = (short) (rand() - (RAND_MAX/2)); short minus2 = (short) (rand() - (RAND_MAX/2));
short minus3 = (short) (rand() - (RAND_MAX/2)); short minus3 = (short) (rand() - (RAND_MAX/2));
input0[i] = plus0 - minus0; input0[i] = plus0 - minus0;
input1[i] = plus1 - minus1; input1[i] = plus1 - minus1;
input2[i] = plus2 - minus2; input2[i] = plus2 - minus2;
input3[i] = plus3 - minus3; input3[i] = plus3 - minus3;
} }
volk_gnsssdr_16s_quad_max_star_aligned16_manual(output0, input0, input1, input2, input3, 2*vlen, "generic"); volk_gnsssdr_16s_quad_max_star_aligned16_manual(output0, input0, input1, input2, input3, 2*vlen, "generic");
volk_gnsssdr_16s_quad_max_star_aligned16_manual(output1, input0, input1, input2, input3, 2*vlen, "sse2"); volk_gnsssdr_16s_quad_max_star_aligned16_manual(output1, input0, input1, input2, input3, 2*vlen, "sse2");
printf("16s_quad_max_star_aligned\n"); printf("16s_quad_max_star_aligned\n");
for(int i = 0; i < vlen; ++i) { for(int i = 0; i < vlen; ++i)
printf("generic... %d, sse2... %d, inputs: %d, %d, %d, %d\n", output0[i], output1[i], input0[i], input1[i], input2[i], input3[i]); {
} printf("generic... %d, sse2... %d, inputs: %d, %d, %d, %d\n", output0[i], output1[i], input0[i], input1[i], input2[i], input3[i]);
}
for(int i = 0; i < vlen; ++i) { for(int i = 0; i < vlen; ++i)
{
CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]); CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
} }
} }
#endif #endif

View File

@ -27,54 +27,60 @@
#ifndef LV_HAVE_SSE #ifndef LV_HAVE_SSE
void qa_32f_fm_detect_aligned16::t1() { void qa_32f_fm_detect_aligned16::t1()
printf("sse not available... no test performed\n"); {
printf("sse not available... no test performed\n");
} }
#else #else
void qa_32f_fm_detect_aligned16::t1() { void qa_32f_fm_detect_aligned16::t1()
{
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
const int vlen = 3201;
const int ITERS = 10000;
__VOLK_ATTR_ALIGNED(16) float input0[vlen];
volk_gnsssdr_environment_init(); __VOLK_ATTR_ALIGNED(16) float output0[vlen];
clock_t start, end; __VOLK_ATTR_ALIGNED(16) float output01[vlen];
double total;
const int vlen = 3201;
const int ITERS = 10000;
__VOLK_ATTR_ALIGNED(16) float input0[vlen];
__VOLK_ATTR_ALIGNED(16) float output0[vlen]; for(int i = 0; i < vlen; ++i)
__VOLK_ATTR_ALIGNED(16) float output01[vlen]; {
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
}
printf("32f_fm_detect_aligned\n");
for(int i = 0; i < vlen; ++i) { start = clock();
input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)); float save = 0.1;
} for(int count = 0; count < ITERS; ++count)
printf("32f_fm_detect_aligned\n"); {
volk_gnsssdr_32f_fm_detect_aligned16_manual(output0, input0, 1.0, &save, vlen, "generic");
start = clock(); }
float save = 0.1; end = clock();
for(int count = 0; count < ITERS; ++count) { total = (double)(end-start)/(double)CLOCKS_PER_SEC;
volk_gnsssdr_32f_fm_detect_aligned16_manual(output0, input0, 1.0, &save, vlen, "generic"); printf("generic_time: %f\n", total);
} start = clock();
end = clock(); save = 0.1;
total = (double)(end-start)/(double)CLOCKS_PER_SEC; for(int count = 0; count < ITERS; ++count)
printf("generic_time: %f\n", total); {
start = clock(); volk_gnsssdr_32f_fm_detect_aligned16_manual(output01, input0, 1.0, &save, vlen, "sse");
save = 0.1; }
for(int count = 0; count < ITERS; ++count) { end = clock();
volk_gnsssdr_32f_fm_detect_aligned16_manual(output01, input0, 1.0, &save, vlen, "sse"); total = (double)(end-start)/(double)CLOCKS_PER_SEC;
} printf("sse_time: %f\n", total);
end = clock(); //for(int i = 0; i < 1; ++i)
total = (double)(end-start)/(double)CLOCKS_PER_SEC; // {
printf("sse_time: %f\n", total);
for(int i = 0; i < 1; ++i) {
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
} //}
for(int i = 0; i < vlen; ++i) { for(int i = 0; i < vlen; ++i)
//printf("%d...%d\n", output0[i], output01[i]); {
CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i]) * 1e-4); //printf("%d...%d\n", output0[i], output01[i]);
} CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i]) * 1e-4);
}
} }
#endif #endif

View File

@ -26,96 +26,97 @@
#define ERR_DELTA (1e-4) #define ERR_DELTA (1e-4)
#define NUM_ITERS 1000000 #define NUM_ITERS 1000000
#define VEC_LEN 3097 #define VEC_LEN 3097
static float uniform() {
return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) static float uniform()
{
return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1)
} }
static void static void
random_floats (float *buf, unsigned n) random_floats (float *buf, unsigned n)
{ {
unsigned int i = 0; unsigned int i = 0;
for (; i < n; i++) { for (; i < n; i++)
{
buf[i] = uniform () * 32767; buf[i] = uniform () * 32767;
}
}
} }
#ifndef LV_HAVE_SSE #ifndef LV_HAVE_SSE
void qa_32f_index_max_aligned16::t1(){ void qa_32f_index_max_aligned16::t1()
printf("sse not available... no test performed\n"); {
printf("sse not available... no test performed\n");
} }
#else #else
void qa_32f_index_max_aligned16::t1(){ void qa_32f_index_max_aligned16::t1()
{
const int vlen = VEC_LEN;
const int vlen = VEC_LEN; volk_gnsssdr_runtime_init();
volk_gnsssdr_environment_init();
int ret;
volk_gnsssdr_runtime_init(); unsigned int* target_sse4_1;
unsigned int* target_sse;
unsigned int* target_generic;
float* src0 ;
volk_gnsssdr_environment_init(); unsigned int i_target_sse4_1;
int ret; target_sse4_1 = &i_target_sse4_1;
unsigned int i_target_sse;
target_sse = &i_target_sse;
unsigned int i_target_generic;
target_generic = &i_target_generic;
unsigned int* target_sse4_1; ret = posix_memalign((void**)&src0, 16, vlen *sizeof(float));
unsigned int* target_sse;
unsigned int* target_generic;
float* src0 ;
random_floats((float*)src0, vlen);
unsigned int i_target_sse4_1; printf("32f_index_max_aligned16\n");
target_sse4_1 = &i_target_sse4_1;
unsigned int i_target_sse;
target_sse = &i_target_sse;
unsigned int i_target_generic;
target_generic = &i_target_generic;
ret = posix_memalign((void**)&src0, 16, vlen *sizeof(float)); clock_t start, end;
double total;
random_floats((float*)src0, vlen); start = clock();
for(int k = 0; k < NUM_ITERS; ++k)
{
volk_gnsssdr_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic time: %f\n", total);
printf("32f_index_max_aligned16\n"); start = clock();
for(int k = 0; k < NUM_ITERS; ++k)
{
volk_gnsssdr_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2");
}
clock_t start, end; end = clock();
double total; total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse time: %f\n", total);
start = clock();
for(int k = 0; k < NUM_ITERS; ++k)
{
get_volk_gnsssdr_runtime()->volk_gnsssdr_32f_index_max_aligned16(target_sse4_1, src0, vlen);
}
start = clock(); end = clock();
for(int k = 0; k < NUM_ITERS; ++k) { total = (double)(end-start)/(double)CLOCKS_PER_SEC;
volk_gnsssdr_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic"); printf("sse4.1 time: %f\n", total);
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic time: %f\n", total);
start = clock(); printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], target_sse[0], target_sse4_1[0]);
for(int k = 0; k < NUM_ITERS; ++k) { CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse[0]);
volk_gnsssdr_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2"); CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse4_1[0]);
}
end = clock(); free(src0);
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse time: %f\n", total);
start = clock();
for(int k = 0; k < NUM_ITERS; ++k) {
get_volk_gnsssdr_runtime()->volk_gnsssdr_32f_index_max_aligned16(target_sse4_1, src0, vlen);
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse4.1 time: %f\n", total);
printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], target_sse[0], target_sse4_1[0]);
CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse[0]);
CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse4_1[0]);
free(src0);
} }
#endif /*LV_HAVE_SSE3*/ #endif /*LV_HAVE_SSE3*/

View File

@ -25,83 +25,80 @@
#define ERR_DELTA (1e-4) #define ERR_DELTA (1e-4)
#define NUM_ITERS 1000000 #define NUM_ITERS 1000000
#define VEC_LEN 3096 #define VEC_LEN 3096
static float uniform() {
return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1) static float uniform()
{
return 2.0 * ((float) rand() / RAND_MAX - 0.5); // uniformly (-1, 1)
} }
static void static void
random_floats (float *buf, unsigned n) random_floats (float *buf, unsigned n)
{ {
unsigned int i = 0; unsigned int i = 0;
for (; i < n; i++) { for (; i < n; i++)
{
buf[i] = uniform () * 32767;
buf[i] = uniform () * 32767; }
}
} }
#ifndef LV_HAVE_SSE3 #ifndef LV_HAVE_SSE3
void qa_32fc_index_max_aligned16::t1(){ void qa_32fc_index_max_aligned16::t1()
printf("sse3 not available... no test performed\n"); {
printf("sse3 not available... no test performed\n");
} }
#else #else
void qa_32fc_index_max_aligned16::t1()
{
const int vlen = VEC_LEN;
void qa_32fc_index_max_aligned16::t1(){ volk_gnsssdr_environment_init();
int ret;
const int vlen = VEC_LEN; unsigned int* target;
unsigned int* target_generic;
std::complex<float>* src0 ;
volk_gnsssdr_environment_init(); unsigned int i_target;
int ret; target = &i_target;
unsigned int i_target_generic;
target_generic = &i_target_generic;
ret = posix_memalign((void**)&src0, 16, vlen << 3);
unsigned int* target; random_floats((float*)src0, vlen * 2);
unsigned int* target_generic;
std::complex<float>* src0 ;
printf("32fc_index_max_aligned16\n");
unsigned int i_target; clock_t start, end;
target = &i_target; double total;
unsigned int i_target_generic;
target_generic = &i_target_generic;
ret = posix_memalign((void**)&src0, 16, vlen << 3);
random_floats((float*)src0, vlen * 2); start = clock();
for(int k = 0; k < NUM_ITERS; ++k)
{
volk_gnsssdr_32fc_index_max_aligned16_manual(target_generic, src0, vlen << 3, "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic time: %f\n", total);
printf("32fc_index_max_aligned16\n"); start = clock();
for(int k = 0; k < NUM_ITERS; ++k)
{
volk_gnsssdr_32fc_index_max_aligned16_manual(target, src0, vlen << 3, "sse3");
}
clock_t start, end; end = clock();
double total; total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse3 time: %f\n", total);
printf("generic: %u, sse3: %u\n", target_generic[0], target[0]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], 1.1);
start = clock(); free(src0);
for(int k = 0; k < NUM_ITERS; ++k) {
volk_gnsssdr_32fc_index_max_aligned16_manual(target_generic, src0, vlen << 3, "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic time: %f\n", total);
start = clock();
for(int k = 0; k < NUM_ITERS; ++k) {
volk_gnsssdr_32fc_index_max_aligned16_manual(target, src0, vlen << 3, "sse3");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse3 time: %f\n", total);
printf("generic: %u, sse3: %u\n", target_generic[0], target[0]);
CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], 1.1);
free(src0);
} }
#endif /*LV_HAVE_SSE3*/ #endif /*LV_HAVE_SSE3*/

View File

@ -26,57 +26,62 @@
#ifndef LV_HAVE_SSE3 #ifndef LV_HAVE_SSE3
void qa_32fc_power_spectral_density_32f_aligned16::t1() { void qa_32fc_power_spectral_density_32f_aligned16::t1()
printf("sse3 not available... no test performed\n"); {
printf("sse3 not available... no test performed\n");
} }
#else #else
void qa_32fc_power_spectral_density_32f_aligned16::t1() { void qa_32fc_power_spectral_density_32f_aligned16::t1()
{
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
const int vlen = 3201;
const int ITERS = 10000;
__VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen];
volk_gnsssdr_environment_init(); __VOLK_ATTR_ALIGNED(16) float output_generic[vlen];
clock_t start, end; __VOLK_ATTR_ALIGNED(16) float output_sse3[vlen];
double total;
const int vlen = 3201;
const int ITERS = 10000;
__VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen];
__VOLK_ATTR_ALIGNED(16) float output_generic[vlen]; const float scalar = vlen;
__VOLK_ATTR_ALIGNED(16) float output_sse3[vlen]; const float rbw = 1.7;
const float scalar = vlen; float* inputLoad = (float*)input0;
const float rbw = 1.7; for(int i = 0; i < 2*vlen; ++i)
{
inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
}
printf("32fc_power_spectral_density_32f_aligned\n");
float* inputLoad = (float*)input0; start = clock();
for(int i = 0; i < 2*vlen; ++i) { for(int count = 0; count < ITERS; ++count)
inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2))); {
} volk_gnsssdr_32fc_power_spectral_density_32f_aligned16_manual(output_generic, input0, scalar, rbw, vlen, "generic");
printf("32fc_power_spectral_density_32f_aligned\n"); }
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count)
{
volk_gnsssdr_32fc_power_spectral_density_32f_aligned16_manual(output_sse3, input0, scalar, rbw, vlen, "sse3");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse3_time: %f\n", total);
start = clock(); //for(int i = 0; i < 1; ++i) {
for(int count = 0; count < ITERS; ++count) {
volk_gnsssdr_32fc_power_spectral_density_32f_aligned16_manual(output_generic, input0, scalar, rbw, vlen, "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_gnsssdr_32fc_power_spectral_density_32f_aligned16_manual(output_sse3, input0, scalar, rbw, vlen, "sse3");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse3_time: %f\n", total);
for(int i = 0; i < 1; ++i) {
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]); //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]); //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
} //}
for(int i = 0; i < vlen; ++i) { for(int i = 0; i < vlen; ++i)
//printf("%d...%d\n", output0[i], output01[i]); {
CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4)); //printf("%d...%d\n", output0[i], output01[i]);
} CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4));
}
} }
#endif #endif

View File

@ -16,8 +16,8 @@
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef VOLK_QA_UTILS_H #ifndef GNSS_SDR_VOLK_QA_UTILS_H
#define VOLK_QA_UTILS_H #define GNSS_SDR_VOLK_QA_UTILS_H
#include <cstdlib> #include <cstdlib>
#include <string> #include <string>
@ -42,22 +42,24 @@ volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string);
float uniform(void); float uniform(void);
void random_floats(float *buf, unsigned n); void random_floats(float *buf, unsigned n);
class volk_gnsssdr_test_time_t { class volk_gnsssdr_test_time_t
public: {
std::string name; public:
double time; std::string name;
std::string units; double time;
std::string units;
}; };
class volk_gnsssdr_test_results_t { class volk_gnsssdr_test_results_t
public: {
std::string name; public:
std::string config_name; std::string name;
int vlen; std::string config_name;
int iter; int vlen;
std::map<std::string, volk_gnsssdr_test_time_t> results; int iter;
std::string best_arch_a; std::map<std::string, volk_gnsssdr_test_time_t> results;
std::string best_arch_u; std::string best_arch_a;
std::string best_arch_u;
}; };
bool run_volk_gnsssdr_tests( bool run_volk_gnsssdr_tests(
@ -117,4 +119,4 @@ typedef void (*volk_gnsssdr_fn_12arg_s8ic)(void *, void *, void *, void *, void
//ADDED BY GNSS-SDR. END //ADDED BY GNSS-SDR. END
#endif //VOLK_QA_UTILS_H #endif // GNSS_SDR_VOLK_QA_UTILS_H

View File

@ -1,5 +1,5 @@
/* -*- c++ -*- */ /*
/* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
* *
* This file is part of GNSS-SDR. * This file is part of GNSS-SDR.
* *

View File

@ -1,5 +1,5 @@
/* -*- c -*- */ /*
/* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
* *
* This file is part of GNSS-SDR. * This file is part of GNSS-SDR.
* *
@ -26,7 +26,7 @@
/* /*
* For #defines used to determine support for allocation functions, * For #defines used to determine support for allocation functions,
* see: http://linux.die.net/man/3/aligned_alloc * see: http://linux.die.net/man/3/aligned_alloc
*/ */
// Disabling use of aligned_alloc. This function requires that size be // Disabling use of aligned_alloc. This function requires that size be
// a multiple of alignment, which is too restrictive for many uses of // a multiple of alignment, which is too restrictive for many uses of
@ -57,20 +57,22 @@
void *volk_gnsssdr_malloc(size_t size, size_t alignment) void *volk_gnsssdr_malloc(size_t size, size_t alignment)
{ {
void *ptr; void *ptr;
int err = posix_memalign(&ptr, alignment, size); int err = posix_memalign(&ptr, alignment, size);
if(err == 0) { if(err == 0)
return ptr; {
} return ptr;
else { }
fprintf(stderr, "VOLK: Error allocating memory (posix_memalign: %d)\n", err); else
return NULL; {
} fprintf(stderr, "VOLK: Error allocating memory (posix_memalign: %d)\n", err);
return NULL;
}
} }
void volk_gnsssdr_free(void *ptr) void volk_gnsssdr_free(void *ptr)
{ {
free(ptr); free(ptr);
} }
// _aligned_malloc has no restriction on size, // _aligned_malloc has no restriction on size,
@ -79,16 +81,17 @@ void volk_gnsssdr_free(void *ptr)
void *volk_gnsssdr_malloc(size_t size, size_t alignment) void *volk_gnsssdr_malloc(size_t size, size_t alignment)
{ {
void *ptr = _aligned_malloc(size, alignment); void *ptr = _aligned_malloc(size, alignment);
if(ptr == NULL) { if(ptr == NULL)
fprintf(stderr, "VOLK: Error allocating memory (_aligned_malloc)\n"); {
} fprintf(stderr, "VOLK: Error allocating memory (_aligned_malloc)\n");
return ptr; }
return ptr;
} }
void volk_gnsssdr_free(void *ptr) void volk_gnsssdr_free(void *ptr)
{ {
_aligned_free(ptr); _aligned_free(ptr);
} }
// No standard handlers; we'll do it ourselves. // No standard handlers; we'll do it ourselves.
@ -96,43 +99,43 @@ void volk_gnsssdr_free(void *ptr)
struct block_info struct block_info
{ {
void *real; void *real;
}; };
void * void *
volk_gnsssdr_malloc(size_t size, size_t alignment) volk_gnsssdr_malloc(size_t size, size_t alignment)
{ {
void *real, *user; void *real, *user;
struct block_info *info; struct block_info *info;
/* At least align to sizeof our struct */ /* At least align to sizeof our struct */
if (alignment < sizeof(struct block_info)) if (alignment < sizeof(struct block_info))
alignment = sizeof(struct block_info); alignment = sizeof(struct block_info);
/* Alloc */ /* Alloc */
real = malloc(size + (2 * alignment - 1)); real = malloc(size + (2 * alignment - 1));
/* Get pointer to the various zones */ /* Get pointer to the various zones */
user = (void *)((((uintptr_t) real) + sizeof(struct block_info) + alignment - 1) & ~(alignment - 1)); user = (void *)((((uintptr_t) real) + sizeof(struct block_info) + alignment - 1) & ~(alignment - 1));
info = (struct block_info *)(((uintptr_t)user) - sizeof(struct block_info)); info = (struct block_info *)(((uintptr_t)user) - sizeof(struct block_info));
/* Store the info for the free */ /* Store the info for the free */
info->real = real; info->real = real;
/* Return pointer to user */ /* Return pointer to user */
return user; return user;
} }
void void
volk_gnsssdr_free(void *ptr) volk_gnsssdr_free(void *ptr)
{ {
struct block_info *info; struct block_info *info;
/* Get the real pointer */ /* Get the real pointer */
info = (struct block_info *)(((uintptr_t)ptr) - sizeof(struct block_info)); info = (struct block_info *)(((uintptr_t)ptr) - sizeof(struct block_info));
/* Release real pointer */ /* Release real pointer */
free(info->real); free(info->real);
} }
#endif // _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || HAVE_POSIX_MEMALIGN #endif // _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || HAVE_POSIX_MEMALIGN

View File

@ -31,10 +31,11 @@ void volk_gnsssdr_get_config_path(char *path)
char *home = NULL; char *home = NULL;
if (home == NULL) home = getenv("HOME"); if (home == NULL) home = getenv("HOME");
if (home == NULL) home = getenv("APPDATA"); if (home == NULL) home = getenv("APPDATA");
if (home == NULL){ if (home == NULL)
path = NULL; {
return; path = NULL;
} return;
}
strcpy(path, home); strcpy(path, home);
strcat(path, suffix); strcat(path, suffix);
} }
@ -54,14 +55,14 @@ size_t volk_gnsssdr_load_preferences(volk_gnsssdr_arch_pref_t **prefs_res)
//reset the file pointer and write the prefs into volk_gnsssdr_arch_prefs //reset the file pointer and write the prefs into volk_gnsssdr_arch_prefs
while(fgets(line, sizeof(line), config_file) != NULL) while(fgets(line, sizeof(line), config_file) != NULL)
{
prefs = (volk_gnsssdr_arch_pref_t *) realloc(prefs, (n_arch_prefs+1) * sizeof(*prefs));
volk_gnsssdr_arch_pref_t *p = prefs + n_arch_prefs;
if(sscanf(line, "%s %s %s", p->name, p->impl_a, p->impl_u) == 3 && !strncmp(p->name, "volk_gnsssdr_", 5))
{ {
n_arch_prefs++; prefs = (volk_gnsssdr_arch_pref_t *) realloc(prefs, (n_arch_prefs+1) * sizeof(*prefs));
volk_gnsssdr_arch_pref_t *p = prefs + n_arch_prefs;
if(sscanf(line, "%s %s %s", p->name, p->impl_a, p->impl_u) == 3 && !strncmp(p->name, "volk_gnsssdr_", 5))
{
n_arch_prefs++;
}
} }
}
fclose(config_file); fclose(config_file);
*prefs_res = prefs; *prefs_res = prefs;
return n_arch_prefs; return n_arch_prefs;

View File

@ -24,70 +24,77 @@
#include <string.h> #include <string.h>
#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4 #if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4
#define __popcnt __builtin_popcount #define __popcnt __builtin_popcount
#else #else
inline unsigned __popcnt(unsigned num) inline unsigned __popcnt(unsigned num)
{ {
unsigned pop = 0; unsigned pop = 0;
while(num) while(num)
{ {
if (num & 0x1) pop++; if (num & 0x1) pop++;
num >>= 1; num >>= 1;
} }
return pop; return pop;
} }
#endif #endif
int volk_gnsssdr_get_index( int volk_gnsssdr_get_index(
const char *impl_names[], //list of implementations by name const char *impl_names[], //list of implementations by name
const size_t n_impls, //number of implementations available const size_t n_impls, //number of implementations available
const char *impl_name //the implementation name to find const char *impl_name //the implementation name to find
){ )
{
unsigned int i; unsigned int i;
for (i = 0; i < n_impls; i++) { for (i = 0; i < n_impls; i++)
if(!strncmp(impl_names[i], impl_name, 20)) { {
return i; if(!strncmp(impl_names[i], impl_name, 20))
{
return i;
}
} }
}
//TODO return -1; //TODO return -1;
//something terrible should happen here //something terrible should happen here
printf("Volk warning: no arch found, returning generic impl\n"); printf("Volk warning: no arch found, returning generic impl\n");
return volk_gnsssdr_get_index(impl_names, n_impls, "generic"); //but we'll fake it for now return volk_gnsssdr_get_index(impl_names, n_impls, "generic"); //but we'll fake it for now
} }
int volk_gnsssdr_rank_archs(
const char *kern_name, //name of the kernel to rank
const char *impl_names[], //list of implementations by name
const int* impl_deps, //requirement mask per implementation
const bool* alignment, //alignment status of each implementation
size_t n_impls, //number of implementations available
const bool align //if false, filter aligned implementations
){
size_t i;
static volk_gnsssdr_arch_pref_t *volk_gnsssdr_arch_prefs;
static size_t n_arch_prefs = 0;
static int prefs_loaded = 0;
if(!prefs_loaded) {
n_arch_prefs = volk_gnsssdr_load_preferences(&volk_gnsssdr_arch_prefs);
prefs_loaded = 1;
}
// If we've defined VOLK_GENERIC to be anything, always return the int volk_gnsssdr_rank_archs(
// 'generic' kernel. Used in GR's QA code. const char *kern_name, //name of the kernel to rank
char *gen_env = getenv("VOLK_GENERIC"); const char *impl_names[], //list of implementations by name
if(gen_env) { const int* impl_deps, //requirement mask per implementation
return volk_gnsssdr_get_index(impl_names, n_impls, "generic"); const bool* alignment, //alignment status of each implementation
} size_t n_impls, //number of implementations available
const bool align //if false, filter aligned implementations
)
{
size_t i;
static volk_gnsssdr_arch_pref_t *volk_gnsssdr_arch_prefs;
static size_t n_arch_prefs = 0;
static int prefs_loaded = 0;
if(!prefs_loaded)
{
n_arch_prefs = volk_gnsssdr_load_preferences(&volk_gnsssdr_arch_prefs);
prefs_loaded = 1;
}
// If we've defined VOLK_GENERIC to be anything, always return the
// 'generic' kernel. Used in GR's QA code.
char *gen_env = getenv("VOLK_GENERIC");
if(gen_env)
{
return volk_gnsssdr_get_index(impl_names, n_impls, "generic");
}
//now look for the function name in the prefs list //now look for the function name in the prefs list
for(i = 0; i < n_arch_prefs; i++) for(i = 0; i < n_arch_prefs; i++)
{
if(!strncmp(kern_name, volk_gnsssdr_arch_prefs[i].name, sizeof(volk_gnsssdr_arch_prefs[i].name))) //found it
{ {
const char *impl_name = align? volk_gnsssdr_arch_prefs[i].impl_a : volk_gnsssdr_arch_prefs[i].impl_u; if(!strncmp(kern_name, volk_gnsssdr_arch_prefs[i].name, sizeof(volk_gnsssdr_arch_prefs[i].name))) //found it
return volk_gnsssdr_get_index(impl_names, n_impls, impl_name); {
const char *impl_name = align? volk_gnsssdr_arch_prefs[i].impl_a : volk_gnsssdr_arch_prefs[i].impl_u;
return volk_gnsssdr_get_index(impl_names, n_impls, impl_name);
}
} }
}
//return the best index with the largest deps //return the best index with the largest deps
size_t best_index_a = 0; size_t best_index_a = 0;
@ -95,19 +102,19 @@ int volk_gnsssdr_rank_archs(
int best_value_a = -1; int best_value_a = -1;
int best_value_u = -1; int best_value_u = -1;
for(i = 0; i < n_impls; i++) for(i = 0; i < n_impls; i++)
{
const signed val = __popcnt(impl_deps[i]);
if (alignment[i] && val > best_value_a)
{ {
best_index_a = i; const signed val = __popcnt(impl_deps[i]);
best_value_a = val; if (alignment[i] && val > best_value_a)
{
best_index_a = i;
best_value_a = val;
}
if (!alignment[i] && val > best_value_u)
{
best_index_u = i;
best_value_u = val;
}
} }
if (!alignment[i] && val > best_value_u)
{
best_index_u = i;
best_value_u = val;
}
}
//when align and we found a best aligned, use it //when align and we found a best aligned, use it
if (align && best_value_a != -1) return best_index_a; if (align && best_value_a != -1) return best_index_a;

View File

@ -16,8 +16,8 @@
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef INCLUDED_VOLK_RANK_ARCHS_H #ifndef GNSS_SDR_VOLK_GNSSSDR_RANK_ARCHS_H
#define INCLUDED_VOLK_RANK_ARCHS_H #define GNSS_SDR_VOLK_GNSSSDR_RANK_ARCHS_H
#include <stdlib.h> #include <stdlib.h>
#include <stdbool.h> #include <stdbool.h>
@ -44,4 +44,4 @@ int volk_gnsssdr_rank_archs(
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
#endif /*INCLUDED_VOLK_RANK_ARCHS_H*/ #endif /* GNSS_SDR_VOLK_GNSSSDR_RANK_ARCHS_H */

View File

@ -1,36 +1,34 @@
#/*! #
# * \file volk_gnsssdr_8i_accumulator_s8i.orc # ORC implementation: 8 bits (char) scalar accumulator
# * \brief ORC implementation: 8 bits (char) scalar accumulator #
# * \authors <ul> # Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com #
# * </ul> # ORC code that implements an accumulator of char values
# * #
# * ORC code that implements an accumulator of char values # -------------------------------------------------------------------------
# * #
# * ------------------------------------------------------------------------- # Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# * #
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) # GNSS-SDR is a software defined Global Navigation
# * # Satellite Systems receiver
# * GNSS-SDR is a software defined Global Navigation #
# * Satellite Systems receiver # This file is part of GNSS-SDR.
# * #
# * This file is part of GNSS-SDR. # GNSS-SDR is free software: you can redistribute it and/or modify
# * # it under the terms of the GNU General Public License as published by
# * GNSS-SDR is free software: you can redistribute it and/or modify # the Free Software Foundation, either version 3 of the License, or
# * it under the terms of the GNU General Public License as published by # (at your option) any later version.
# * the Free Software Foundation, either version 3 of the License, or #
# * at your option) any later version. # GNSS-SDR is distributed in the hope that it will be useful,
# * # but WITHOUT ANY WARRANTY; without even the implied warranty of
# * GNSS-SDR is distributed in the hope that it will be useful, # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * but WITHOUT ANY WARRANTY; without even the implied warranty of # GNU General Public License for more details.
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# * GNU General Public License for more details. # You should have received a copy of the GNU General Public License
# * # along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# * You should have received a copy of the GNU General Public License #
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. # -------------------------------------------------------------------------
# * #
# * -------------------------------------------------------------------------
# */
.function volk_gnsssdr_8i_accumulator_s8i_a_orc_impl .function volk_gnsssdr_8i_accumulator_s8i_a_orc_impl
.source 1 src1 .source 1 src1

View File

@ -1,36 +1,33 @@
#/*! #
# * \file volk_gnsssdr_8i_x2_add_8i.orc # ORC implementation: adds pairs of 8 bits (char) scalars
# * \brief ORC implementation: adds pairs of 8 bits (char) scalars #
# * \authors <ul> # Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com #
# * </ul> #
# * # -------------------------------------------------------------------------
# * ORC code that adds pairs of 8 bits (char) scalars #
# * # Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# * ------------------------------------------------------------------------- #
# * # GNSS-SDR is a software defined Global Navigation
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) # Satellite Systems receiver
# * #
# * GNSS-SDR is a software defined Global Navigation # This file is part of GNSS-SDR.
# * Satellite Systems receiver #
# * # GNSS-SDR is free software: you can redistribute it and/or modify
# * This file is part of GNSS-SDR. # it under the terms of the GNU General Public License as published by
# * # the Free Software Foundation, either version 3 of the License, or
# * GNSS-SDR is free software: you can redistribute it and/or modify # (at your option) any later version.
# * it under the terms of the GNU General Public License as published by #
# * the Free Software Foundation, either version 3 of the License, or # GNSS-SDR is distributed in the hope that it will be useful,
# * at your option) any later version. # but WITHOUT ANY WARRANTY; without even the implied warranty of
# * # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNSS-SDR is distributed in the hope that it will be useful, # GNU General Public License for more details.
# * but WITHOUT ANY WARRANTY; without even the implied warranty of #
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # You should have received a copy of the GNU General Public License
# * GNU General Public License for more details. # along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# * #
# * You should have received a copy of the GNU General Public License # -------------------------------------------------------------------------
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. #
# *
# * -------------------------------------------------------------------------
# */
.function volk_gnsssdr_8i_x2_add_8i_a_orc_impl .function volk_gnsssdr_8i_x2_add_8i_a_orc_impl
.dest 1 dst .dest 1 dst

View File

@ -1,38 +1,37 @@
#/*! #
# * \file volk_gnsssdr_8ic_conjugate_8ic.orc # ORC implementation: calculates the conjugate of a 16 bits vector
# * \brief ORC implementation: calculates the conjugate of a 16 bits vector #
# * \authors <ul> # Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com #
# * </ul> # ORC code that calculates the conjugate of a
# * # 16 bits vector (8 bits the real part and 8 bits the imaginary part)
# * ORC code that calculates the conjugate of a # result = (real*real) + (imag*imag)
# * 16 bits vector (8 bits the real part and 8 bits the imaginary part) #
# * result = (real*real) + (imag*imag) #
# * # -------------------------------------------------------------------------
# * ------------------------------------------------------------------------- #
# * # Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) #
# * # GNSS-SDR is a software defined Global Navigation
# * GNSS-SDR is a software defined Global Navigation # Satellite Systems receiver
# * Satellite Systems receiver #
# * # This file is part of GNSS-SDR.
# * This file is part of GNSS-SDR. #
# * # GNSS-SDR is free software: you can redistribute it and/or modify
# * GNSS-SDR is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by
# * it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or
# * the Free Software Foundation, either version 3 of the License, or # (at your option) any later version.
# * at your option) any later version. #
# * # GNSS-SDR is distributed in the hope that it will be useful,
# * GNSS-SDR is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of
# * but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details.
# * GNU General Public License for more details. #
# * # You should have received a copy of the GNU General Public License
# * You should have received a copy of the GNU General Public License # along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. #
# * # -------------------------------------------------------------------------
# * ------------------------------------------------------------------------- #
# */
.function volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl .function volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl
.source 2 src1 .source 2 src1

View File

@ -1,38 +1,37 @@
#/*! #
# * \file volk_gnsssdr_8ic_magnitude_squared_8i.orc # ORC implementation: calculates the magnitude squared of a 16 bits vector
# * \brief ORC implementation: calculates the magnitude squared of a 16 bits vector # Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * \authors <ul> #
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com #
# * </ul> # ORC code that calculates the magnitude squared of a
# * # 16 bits vector (8 bits the real part and 8 bits the imaginary part)
# * ORC code that calculates the magnitude squared of a # result = (real*real) + (imag*imag)
# * 16 bits vector (8 bits the real part and 8 bits the imaginary part) #
# * result = (real*real) + (imag*imag) #
# * # -------------------------------------------------------------------------
# * ------------------------------------------------------------------------- #
# * # Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) #
# * # GNSS-SDR is a software defined Global Navigation
# * GNSS-SDR is a software defined Global Navigation # Satellite Systems receiver
# * Satellite Systems receiver #
# * # This file is part of GNSS-SDR.
# * This file is part of GNSS-SDR. #
# * # GNSS-SDR is free software: you can redistribute it and/or modify
# * GNSS-SDR is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by
# * it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or
# * the Free Software Foundation, either version 3 of the License, or # (at your option) any later version.
# * at your option) any later version. #
# * # GNSS-SDR is distributed in the hope that it will be useful,
# * GNSS-SDR is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of
# * but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details.
# * GNU General Public License for more details. #
# * # You should have received a copy of the GNU General Public License
# * You should have received a copy of the GNU General Public License # along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. #
# * # -------------------------------------------------------------------------
# * ------------------------------------------------------------------------- #
# */
.function volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl .function volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl
.source 2 src1 .source 2 src1

View File

@ -1,37 +1,36 @@
#/*! #
# * \file volk_gnsssdr_8ic_s8ic_multiply_8ic.orc # ORC implementation: multiplies a group of 16 bits vectors by one constant vector
# * \brief ORC implementation: multiplies a group of 16 bits vectors by one constant vector #
# * \authors <ul> # Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com #
# * </ul> #
# * # ORC code that multiplies a group of 16 bits vectors
# * ORC code that multiplies a group of 16 bits vectors # (8 bits the real part and 8 bits the imaginary part) by one constant vector
# * (8 bits the real part and 8 bits the imaginary part) by one constant vector #
# * # -------------------------------------------------------------------------
# * ------------------------------------------------------------------------- #
# * # Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) #
# * # GNSS-SDR is a software defined Global Navigation
# * GNSS-SDR is a software defined Global Navigation # Satellite Systems receiver
# * Satellite Systems receiver #
# * # This file is part of GNSS-SDR.
# * This file is part of GNSS-SDR. #
# * # GNSS-SDR is free software: you can redistribute it and/or modify
# * GNSS-SDR is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by
# * it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or
# * the Free Software Foundation, either version 3 of the License, or # (at your option) any later version.
# * at your option) any later version. #
# * # GNSS-SDR is distributed in the hope that it will be useful,
# * GNSS-SDR is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of
# * but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details.
# * GNU General Public License for more details. #
# * # You should have received a copy of the GNU General Public License
# * You should have received a copy of the GNU General Public License # along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. #
# * # -------------------------------------------------------------------------
# * ------------------------------------------------------------------------- #
# */
.function volk_gnsssdr_8ic_s8ic_multiply_8ic_a_orc_impl .function volk_gnsssdr_8ic_s8ic_multiply_8ic_a_orc_impl
.source 2 src1 .source 2 src1

View File

@ -1,37 +1,37 @@
#/*! #
# * \file volk_gnsssdr_8ic_x2_dot_prod_8ic.orc # ORC implementation: multiplies two 16 bits vectors and accumulates them
# * \brief ORC implementation: multiplies two 16 bits vectors and accumulates them #
# * \authors <ul> # Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com #
# * </ul> #
# * # ORC code that multiplies two 16 bits vectors (8 bits the real part
# * ORC code that multiplies two 16 bits vectors (8 bits the real part # and 8 bits the imaginary part) and accumulates them
# * and 8 bits the imaginary part) and accumulates them #
# * #
# * ------------------------------------------------------------------------- # -------------------------------------------------------------------------
# * #
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) # Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# * #
# * GNSS-SDR is a software defined Global Navigation # GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver # Satellite Systems receiver
# * #
# * This file is part of GNSS-SDR. # This file is part of GNSS-SDR.
# * #
# * GNSS-SDR is free software: you can redistribute it and/or modify # GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or # the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version. # (at your option) any later version.
# * #
# * GNSS-SDR is distributed in the hope that it will be useful, # GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of # but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details. # GNU General Public License for more details.
# * #
# * You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. # along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# * #
# * ------------------------------------------------------------------------- # -------------------------------------------------------------------------
# */ #
.function volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl .function volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl
.source 2 src1 .source 2 src1

View File

@ -1,37 +1,37 @@
#/*! #
# * \file volk_gnsssdr_8ic_x2_multiply_8ic.orc # ORC implementation: multiplies two 16 bits vectors
# * \brief ORC implementation: multiplies two 16 bits vectors #
# * \authors <ul> # Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com #
# * </ul> #
# * # ORC code that multiplies two 16 bits vectors (8 bits the real part
# * ORC code that multiplies two 16 bits vectors (8 bits the real part # and 8 bits the imaginary part)
# * and 8 bits the imaginary part) #
# * #
# * ------------------------------------------------------------------------- # -------------------------------------------------------------------------
# * #
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) # Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# * #
# * GNSS-SDR is a software defined Global Navigation # GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver # Satellite Systems receiver
# * #
# * This file is part of GNSS-SDR. # This file is part of GNSS-SDR.
# * #
# * GNSS-SDR is free software: you can redistribute it and/or modify # GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or # the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version. # (at your option) any later version.
# * #
# * GNSS-SDR is distributed in the hope that it will be useful, # GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of # but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details. # GNU General Public License for more details.
# * #
# * You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. # along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# * #
# * ------------------------------------------------------------------------- # -------------------------------------------------------------------------
# */ #
.function volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl .function volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl
.source 2 src1 .source 2 src1

View File

@ -1,47 +1,46 @@
#/*! #
# * \file volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.orc # ORC implementation: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors
# * \brief ORC implementation: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors # Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * \authors <ul> #
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com #
# * </ul> # ORC code that performs the carrier wipe-off mixing and the
# * # Early, Prompt, and Late correlation with 16 bits vectors (8 bits the
# * ORC code that performs the carrier wipe-off mixing and the # real part and 8 bits the imaginary part):
# * Early, Prompt, and Late correlation with 16 bits vectors (8 bits the # - The carrier wipe-off is done by multiplying the input signal by the
# * real part and 8 bits the imaginary part): # carrier (multiplication of 16 bits vectors) It returns the input
# * - The carrier wipe-off is done by multiplying the input signal by the # signal in base band (BB)
# * carrier (multiplication of 16 bits vectors) It returns the input # - Early values are calculated by multiplying the input signal in BB by the
# * signal in base band (BB) # early code (multiplication of 16 bits vectors), accumulating the results
# * - Early values are calculated by multiplying the input signal in BB by the # - Prompt values are calculated by multiplying the input signal in BB by the
# * early code (multiplication of 16 bits vectors), accumulating the results # prompt code (multiplication of 16 bits vectors), accumulating the results
# * - Prompt values are calculated by multiplying the input signal in BB by the # - Late values are calculated by multiplying the input signal in BB by the
# * prompt code (multiplication of 16 bits vectors), accumulating the results # late code (multiplication of 16 bits vectors), accumulating the results
# * - Late values are calculated by multiplying the input signal in BB by the #
# * late code (multiplication of 16 bits vectors), accumulating the results #
# * # -------------------------------------------------------------------------
# * ------------------------------------------------------------------------- #
# * # Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) #
# * # GNSS-SDR is a software defined Global Navigation
# * GNSS-SDR is a software defined Global Navigation # Satellite Systems receiver
# * Satellite Systems receiver #
# * # This file is part of GNSS-SDR.
# * This file is part of GNSS-SDR. #
# * # GNSS-SDR is free software: you can redistribute it and/or modify
# * GNSS-SDR is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by
# * it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or
# * the Free Software Foundation, either version 3 of the License, or # (at your option) any later version.
# * at your option) any later version. #
# * # GNSS-SDR is distributed in the hope that it will be useful,
# * GNSS-SDR is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of
# * but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details.
# * GNU General Public License for more details. #
# * # You should have received a copy of the GNU General Public License
# * You should have received a copy of the GNU General Public License # along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. #
# * # -------------------------------------------------------------------------
# * ------------------------------------------------------------------------- #
# */
.function volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_first_a_orc_impl .function volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_first_a_orc_impl
.source 2 input .source 2 input

View File

@ -1,36 +1,35 @@
#/*! #
# * \file volk_gnsssdr_8u_x2_multiply_8u.orc # ORC implementation: multiplies unsigned char values
# * \brief ORC implementation: multiplies unsigned char values #
# * \authors <ul> # Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com #
# * </ul> #
# * # ORC code that multiplies unsigned char values (8 bits data)
# * ORC code that multiplies unsigned char values (8 bits data) #
# * # -------------------------------------------------------------------------
# * ------------------------------------------------------------------------- #
# * # Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors) #
# * # GNSS-SDR is a software defined Global Navigation
# * GNSS-SDR is a software defined Global Navigation # Satellite Systems receiver
# * Satellite Systems receiver #
# * # This file is part of GNSS-SDR.
# * This file is part of GNSS-SDR. #
# * # GNSS-SDR is free software: you can redistribute it and/or modify
# * GNSS-SDR is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by
# * it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or
# * the Free Software Foundation, either version 3 of the License, or # (at your option) any later version.
# * at your option) any later version. #
# * # GNSS-SDR is distributed in the hope that it will be useful,
# * GNSS-SDR is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of
# * but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details.
# * GNU General Public License for more details. #
# * # You should have received a copy of the GNU General Public License
# * You should have received a copy of the GNU General Public License # along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. #
# * # -------------------------------------------------------------------------
# * ------------------------------------------------------------------------- #
# */
.function volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl .function volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl
.source 1 src1 .source 1 src1

View File

@ -1,71 +0,0 @@
########################################################################
# Patching original volk module
########################################################################
In order to fit the GNSS-SDR needs, the original volk module must be patched.
The folder containing this file has some patches to automate the process and
modify the files quickly. To apply them you will need to run the following command:
$ patch -p5 < /Path/Of/The/Patch/nameOfThePatch.patch
The number after “-p” may change, read the patch documentation for more help.
You may need this information if you want to recreate the volk_gnsssdr module again
or you want to update the volk_gnsssdr module with the improvements introduced by GNURadio.
########################################################################
########################################################################
# Operations applied by the patches and other information (not needed if you know how to apply the patches)
########################################################################
########################################################################
To create the volk module you will need to follow the following steps:
In order to understand and follow the creation and setup of the volk_gnsssdr module I will use some absolute paths: /Users/andres/Github/gnuradio => a cloned repository of the GNURadio project. /Users/andres/Github/gnss-sdr => a cloned repository of the GNSS-SDR project.
########################################################################
#FIRST STEP: using volk_modtool to create a new volk module
########################################################################
GNURadio offers a tool called volk_modtool to create and manage new volk modules and their proto-kernels. The steps to create the volk_gnsssdr module are:
1) Export the PYTHONPATH, that indicates where volk_modtool is:
$ export PYTHONPATH=/Users/andres/Github/gnuradio/volk/python
2) Go to the folder where volk_modtool executable is: $ cd /Users/andres/Github/gnuradio/volk/python/volk_modtool
3) Execute volk_modtool indicating that we want to create a new volk module (-i): $ ./volk_modtool -i
4) volk_modtool will ask us about the name of the newly created module, the destination folder where you want to store it and the base module (the base module is the volk module inside the GNURadio project): name: gnsssdr destination: /Users/andres/Github/gnss-sdr/src/algorithms/libs base: /Users/andres/github/gnuradio/volk
########################################################################
#SECOND STEP: add proto-kernels to the module
########################################################################
After creating the module you will need to add some proto-kernels to it. To accomplish it you will need to: 1) Copy your proto-kernels inside the /kernels folder. Copy the ORC implementations inside the /orc folder. Copy the macros implementations inside the /kernels/CommonMacros folder. (those folders are found in the root of the volk_gnsssdr module)
2) Add one profiling line for each of the proto-kernels inside the /apps/volk_gnsssdr_profile.cc file.
3) Add one test line for each of the proto-kernels inside the /lib/testqa.cc file. ########################################################################
#THIRD STEP: modifications to allow profiling of some proto-kernels with special parameters
######################################################################## Some of the proto-kernels that GNSS-SDR needs are not supported by the profiling environment of the volk_gnsssdr module. In order to profile them some modifications need to be done to two files: 1) Modify /src/algorithms/libs/volk_gnsssdr/lib/qa_utils.cc The first part of this file defines the parameters supported by the environment. The number after run_cast_test indicates the total number of parameters passed to the proto-kernel (input + output parameters). The other part indicates the type of the data passed. Inside func(....) you will need to add the same number of buffs[ ] as the one specified after run_cast_test.
2) Modify /src/algorithms/libs/volk_gnsssdr/lib/qa_utils.h In the header you will need to add typedefs for the new definitions made in the .cc file. Take care: you will need to add the same number of void * as the one specified after run_cast_test.
########################################################################
#FOURTH STEP: optional modifications
########################################################################
1) Modify /src/algorithms/libs/volk_gnsssdr/lib/CMakeLists.txt in order to see kernel files, ORC files and macros when generating the IDE project.
2) To be able to use volk_gnsssdr and default volk functions at the same time in the same file you will need to modify the template files that the volk_gnsssdr module uses at build time to generate some headers.
The files modified are found inside /tmpl: volk_gnsssdr.tmpl.h
volk_gnsssdr_typedefs.tmpl.h
volk_gnsssdr_machines.tmpl.h
volk_gnsssdr_cpu.tmpl.h
volk_gnsssdr_config_fixed.tmpl.h The modifications consist of changing the defines of those files to different ones to allow the definition of the volk_gnsssdr functions although the default volk functions are already defined.
########################################################################
#FIFTH STEP: add volk_gnsssdr module to the GNSS-SDR project
########################################################################
In order to add the volk_gnsssdr module to the GNSS-SDR project the CMakeLists.txt global file needs to be edited.
########################################################################
#SIXTH STEP: using volk_gnsssdr functions
########################################################################
To use the proto-kernels inside the volk_gnsssdr project two steps are needed: 1) in the CMakeLists.txt you will need to add ${VOLK_GNSSSDR_INCLUDE_DIRS} inside the include_directories function, and also add ${VOLK_GNSSSDR_LIBRARIES} inside the target_link_libraries function.
2) Add the line #include "volk_gnsssdr.h" at the top of the file.

View File

@ -16,7 +16,7 @@
* GNSS-SDR is free software: you can redistribute it and/or modify * GNSS-SDR is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or * the Free Software Foundation, either version 3 of the License, or
* at your option) any later version. * (at your option) any later version.
* *
* GNSS-SDR is distributed in the hope that it will be useful, * GNSS-SDR is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of * but WITHOUT ANY WARRANTY; without even the implied warranty of
@ -41,7 +41,7 @@ TEST(FileOutputFilter, Instantiate)
config->set_property("Test.filename", file); config->set_property("Test.filename", file);
config->set_property("Test.item_type", "float"); config->set_property("Test.item_type", "float");
std::unique_ptr<FileOutputFilter> output_filter(new FileOutputFilter(config.get(), "Test", 1, 0)); std::unique_ptr<FileOutputFilter> output_filter(new FileOutputFilter(config.get(), "Test", 1, 0));
unsigned int res = 0; int res = 0;
if (output_filter) res = 1; if (output_filter) res = 1;
ASSERT_EQ(1, res); ASSERT_EQ(1, res);
} }

View File

@ -93,7 +93,7 @@ TEST_F(Fir_Filter_Test, Instantiate)
{ {
init(); init();
std::unique_ptr<FirFilter> filter(new FirFilter(config.get(), "InputFilter", 1, 1, queue)); std::unique_ptr<FirFilter> filter(new FirFilter(config.get(), "InputFilter", 1, 1, queue));
unsigned int res = 0; int res = 0;
if (filter) res = 1; if (filter) res = 1;
ASSERT_EQ(1, res); ASSERT_EQ(1, res);
} }