1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2024-06-24 05:53:16 +00:00

Code cleaning

This commit is contained in:
Carles Fernandez 2014-11-22 10:19:06 +01:00
parent 7a90027989
commit 1fadab3d08
34 changed files with 1487 additions and 1329 deletions

View File

@ -0,0 +1,20 @@
########################################################################
# Adding proto-kernels to the module
########################################################################
1) Add your proto-kernels inside the kernels/ folder, and the ORC implementations inside the orc/ folder. Add the macro implementations inside the kernels/CommonMacros folder (these folders are found in the root of the volk_gnsssdr module).
2) Add one profiling line for each of the proto-kernels inside the /apps/volk_gnsssdr_profile.cc file.
3) Add one test line for each of the proto-kernels inside the /lib/testqa.cc file.

########################################################################
# Modifications to allow profiling of some proto-kernels with special parameters
########################################################################
Some of the proto-kernels that GNSS-SDR needs are not supported by the profiling environment of the volk_gnsssdr module. In order to profile them, some modifications need to be made to two files:
1) src/algorithms/libs/volk_gnsssdr/lib/qa_utils.cc
The parameters supported by the environment are defined in the first part of this file. The number after run_cast_test indicates the total number of parameters passed to the proto-kernel (input + output parameters). The other part indicates the type of the data passed. Inside func(....) you will need to add the same number of buffs[ ] as the number specified after run_cast_test.
2) src/algorithms/libs/volk_gnsssdr/lib/qa_utils.h
In the header you will need to add typedefs for the new definitions made in the .cc file. Take care: you will need to add the same number of void * as the number specified after run_cast_test.
3) To be able to use volk_gnsssdr and default volk functions at the same time in the same file, it is required to add the template files that volk_gnsssdr module uses at build time to generate some headers.
The files are found inside tmpl/:
volk_gnsssdr.tmpl.h
volk_gnsssdr_typedefs.tmpl.h
volk_gnsssdr_machines.tmpl.h
volk_gnsssdr_cpu.tmpl.h
volk_gnsssdr_config_fixed.tmpl.h

View File

@ -27,7 +27,7 @@ Processors providing SIMD instruction sets compute with multiple processing elem
\.TP
Check http://gnss-sdr.org for more information.
.SH HISTORY
This library was originally developed by Andres Cecilia Luque in the framework of the Summer of Code in Space program (SOCIS 2014) by the European Space Agency (ESA), and then integrated into \fBgnss-sdr\fR.
This library was originally developed by Andres Cecilia Luque in the framework of the Summer of Code in Space program (SOCIS 2014) by the European Space Agency (ESA), and then integrated into \fBgnss-sdr\fR. This software is based on the VOLK library available at https://gnuradio.org/redmine/projects/gnuradio/wiki/Volk
.SH BUGS
No known bugs.
.SH AUTHOR

View File

@ -1,5 +1,9 @@
/* -*- c++ -*- */
/* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
/*!
* \file constants.h
* \brief volk_gnsssdr constants
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
@ -17,8 +21,8 @@
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_VOLK_CONSTANTS_H
#define INCLUDED_VOLK_CONSTANTS_H
#ifndef GNSS_SDR_VOLK_GNSSSDR_CONSTANTS_H
#define GNSS_SDR_VOLK_GNSSSDR_CONSTANTS_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
@ -33,4 +37,4 @@ VOLK_API char* volk_gnsssdr_available_machines();
__VOLK_DECL_END
#endif /* INCLUDED_VOLK_CONSTANTS_H */
#endif /* GNSS_SDR_VOLK_GNSSSDR_CONSTANTS_H */

View File

@ -1,4 +1,9 @@
/* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
/*!
* \file volk_gnsssdr_common.h
* \brief Cross-platform attribute macros
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
@ -111,4 +116,4 @@ union bit128{
#define bit128_p(x) ((union bit128 *)(x))
#endif /*INCLUDED_LIBVOLK_COMMON_H*/
#endif /* INCLUDED_LIBVOLK_COMMON_H */

View File

@ -1,4 +1,9 @@
/* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
/*!
* \file volk_gnsssdr_complex.h
* \brief Provide typedefs and operators for all complex types in C and C++.
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*

View File

@ -1,5 +1,10 @@
/* -*- c -*- */
/* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
/*!
* \file volk_gnsssdr_malloc.h
* \brief The volk_gnsssdr_malloc function behaves like malloc in that it
* returns a pointer to the allocated memory.
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
@ -34,7 +39,7 @@ __VOLK_DECL_BEGIN
* memory that are guaranteed to be on an alignment, VOLK handles this
* itself. The volk_gnsssdr_malloc function behaves like malloc in that it
* returns a pointer to the allocated memory. However, it also takes
* in an alignment specfication, which is usually something like 16 or
* in an alignment specification, which is usually something like 16 or
* 32 to ensure that the aligned memory is located on a particular
* byte boundary for use with SIMD.
*
@ -55,7 +60,7 @@ VOLK_API void *volk_gnsssdr_malloc(size_t size, size_t alignment);
/*!
* \brief Frees memory allocated by volk_gnsssdr_malloc.
* \param aptr The aligned pointer allocaed by volk_gnsssdr_malloc.
* \param aptr The aligned pointer allocated by volk_gnsssdr_malloc.
*/
VOLK_API void volk_gnsssdr_free(void *aptr);

View File

@ -1,4 +1,10 @@
/* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
/*!
* \file volk_gnsssdr_prefs.h
* \brief Gets path to volk_gnsssdr_config profiling info and loads
* prefs into global prefs struct
* \author Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
@ -31,16 +37,16 @@ typedef struct volk_gnsssdr_arch_pref
char impl_u[128]; //best unaligned impl
} volk_gnsssdr_arch_pref_t;
////////////////////////////////////////////////////////////////////////
// get path to volk_gnsssdr_config profiling info
////////////////////////////////////////////////////////////////////////
/*!
* \brief get path to volk_gnsssdr_config profiling info
*/
VOLK_API void volk_gnsssdr_get_config_path(char *);
////////////////////////////////////////////////////////////////////////
// load prefs into global prefs struct
////////////////////////////////////////////////////////////////////////
/*!
* \brief load prefs into global prefs struct
*/
VOLK_API size_t volk_gnsssdr_load_preferences(volk_gnsssdr_arch_pref_t **);
__VOLK_DECL_END
#endif //INCLUDED_VOLK_PREFS_H
#endif /* INCLUDED_VOLK_PREFS_H */

View File

@ -9,13 +9,13 @@ Inline functions have been tested, and they introduce a really small time penalt
Syntax
####################################################################
In order to allow better understanding of the code I created the macros with an specific syntax.
In order to allow better understanding of the code we created the macros with a specific syntax:
1) Inside CommonMacros.h you will find macros for common operations. I will explain the syntax with an example:
1) Inside CommonMacros.h you will find macros for common operations. We will explain the syntax with an example:
example: CM_16IC_X4_SCALAR_PRODUCT_16IC_X2_U_SSE2(realx, imagx, realy, imagy, realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy, real_output, imag_output)
First of all, you find the characters “CM”, which means CommonMacros. After that the type and the amount of inputs is placed: “_16IC_X4” (16 bits complex integers, four inputs). The syntax for type is the same as the one used with volk protokernels, refer to GNURadio documentation for more help. The it comes the name of the macro (“_SCALAR_PRODUCT”), and after that the type and the amount of outputs (“_16IC_X2”). Finally it is placed the SSE minimum version needed to run (“_U_SSE2”). In the arguments you will find (from left to right) the inputs (four inputs: realx, imagx, realy, imagy), some variables that the macro needs to work (realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy) and finally the outputs (two outputs: real_output, imag_output).
First of all, you find the characters "CM", which stands for CommonMacros. After that, the type and the number of inputs are given: "_16IC_X4" (16-bit complex integers, four inputs). The syntax for the type is the same as the one used with volk protokernels; refer to the GNU Radio documentation for more help. Then comes the name of the macro ("_SCALAR_PRODUCT"), and after that the type and the number of outputs ("_16IC_X2"). Finally, the minimum SSE version needed to run is given ("_U_SSE2"). In the arguments you will find (from left to right) the inputs (four inputs: realx, imagx, realy, imagy), some variables that the macro needs to work (realx_mult_realy, imagx_mult_imagy, realx_mult_imagy, imagx_mult_realy) and finally the outputs (two outputs: real_output, imag_output).
The variables that the macro needs are specified when calling it in order to avoid after-compile problems: if you want to use a macro you will need to declare all the variables it needs before, or you will not be able to compile.
2) Inside all the other headers, CommonMacros_XXXXXX.h you will find macros for a specific group of proto-kernels. The syntax is the same as the CommonMacros.h
@ -24,11 +24,11 @@ The variables that the macro needs are specified when calling it in order to avo
Workflow
####################################################################
In order to use the macros easily, I usually test the code without macros inside a testing proto-kernel, where you are able to test it, debug it and use breakpoints.
When it works I place code inside a macro an I test it again.
In order to use the macros easily, we usually test the code without macros inside a testing proto-kernel, where you are able to test it, debug it and use breakpoints.
When it works, we place the code inside a macro and we test it again.
####################################################################
Why macros
####################################################################
1) They are the only way I could find for sharing code between proto-kernels without performance penalty.
1) They are the only way we could find for sharing code between proto-kernels without performance penalty.
2) It is true that they are really difficult to debug, but if you work with them responsibly it is not so hard. Volk_gnsssdr checks the results of all the SSE proto-kernel implementations against the results of the generic implementation, so if your macro is not working you will notice it after profiling it.

View File

@ -25,83 +25,87 @@
#ifndef LV_HAVE_SSE2
void qa_16s_add_quad_aligned16::t1() {
printf("sse2 not available... no test performed\n");
void qa_16s_add_quad_aligned16::t1()
{
printf("sse2 not available... no test performed\n");
}
#else
void qa_16s_add_quad_aligned16::t1()
{
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
const int vlen = 3200;
const int ITERS = 100000;
__VOLK_ATTR_ALIGNED(16) short input0[vlen];
__VOLK_ATTR_ALIGNED(16) short input1[vlen];
__VOLK_ATTR_ALIGNED(16) short input2[vlen];
__VOLK_ATTR_ALIGNED(16) short input3[vlen];
__VOLK_ATTR_ALIGNED(16) short input4[vlen];
__VOLK_ATTR_ALIGNED(16) short output0[vlen];
__VOLK_ATTR_ALIGNED(16) short output1[vlen];
__VOLK_ATTR_ALIGNED(16) short output2[vlen];
__VOLK_ATTR_ALIGNED(16) short output3[vlen];
__VOLK_ATTR_ALIGNED(16) short output01[vlen];
__VOLK_ATTR_ALIGNED(16) short output11[vlen];
__VOLK_ATTR_ALIGNED(16) short output21[vlen];
__VOLK_ATTR_ALIGNED(16) short output31[vlen];
void qa_16s_add_quad_aligned16::t1() {
for(int i = 0; i < vlen; ++i)
{
short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus1 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus1 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus2 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus2 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
const int vlen = 3200;
const int ITERS = 100000;
__VOLK_ATTR_ALIGNED(16) short input0[vlen];
__VOLK_ATTR_ALIGNED(16) short input1[vlen];
__VOLK_ATTR_ALIGNED(16) short input2[vlen];
__VOLK_ATTR_ALIGNED(16) short input3[vlen];
__VOLK_ATTR_ALIGNED(16) short input4[vlen];
input0[i] = plus0 - minus0;
input1[i] = plus1 - minus1;
input2[i] = plus2 - minus2;
input3[i] = plus3 - minus3;
input4[i] = plus4 - minus4;
__VOLK_ATTR_ALIGNED(16) short output0[vlen];
__VOLK_ATTR_ALIGNED(16) short output1[vlen];
__VOLK_ATTR_ALIGNED(16) short output2[vlen];
__VOLK_ATTR_ALIGNED(16) short output3[vlen];
__VOLK_ATTR_ALIGNED(16) short output01[vlen];
__VOLK_ATTR_ALIGNED(16) short output11[vlen];
__VOLK_ATTR_ALIGNED(16) short output21[vlen];
__VOLK_ATTR_ALIGNED(16) short output31[vlen];
}
printf("16s_add_quad_aligned\n");
for(int i = 0; i < vlen; ++i) {
short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus1 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus1 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus2 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus2 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short plus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
short minus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
start = clock();
for(int count = 0; count < ITERS; ++count)
{
volk_gnsssdr_16s_add_quad_aligned16_manual(output0, output1, output2, output3, input0, input1, input2, input3, input4, vlen << 1 , "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count)
{
volk_gnsssdr_16s_add_quad_aligned16_manual(output01, output11, output21, output31, input0, input1, input2, input3, input4, vlen << 1 , "sse2");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse2_time: %f\n", total);
for(int i = 0; i < 1; ++i)
{
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
}
input0[i] = plus0 - minus0;
input1[i] = plus1 - minus1;
input2[i] = plus2 - minus2;
input3[i] = plus3 - minus3;
input4[i] = plus4 - minus4;
}
printf("16s_add_quad_aligned\n");
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_gnsssdr_16s_add_quad_aligned16_manual(output0, output1, output2, output3, input0, input1, input2, input3, input4, vlen << 1 , "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
start = clock();
for(int count = 0; count < ITERS; ++count) {
volk_gnsssdr_16s_add_quad_aligned16_manual(output01, output11, output21, output31, input0, input1, input2, input3, input4, vlen << 1 , "sse2");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse2_time: %f\n", total);
for(int i = 0; i < 1; ++i) {
//printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
//printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
}
for(int i = 0; i < vlen; ++i) {
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output1[i], output11[i]);
CPPUNIT_ASSERT_EQUAL(output2[i], output21[i]);
CPPUNIT_ASSERT_EQUAL(output3[i], output31[i]);
}
for(int i = 0; i < vlen; ++i)
{
//printf("%d...%d\n", output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
CPPUNIT_ASSERT_EQUAL(output1[i], output11[i]);
CPPUNIT_ASSERT_EQUAL(output2[i], output21[i]);
CPPUNIT_ASSERT_EQUAL(output3[i], output31[i]);
}
}
#endif

View File

@ -22,14 +22,14 @@
#include <cppunit/extensions/HelperMacros.h>
#include <cppunit/TestCase.h>
class qa_16s_add_quad_aligned16 : public CppUnit::TestCase {
class qa_16s_add_quad_aligned16 : public CppUnit::TestCase
{
CPPUNIT_TEST_SUITE (qa_16s_add_quad_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
CPPUNIT_TEST_SUITE (qa_16s_add_quad_aligned16);
CPPUNIT_TEST (t1);
CPPUNIT_TEST_SUITE_END ();
private:
void t1 ();
private:
void t1 ();
};

View File

@ -25,99 +25,98 @@
#ifndef LV_HAVE_SSSE3
void qa_16s_branch_4_state_8_aligned16::t1() {
printf("ssse3 not available... no test performed\n");
void qa_16s_branch_4_state_8_aligned16::t1()
{
printf("ssse3 not available... no test performed\n");
}
#else
void qa_16s_branch_4_state_8_aligned16::t1() {
const int num_iters = 1000000;
const int vlen = 32;
void qa_16s_branch_4_state_8_aligned16::t1()
{
const int num_iters = 1000000;
const int vlen = 32;
static char permute0[16]__attribute__((aligned(16))) = {0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01, 0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03};
static char permute1[16]__attribute__((aligned(16))) = {0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03, 0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01};
static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f};
static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d};
static char* permuters[4] = {permute0, permute1, permute2, permute3};
static char permute0[16]__attribute__((aligned(16))) = {0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01, 0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03};
static char permute1[16]__attribute__((aligned(16))) = {0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03, 0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01};
static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f};
static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d};
static char* permuters[4] = {permute0, permute1, permute2, permute3};
unsigned int num_bytes = vlen << 1;
unsigned int num_bytes = vlen << 1;
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
__VOLK_ATTR_ALIGNED(16) short target[vlen];
__VOLK_ATTR_ALIGNED(16) short target2[vlen];
__VOLK_ATTR_ALIGNED(16) short target3[vlen];
__VOLK_ATTR_ALIGNED(16) short target[vlen];
__VOLK_ATTR_ALIGNED(16) short target2[vlen];
__VOLK_ATTR_ALIGNED(16) short target3[vlen];
__VOLK_ATTR_ALIGNED(16) short src0[vlen];
__VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = {
7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 };
__VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = {
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
__VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = {
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
__VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = {
0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 };
__VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = {
0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff };
__VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
__VOLK_ATTR_ALIGNED(16) short src0[vlen];
__VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] = {
7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 5, 7, 1, 3, 4, 6 };
__VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = {
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
__VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = {
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
__VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = {
0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000 };
__VOLK_ATTR_ALIGNED(16) short cntl3[vlen] = {
0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff };
__VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
for(int i = 0; i < vlen; ++i)
{
src0[i] = i;
}
printf("16s_branch_4_state_8_aligned\n");
for(int i = 0; i < vlen; ++i) {
src0[i] = i;
start = clock();
for(int i = 0; i < num_iters; ++i)
{
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
}
end = clock();
}
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("permute_and_scalar_add_time: %f\n", total);
printf("16s_branch_4_state_8_aligned\n");
start = clock();
for(int i = 0; i < num_iters; ++i)
{
volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target2, src0, permuters, cntl2, cntl3, scalars, "ssse3");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
start = clock();
for(int i = 0; i < num_iters; ++i) {
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
}
end = clock();
printf("branch_4_state_8_time, ssse3: %f\n", total);
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
start = clock();
for(int i = 0; i < num_iters; ++i)
{
volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic");
}
end = clock();
printf("permute_and_scalar_add_time: %f\n", total);
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("permute_and_scalar_add_time, generic: %f\n", total);
for(int i = 0; i < vlen; ++i)
{
printf("psa... %d, b4s8... %d\n", target[i], target3[i]);
}
start = clock();
for(int i = 0; i < num_iters; ++i) {
volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target2, src0, permuters, cntl2, cntl3, scalars, "ssse3");
}
end = clock();
for(int i = 0; i < vlen; ++i)
{
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("branch_4_state_8_time, ssse3: %f\n", total);
start = clock();
for(int i = 0; i < num_iters; ++i) {
volk_gnsssdr_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, cntl2, cntl3, scalars, "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("permute_and_scalar_add_time, generic: %f\n", total);
for(int i = 0; i < vlen; ++i) {
printf("psa... %d, b4s8... %d\n", target[i], target3[i]);
}
for(int i = 0; i < vlen; ++i) {
CPPUNIT_ASSERT(target[i] == target2[i]);
CPPUNIT_ASSERT(target[i] == target3[i]);
}
CPPUNIT_ASSERT(target[i] == target2[i]);
CPPUNIT_ASSERT(target[i] == target3[i]);
}
}

View File

@ -27,71 +27,75 @@
#ifndef LV_HAVE_SSE2
void qa_16s_permute_and_scalar_add_aligned16::t1() {
printf("sse2 not available... no test performed\n");
void qa_16s_permute_and_scalar_add_aligned16::t1()
{
printf("sse2 not available... no test performed\n");
}
#else
void qa_16s_permute_and_scalar_add_aligned16::t1() {
const int vlen = 64;
void qa_16s_permute_and_scalar_add_aligned16::t1()
{
const int vlen = 64;
unsigned int num_bytes = vlen << 1;
unsigned int num_bytes = vlen << 1;
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
volk_gnsssdr_environment_init();
clock_t start, end;
double total;
__VOLK_ATTR_ALIGNED(16) short target[vlen];
__VOLK_ATTR_ALIGNED(16) short target2[vlen];
__VOLK_ATTR_ALIGNED(16) short src0[vlen];
__VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl0[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl1[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl2[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl3[vlen];
__VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
__VOLK_ATTR_ALIGNED(16) short target[vlen];
__VOLK_ATTR_ALIGNED(16) short target2[vlen];
__VOLK_ATTR_ALIGNED(16) short src0[vlen];
__VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl0[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl1[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl2[vlen];
__VOLK_ATTR_ALIGNED(16) short cntl3[vlen];
__VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
for(int i = 0; i < vlen; ++i) {
src0[i] = i;
permute_indexes[i] = (3 * i)%vlen;
cntl0[i] = 0xff;
cntl1[i] = 0xff * (i%2);
cntl2[i] = 0xff * ((i>>1)%2);
cntl3[i] = 0xff * ((i%4) == 3);
}
for(int i = 0; i < vlen; ++i)
{
src0[i] = i;
permute_indexes[i] = (3 * i)%vlen;
cntl0[i] = 0xff;
cntl1[i] = 0xff * (i%2);
cntl2[i] = 0xff * ((i>>1)%2);
cntl3[i] = 0xff * ((i%4) == 3);
}
printf("16s_permute_and_scalar_add_aligned\n");
printf("16s_permute_and_scalar_add_aligned\n");
start = clock();
for(int i = 0; i < 100000; ++i) {
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "generic");
}
end = clock();
start = clock();
for(int i = 0; i < 100000; ++i)
{
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "generic");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("generic_time: %f\n", total);
printf("generic_time: %f\n", total);
start = clock();
for(int i = 0; i < 100000; ++i) {
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target2, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
}
end = clock();
start = clock();
for(int i = 0; i < 100000; ++i)
{
volk_gnsssdr_16s_permute_and_scalar_add_aligned16_manual(target2, src0, permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
}
end = clock();
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
total = (double)(end-start)/(double)CLOCKS_PER_SEC;
printf("sse2_time: %f\n", total);
printf("sse2_time: %f\n", total);
for(int i = 0; i < vlen; ++i) {
//for(int i = 0; i < vlen; ++i) {
//printf("generic... %d, sse2... %d\n", target[i], target2[i]);
}
//}
for(int i = 0; i < vlen; ++i) {
CPPUNIT_ASSERT(target[i] == target2[i]);
}
for(int i = 0; i < vlen; ++i)
{
CPPUNIT_ASSERT(target[i] == target2[i]);
}
}
#endif

View File

@ -26,53 +26,57 @@
#ifndef LV_HAVE_SSE2
void qa_16s_quad_max_star_aligned16::t1() {
printf("sse2 not available... no test performed\n");
void qa_16s_quad_max_star_aligned16::t1()
{
printf("sse2 not available... no test performed\n");
}
#else
void qa_16s_quad_max_star_aligned16::t1() {
const int vlen = 34;
void qa_16s_quad_max_star_aligned16::t1()
{
const int vlen = 34;
__VOLK_ATTR_ALIGNED(16) short input0[vlen];
__VOLK_ATTR_ALIGNED(16) short input1[vlen];
__VOLK_ATTR_ALIGNED(16) short input2[vlen];
__VOLK_ATTR_ALIGNED(16) short input3[vlen];
__VOLK_ATTR_ALIGNED(16) short input0[vlen];
__VOLK_ATTR_ALIGNED(16) short input1[vlen];
__VOLK_ATTR_ALIGNED(16) short input2[vlen];
__VOLK_ATTR_ALIGNED(16) short input3[vlen];
__VOLK_ATTR_ALIGNED(16) short output0[vlen];
__VOLK_ATTR_ALIGNED(16) short output1[vlen];
__VOLK_ATTR_ALIGNED(16) short output0[vlen];
__VOLK_ATTR_ALIGNED(16) short output1[vlen];
for(int i = 0; i < vlen; ++i) {
short plus0 = (short) (rand() - (RAND_MAX/2));
short plus1 = (short) (rand() - (RAND_MAX/2));
short plus2 = (short) (rand() - (RAND_MAX/2));
short plus3 = (short) (rand() - (RAND_MAX/2));
for(int i = 0; i < vlen; ++i)
{
short plus0 = (short) (rand() - (RAND_MAX/2));
short plus1 = (short) (rand() - (RAND_MAX/2));
short plus2 = (short) (rand() - (RAND_MAX/2));
short plus3 = (short) (rand() - (RAND_MAX/2));
short minus0 = (short) (rand() - (RAND_MAX/2));
short minus1 = (short) (rand() - (RAND_MAX/2));
short minus2 = (short) (rand() - (RAND_MAX/2));
short minus3 = (short) (rand() - (RAND_MAX/2));
short minus0 = (short) (rand() - (RAND_MAX/2));
short minus1 = (short) (rand() - (RAND_MAX/2));
short minus2 = (short) (rand() - (RAND_MAX/2));
short minus3 = (short) (rand() - (RAND_MAX/2));
input0[i] = plus0 - minus0;
input1[i] = plus1 - minus1;
input2[i] = plus2 - minus2;
input3[i] = plus3 - minus3;
}
input0[i] = plus0 - minus0;
input1[i] = plus1 - minus1;
input2[i] = plus2 - minus2;
input3[i] = plus3 - minus3;
}
volk_gnsssdr_16s_quad_max_star_aligned16_manual(output0, input0, input1, input2, input3, 2*vlen, "generic");
volk_gnsssdr_16s_quad_max_star_aligned16_manual(output0, input0, input1, input2, input3, 2*vlen, "generic");
volk_gnsssdr_16s_quad_max_star_aligned16_manual(output1, input0, input1, input2, input3, 2*vlen, "sse2");
volk_gnsssdr_16s_quad_max_star_aligned16_manual(output1, input0, input1, input2, input3, 2*vlen, "sse2");
printf("16s_quad_max_star_aligned\n");
for(int i = 0; i < vlen; ++i) {
printf("generic... %d, sse2... %d, inputs: %d, %d, %d, %d\n", output0[i], output1[i], input0[i], input1[i], input2[i], input3[i]);
}
printf("16s_quad_max_star_aligned\n");
for(int i = 0; i < vlen; ++i)
{
printf("generic... %d, sse2... %d, inputs: %d, %d, %d, %d\n", output0[i], output1[i], input0[i], input1[i], input2[i], input3[i]);
}
for(int i = 0; i < vlen; ++i) {
CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
}
for(int i = 0; i < vlen; ++i)
{
CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
}
}
#endif

View File

@ -27,54 +27,60 @@
#ifndef LV_HAVE_SSE
// Built without SSE: there is no second implementation to compare against,
// so the test only reports that it was skipped.
void qa_32f_fm_detect_aligned16::t1()
{
    printf("sse not available... no test performed\n");
}
#else
/*
 * Runs the 32f_fm_detect kernel ITERS times with the "generic" and the "sse"
 * implementation over the same random input, prints the elapsed CPU time of
 * each run, and asserts that both outputs agree element by element within a
 * relative tolerance of 1e-4.
 */
void qa_32f_fm_detect_aligned16::t1()
{
    volk_gnsssdr_environment_init();
    clock_t start, end;
    double total;
    const int vlen = 3201;
    const int ITERS = 10000;
    __VOLK_ATTR_ALIGNED(16) float input0[vlen];
    __VOLK_ATTR_ALIGNED(16) float output0[vlen];
    __VOLK_ATTR_ALIGNED(16) float output01[vlen];

    // Random input in [-1, 1].
    for(int i = 0; i < vlen; ++i)
        {
            input0[i] = ((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2));
        }
    printf("32f_fm_detect_aligned\n");

    // `save` is passed by address to the kernel; it is reset to the same
    // starting value before each timed run so both runs begin identically.
    start = clock();
    float save = 0.1;
    for(int count = 0; count < ITERS; ++count)
        {
            volk_gnsssdr_32f_fm_detect_aligned16_manual(output0, input0, 1.0, &save, vlen, "generic");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("generic_time: %f\n", total);

    start = clock();
    save = 0.1;
    for(int count = 0; count < ITERS; ++count)
        {
            volk_gnsssdr_32f_fm_detect_aligned16_manual(output01, input0, 1.0, &save, vlen, "sse");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("sse_time: %f\n", total);

    for(int i = 0; i < vlen; ++i)
        {
            CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i]) * 1e-4);
        }
}
#endif

View File

@ -26,96 +26,97 @@
#define ERR_DELTA (1e-4)
#define NUM_ITERS 1000000
#define VEC_LEN 3097
// Maps one rand() draw onto the closed interval [-1, 1].
static float uniform()
{
    return 2.0 * ((float) rand() / RAND_MAX - 0.5);
}
// Fills buf[0] .. buf[n-1] with values from uniform() scaled by 32767.
static void
random_floats (float *buf, unsigned n)
{
    for (unsigned int i = 0; i < n; i++)
        {
            buf[i] = uniform () * 32767;
        }
}
#ifndef LV_HAVE_SSE
// Built without SSE: nothing to compare, report the skip.
void qa_32f_index_max_aligned16::t1()
{
    printf("sse not available... no test performed\n");
}
#else
/*
 * Runs the 32f_index_max kernel NUM_ITERS times through three paths
 * ("generic", "sse2", and whatever the runtime dispatcher selects), prints
 * the elapsed CPU time of each, and asserts that all three report the same
 * index for the same random input.
 */
void qa_32f_index_max_aligned16::t1()
{
    const int vlen = VEC_LEN;

    volk_gnsssdr_runtime_init();
    volk_gnsssdr_environment_init();

    unsigned int i_target_sse4_1;
    unsigned int i_target_sse;
    unsigned int i_target_generic;
    unsigned int* target_sse4_1 = &i_target_sse4_1;
    unsigned int* target_sse = &i_target_sse;
    unsigned int* target_generic = &i_target_generic;

    // 16-byte aligned input buffer. Abort the test if the allocation fails
    // instead of handing an uninitialised pointer to the kernels.
    float* src0 = NULL;
    int ret = posix_memalign((void**)&src0, 16, vlen * sizeof(float));
    CPPUNIT_ASSERT_EQUAL(0, ret);

    random_floats((float*)src0, vlen);

    printf("32f_index_max_aligned16\n");

    clock_t start, end;
    double total;

    start = clock();
    for(int k = 0; k < NUM_ITERS; ++k)
        {
            volk_gnsssdr_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("generic time: %f\n", total);

    // NOTE(review): the label printed below says "sse" but the implementation
    // requested is "sse2" -- confirm which name the kernel registers.
    start = clock();
    for(int k = 0; k < NUM_ITERS; ++k)
        {
            volk_gnsssdr_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("sse time: %f\n", total);

    // Third run goes through the runtime dispatch table rather than a named
    // implementation.
    start = clock();
    for(int k = 0; k < NUM_ITERS; ++k)
        {
            get_volk_gnsssdr_runtime()->volk_gnsssdr_32f_index_max_aligned16(target_sse4_1, src0, vlen);
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("sse4.1 time: %f\n", total);

    printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], target_sse[0], target_sse4_1[0]);
    CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse[0]);
    CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse4_1[0]);

    free(src0);
}
#endif /* LV_HAVE_SSE */

View File

@ -25,83 +25,80 @@
#define ERR_DELTA (1e-4)
#define NUM_ITERS 1000000
#define VEC_LEN 3096
// Maps one rand() draw onto the closed interval [-1, 1].
static float uniform()
{
    return 2.0 * ((float) rand() / RAND_MAX - 0.5);
}
// Fills buf[0] .. buf[n-1] with values from uniform() scaled by 32767.
static void
random_floats (float *buf, unsigned n)
{
    for (unsigned int i = 0; i < n; i++)
        {
            buf[i] = uniform () * 32767;
        }
}
#ifndef LV_HAVE_SSE3
// Built without SSE3: nothing to compare, report the skip.
void qa_32fc_index_max_aligned16::t1()
{
    printf("sse3 not available... no test performed\n");
}
#else
/*
 * Runs the 32fc_index_max kernel NUM_ITERS times with the "generic" and the
 * "sse3" implementation over the same random complex input, prints the
 * elapsed CPU time of each, and asserts that the two reported indices differ
 * by no more than 1.1.
 */
void qa_32fc_index_max_aligned16::t1()
{
    const int vlen = VEC_LEN;

    volk_gnsssdr_environment_init();

    unsigned int i_target;
    unsigned int i_target_generic;
    unsigned int* target = &i_target;
    unsigned int* target_generic = &i_target_generic;

    // vlen << 3 is vlen * 8 bytes. Abort the test if the allocation fails
    // instead of handing an uninitialised pointer to the kernels.
    std::complex<float>* src0 = NULL;
    int ret = posix_memalign((void**)&src0, 16, vlen << 3);
    CPPUNIT_ASSERT_EQUAL(0, ret);

    // Fill real and imaginary parts as 2 * vlen consecutive floats.
    random_floats((float*)src0, vlen * 2);

    printf("32fc_index_max_aligned16\n");

    clock_t start, end;
    double total;

    // NOTE(review): the kernel receives vlen << 3 (the buffer size in bytes),
    // not vlen -- presumably this API takes a byte count; confirm against the
    // kernel prototype.
    start = clock();
    for(int k = 0; k < NUM_ITERS; ++k)
        {
            volk_gnsssdr_32fc_index_max_aligned16_manual(target_generic, src0, vlen << 3, "generic");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("generic time: %f\n", total);

    start = clock();
    for(int k = 0; k < NUM_ITERS; ++k)
        {
            volk_gnsssdr_32fc_index_max_aligned16_manual(target, src0, vlen << 3, "sse3");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("sse3 time: %f\n", total);

    printf("generic: %u, sse3: %u\n", target_generic[0], target[0]);
    CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], 1.1);

    free(src0);
}
#endif /*LV_HAVE_SSE3*/

View File

@ -26,57 +26,62 @@
#ifndef LV_HAVE_SSE3
// Built without SSE3: nothing to compare, report the skip.
void qa_32fc_power_spectral_density_32f_aligned16::t1()
{
    printf("sse3 not available... no test performed\n");
}
#else
/*
 * Runs the 32fc_power_spectral_density_32f kernel ITERS times with the
 * "generic" and the "sse3" implementation over the same random complex
 * input, prints the elapsed CPU time of each, and asserts that both outputs
 * agree element by element within a relative tolerance of 1e-4.
 */
void qa_32fc_power_spectral_density_32f_aligned16::t1()
{
    volk_gnsssdr_environment_init();
    clock_t start, end;
    double total;
    const int vlen = 3201;
    const int ITERS = 10000;
    __VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen];
    __VOLK_ATTR_ALIGNED(16) float output_generic[vlen];
    __VOLK_ATTR_ALIGNED(16) float output_sse3[vlen];

    const float scalar = vlen;
    const float rbw = 1.7;

    // Fill real and imaginary parts as 2 * vlen consecutive floats in [-1, 1].
    float* inputLoad = (float*)input0;
    for(int i = 0; i < 2*vlen; ++i)
        {
            inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / static_cast<float>((RAND_MAX/2)));
        }
    printf("32fc_power_spectral_density_32f_aligned\n");

    start = clock();
    for(int count = 0; count < ITERS; ++count)
        {
            volk_gnsssdr_32fc_power_spectral_density_32f_aligned16_manual(output_generic, input0, scalar, rbw, vlen, "generic");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("generic_time: %f\n", total);

    start = clock();
    for(int count = 0; count < ITERS; ++count)
        {
            volk_gnsssdr_32fc_power_spectral_density_32f_aligned16_manual(output_sse3, input0, scalar, rbw, vlen, "sse3");
        }
    end = clock();
    total = (double)(end-start)/(double)CLOCKS_PER_SEC;
    printf("sse3_time: %f\n", total);

    for(int i = 0; i < vlen; ++i)
        {
            CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], fabs(output_generic[i]*1e-4));
        }
}
#endif

View File

@ -16,8 +16,8 @@
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef VOLK_QA_UTILS_H
#define VOLK_QA_UTILS_H
#ifndef GNSS_SDR_VOLK_QA_UTILS_H
#define GNSS_SDR_VOLK_QA_UTILS_H
#include <cstdlib>
#include <string>
@ -42,22 +42,24 @@ volk_gnsssdr_type_t volk_gnsssdr_type_from_string(std::string);
float uniform(void);
void random_floats(float *buf, unsigned n);
// One timing record: a name, a measured time and the units of that time.
// NOTE(review): presumably one record per profiled implementation -- confirm
// against the code that fills it.
class volk_gnsssdr_test_time_t
{
public:
    std::string name;
    double time;
    std::string units;
};
// Results collected for one test: its names, the vector length and iteration
// count, a map of timing records keyed by string, and two "best arch" names.
// NOTE(review): best_arch_a / best_arch_u presumably mean best aligned /
// unaligned implementation -- confirm against the profiler.
class volk_gnsssdr_test_results_t
{
public:
    std::string name;
    std::string config_name;
    int vlen;
    int iter;
    std::map<std::string, volk_gnsssdr_test_time_t> results;
    std::string best_arch_a;
    std::string best_arch_u;
};
bool run_volk_gnsssdr_tests(
@ -117,4 +119,4 @@ typedef void (*volk_gnsssdr_fn_12arg_s8ic)(void *, void *, void *, void *, void
//ADDED BY GNSS-SDR. END
#endif //VOLK_QA_UTILS_H
#endif // GNSS_SDR_VOLK_QA_UTILS_H

View File

@ -1,5 +1,5 @@
/* -*- c++ -*- */
/* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
/*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*

View File

@ -1,5 +1,5 @@
/* -*- c -*- */
/* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
/*
* Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
*
* This file is part of GNSS-SDR.
*
@ -26,7 +26,7 @@
/*
* For #defines used to determine support for allocation functions,
* see: http://linux.die.net/man/3/aligned_alloc
*/
*/
// Disabling use of aligned_alloc. This function requires that size be
// a multiple of alignment, which is too restrictive for many uses of
@ -57,20 +57,22 @@
/*
 * Allocates `size` bytes whose address is a multiple of `alignment`, using
 * posix_memalign(). Returns the block, or NULL after printing a diagnostic to
 * stderr when the allocation fails. Release with volk_gnsssdr_free().
 */
void *volk_gnsssdr_malloc(size_t size, size_t alignment)
{
    void *ptr = NULL;
    int err;

    /* posix_memalign() only accepts alignments that are a power of two and a
     * multiple of sizeof(void *), so it rejects 1 with EINVAL. An alignment
     * of 1 constrains nothing, so plain malloc() satisfies it. */
    if(alignment == 1)
        {
            ptr = malloc(size);
            if(ptr == NULL && size != 0)
                {
                    fprintf(stderr, "VOLK: Error allocating memory (malloc)\n");
                }
            return ptr;
        }

    err = posix_memalign(&ptr, alignment, size);
    if(err != 0)
        {
            fprintf(stderr, "VOLK: Error allocating memory (posix_memalign: %d)\n", err);
            return NULL;
        }
    return ptr;
}
/* Releases a block returned by the posix_memalign()-based
 * volk_gnsssdr_malloc(). Such blocks are released with free(), exactly once. */
void volk_gnsssdr_free(void *ptr)
{
    free(ptr);
}
// _aligned_malloc has no restriction on size,
@ -79,16 +81,17 @@ void volk_gnsssdr_free(void *ptr)
/*
 * Allocates `size` bytes aligned to `alignment` with _aligned_malloc().
 * Returns the block, or NULL after printing a diagnostic to stderr when the
 * allocation fails. Release with volk_gnsssdr_free().
 */
void *volk_gnsssdr_malloc(size_t size, size_t alignment)
{
    void *ptr = _aligned_malloc(size, alignment);
    if(ptr == NULL)
        {
            fprintf(stderr, "VOLK: Error allocating memory (_aligned_malloc)\n");
        }
    return ptr;
}
/* Releases a block returned by the _aligned_malloc()-based
 * volk_gnsssdr_malloc(); it is handed to _aligned_free() exactly once. */
void volk_gnsssdr_free(void *ptr)
{
    _aligned_free(ptr);
}
// No standard handlers; we'll do it ourselves.
@ -96,43 +99,43 @@ void volk_gnsssdr_free(void *ptr)
/* Header stored immediately before every pointer handed to the user; it
 * remembers the address malloc() really returned so it can be freed later. */
struct block_info
{
    void *real;
};

/*
 * Portable aligned allocation built on malloc().
 *
 * Over-allocates, rounds the address up to a multiple of `alignment`, and
 * stores the original malloc() pointer in a block_info just below the
 * returned address. `alignment` is raised to at least
 * sizeof(struct block_info). The rounding uses a bit mask, so `alignment` is
 * assumed to be a power of two.
 *
 * Returns the aligned block, or NULL (with a diagnostic on stderr) if the
 * request size overflows or malloc() fails. Release with volk_gnsssdr_free().
 */
void *
volk_gnsssdr_malloc(size_t size, size_t alignment)
{
    void *real, *user;
    struct block_info *info;

    /* At least align to sizeof our struct */
    if (alignment < sizeof(struct block_info))
        {
            alignment = sizeof(struct block_info);
        }

    /* Reject requests whose padded size would wrap around size_t */
    if (alignment > SIZE_MAX / 2 || size > SIZE_MAX - (2 * alignment - 1))
        {
            fprintf(stderr, "VOLK: Error allocating memory (size overflow)\n");
            return NULL;
        }

    /* Alloc: room for the payload, the header and the worst-case padding */
    real = malloc(size + (2 * alignment - 1));
    if (real == NULL)
        {
            fprintf(stderr, "VOLK: Error allocating memory (malloc)\n");
            return NULL;
        }

    /* Get pointer to the various zones */
    user = (void *)((((uintptr_t) real) + sizeof(struct block_info) + alignment - 1) & ~(alignment - 1));
    info = (struct block_info *)(((uintptr_t)user) - sizeof(struct block_info));

    /* Store the info for the free */
    info->real = real;

    /* Return pointer to user */
    return user;
}

/*
 * Releases a block returned by the volk_gnsssdr_malloc() above. A NULL
 * pointer is ignored, matching free().
 */
void
volk_gnsssdr_free(void *ptr)
{
    struct block_info *info;

    if (ptr == NULL)
        {
            return;
        }

    /* Get the real pointer */
    info = (struct block_info *)(((uintptr_t)ptr) - sizeof(struct block_info));

    /* Release real pointer */
    free(info->real);
}
#endif // _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || HAVE_POSIX_MEMALIGN

View File

@ -31,10 +31,11 @@ void volk_gnsssdr_get_config_path(char *path)
char *home = NULL;
if (home == NULL) home = getenv("HOME");
if (home == NULL) home = getenv("APPDATA");
if (home == NULL){
path = NULL;
return;
}
if (home == NULL)
{
path = NULL;
return;
}
strcpy(path, home);
strcat(path, suffix);
}
@ -54,14 +55,14 @@ size_t volk_gnsssdr_load_preferences(volk_gnsssdr_arch_pref_t **prefs_res)
//reset the file pointer and write the prefs into volk_gnsssdr_arch_prefs
while(fgets(line, sizeof(line), config_file) != NULL)
{
prefs = (volk_gnsssdr_arch_pref_t *) realloc(prefs, (n_arch_prefs+1) * sizeof(*prefs));
volk_gnsssdr_arch_pref_t *p = prefs + n_arch_prefs;
if(sscanf(line, "%s %s %s", p->name, p->impl_a, p->impl_u) == 3 && !strncmp(p->name, "volk_gnsssdr_", 5))
{
n_arch_prefs++;
prefs = (volk_gnsssdr_arch_pref_t *) realloc(prefs, (n_arch_prefs+1) * sizeof(*prefs));
volk_gnsssdr_arch_pref_t *p = prefs + n_arch_prefs;
if(sscanf(line, "%s %s %s", p->name, p->impl_a, p->impl_u) == 3 && !strncmp(p->name, "volk_gnsssdr_", 5))
{
n_arch_prefs++;
}
}
}
fclose(config_file);
*prefs_res = prefs;
return n_arch_prefs;

View File

@ -24,70 +24,77 @@
#include <string.h>
/*
 * __popcnt(x): number of set bits in an unsigned value.
 * GCC 3.4 and later map it onto the compiler builtin; any other compiler
 * gets the bit-by-bit loop below. The fallback is `static inline` because a
 * plain `inline` definition provides no external definition in C99/C11 and
 * can fail to link when the call is not inlined.
 */
#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4
    #define __popcnt __builtin_popcount
#else
    static inline unsigned __popcnt(unsigned num)
    {
        unsigned pop = 0;
        while(num)
            {
                if (num & 0x1) pop++;
                num >>= 1;
            }
        return pop;
    }
#endif
/* Linear search for impl_name among the first n_impls entries of impl_names.
 * Names are compared over at most 20 characters. Returns the index of the
 * first match, or -1 when there is none. */
static int volk_gnsssdr_find_impl(
        const char *impl_names[],
        const size_t n_impls,
        const char *impl_name
)
{
    size_t i;
    for (i = 0; i < n_impls; i++)
        {
            if(!strncmp(impl_names[i], impl_name, 20))
                {
                    return (int) i;
                }
        }
    return -1;
}

/*
 * Returns the index of impl_name in impl_names.
 *
 * When impl_name is not in the list, a warning is printed and the index of
 * "generic" is returned instead. When "generic" is missing as well, the
 * result is -1; the previous code recursed without end in that case.
 */
int volk_gnsssdr_get_index(
        const char *impl_names[], //list of implementations by name
        const size_t n_impls,     //number of implementations available
        const char *impl_name     //the implementation name to find
)
{
    int index = volk_gnsssdr_find_impl(impl_names, n_impls, impl_name);
    if(index >= 0)
        {
            return index;
        }

    //requested implementation is unknown: fall back to the generic one
    printf("Volk warning: no arch found, returning generic impl\n");
    return volk_gnsssdr_find_impl(impl_names, n_impls, "generic");
}
int volk_gnsssdr_rank_archs(
const char *kern_name, //name of the kernel to rank
const char *impl_names[], //list of implementations by name
const int* impl_deps, //requirement mask per implementation
const bool* alignment, //alignment status of each implementation
size_t n_impls, //number of implementations available
const bool align //if false, filter aligned implementations
){
size_t i;
static volk_gnsssdr_arch_pref_t *volk_gnsssdr_arch_prefs;
static size_t n_arch_prefs = 0;
static int prefs_loaded = 0;
if(!prefs_loaded) {
n_arch_prefs = volk_gnsssdr_load_preferences(&volk_gnsssdr_arch_prefs);
prefs_loaded = 1;
}
// If we've defined VOLK_GENERIC to be anything, always return the
// 'generic' kernel. Used in GR's QA code.
char *gen_env = getenv("VOLK_GENERIC");
if(gen_env) {
return volk_gnsssdr_get_index(impl_names, n_impls, "generic");
}
int volk_gnsssdr_rank_archs(
const char *kern_name, //name of the kernel to rank
const char *impl_names[], //list of implementations by name
const int* impl_deps, //requirement mask per implementation
const bool* alignment, //alignment status of each implementation
size_t n_impls, //number of implementations available
const bool align //if false, filter aligned implementations
)
{
size_t i;
static volk_gnsssdr_arch_pref_t *volk_gnsssdr_arch_prefs;
static size_t n_arch_prefs = 0;
static int prefs_loaded = 0;
if(!prefs_loaded)
{
n_arch_prefs = volk_gnsssdr_load_preferences(&volk_gnsssdr_arch_prefs);
prefs_loaded = 1;
}
// If we've defined VOLK_GENERIC to be anything, always return the
// 'generic' kernel. Used in GR's QA code.
char *gen_env = getenv("VOLK_GENERIC");
if(gen_env)
{
return volk_gnsssdr_get_index(impl_names, n_impls, "generic");
}
//now look for the function name in the prefs list
for(i = 0; i < n_arch_prefs; i++)
{
if(!strncmp(kern_name, volk_gnsssdr_arch_prefs[i].name, sizeof(volk_gnsssdr_arch_prefs[i].name))) //found it
{
const char *impl_name = align? volk_gnsssdr_arch_prefs[i].impl_a : volk_gnsssdr_arch_prefs[i].impl_u;
return volk_gnsssdr_get_index(impl_names, n_impls, impl_name);
if(!strncmp(kern_name, volk_gnsssdr_arch_prefs[i].name, sizeof(volk_gnsssdr_arch_prefs[i].name))) //found it
{
const char *impl_name = align? volk_gnsssdr_arch_prefs[i].impl_a : volk_gnsssdr_arch_prefs[i].impl_u;
return volk_gnsssdr_get_index(impl_names, n_impls, impl_name);
}
}
}
//return the best index with the largest deps
size_t best_index_a = 0;
@ -95,19 +102,19 @@ int volk_gnsssdr_rank_archs(
int best_value_a = -1;
int best_value_u = -1;
for(i = 0; i < n_impls; i++)
{
const signed val = __popcnt(impl_deps[i]);
if (alignment[i] && val > best_value_a)
{
best_index_a = i;
best_value_a = val;
const signed val = __popcnt(impl_deps[i]);
if (alignment[i] && val > best_value_a)
{
best_index_a = i;
best_value_a = val;
}
if (!alignment[i] && val > best_value_u)
{
best_index_u = i;
best_value_u = val;
}
}
if (!alignment[i] && val > best_value_u)
{
best_index_u = i;
best_value_u = val;
}
}
//when align and we found a best aligned, use it
if (align && best_value_a != -1) return best_index_a;

View File

@ -16,8 +16,8 @@
* along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef INCLUDED_VOLK_RANK_ARCHS_H
#define INCLUDED_VOLK_RANK_ARCHS_H
#ifndef GNSS_SDR_VOLK_GNSSSDR_RANK_ARCHS_H
#define GNSS_SDR_VOLK_GNSSSDR_RANK_ARCHS_H
#include <stdlib.h>
#include <stdbool.h>
@ -44,4 +44,4 @@ int volk_gnsssdr_rank_archs(
#ifdef __cplusplus
}
#endif
#endif /*INCLUDED_VOLK_RANK_ARCHS_H*/
#endif /* GNSS_SDR_VOLK_GNSSSDR_RANK_ARCHS_H */

View File

@ -1,36 +1,34 @@
#/*!
# * \file volk_gnsssdr_8i_accumulator_s8i.orc
# * \brief ORC implementation: 8 bits (char) scalar accumulator
# * \authors <ul>
# * <li> Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that implements an accumulator of char values
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
#
# ORC implementation: 8 bits (char) scalar accumulator
#
# Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
#
# ORC code that implements an accumulator of char values
#
# -------------------------------------------------------------------------
#
# Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
#
# GNSS-SDR is a software defined Global Navigation
# Satellite Systems receiver
#
# This file is part of GNSS-SDR.
#
# GNSS-SDR is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# GNSS-SDR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
# -------------------------------------------------------------------------
#
.function volk_gnsssdr_8i_accumulator_s8i_a_orc_impl
.source 1 src1

View File

@ -1,36 +1,33 @@
#/*!
# * \file volk_gnsssdr_8i_x2_add_8i.orc
# * \brief ORC implementation: adds pairs of 8 bits (char) scalars
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that adds pairs of 8 bits (char) scalars
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
#
# ORC implementation: adds pairs of 8 bits (char) scalars
#
# Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
#
#
# -------------------------------------------------------------------------
#
# Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
#
# GNSS-SDR is a software defined Global Navigation
# Satellite Systems receiver
#
# This file is part of GNSS-SDR.
#
# GNSS-SDR is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# GNSS-SDR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
# -------------------------------------------------------------------------
#
.function volk_gnsssdr_8i_x2_add_8i_a_orc_impl
.dest 1 dst

View File

@ -1,38 +1,37 @@
#/*!
# * \file volk_gnsssdr_8ic_conjugate_8ic.orc
# * \brief ORC implementation: calculates the conjugate of a 16 bits vector
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that calculates the conjugate of a
# * 16 bits vector (8 bits the real part and 8 bits the imaginary part)
# * result = (real*real) + (imag*imag)
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
#
# ORC implementation: calculates the conjugate of a 16 bits vector
#
# Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
#
# ORC code that calculates the conjugate of a
# 16 bits vector (8 bits the real part and 8 bits the imaginary part)
# result = real - j*imag
#
#
# -------------------------------------------------------------------------
#
# Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
#
# GNSS-SDR is a software defined Global Navigation
# Satellite Systems receiver
#
# This file is part of GNSS-SDR.
#
# GNSS-SDR is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# GNSS-SDR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
# -------------------------------------------------------------------------
#
.function volk_gnsssdr_8ic_conjugate_8ic_a_orc_impl
.source 2 src1

View File

@ -1,38 +1,37 @@
#/*!
# * \file volk_gnsssdr_8ic_magnitude_squared_8i.orc
# * \brief ORC implementation: calculates the magnitude squared of a 16 bits vector
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that calculates the magnitude squared of a
# * 16 bits vector (8 bits the real part and 8 bits the imaginary part)
# * result = (real*real) + (imag*imag)
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
#
# ORC implementation: calculates the magnitude squared of a 16 bits vector
# Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
#
#
# ORC code that calculates the magnitude squared of a
# 16 bits vector (8 bits the real part and 8 bits the imaginary part)
# result = (real*real) + (imag*imag)
#
#
# -------------------------------------------------------------------------
#
# Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
#
# GNSS-SDR is a software defined Global Navigation
# Satellite Systems receiver
#
# This file is part of GNSS-SDR.
#
# GNSS-SDR is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# GNSS-SDR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
# -------------------------------------------------------------------------
#
.function volk_gnsssdr_8ic_magnitude_squared_8i_a_orc_impl
.source 2 src1

View File

@ -1,37 +1,36 @@
#/*!
# * \file volk_gnsssdr_8ic_s8ic_multiply_8ic.orc
# * \brief ORC implementation: multiplies a group of 16 bits vectors by one constant vector
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that multiplies a group of 16 bits vectors
# * (8 bits the real part and 8 bits the imaginary part) by one constant vector
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * (at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
#
# ORC implementation: multiplies a group of 16 bits vectors by one constant vector
#
# Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
#
#
# ORC code that multiplies a group of 16 bits vectors
# (8 bits the real part and 8 bits the imaginary part) by one constant vector
#
# -------------------------------------------------------------------------
#
# Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
#
# GNSS-SDR is a software defined Global Navigation
# Satellite Systems receiver
#
# This file is part of GNSS-SDR.
#
# GNSS-SDR is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# GNSS-SDR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
# -------------------------------------------------------------------------
#
.function volk_gnsssdr_8ic_s8ic_multiply_8ic_a_orc_impl
.source 2 src1

View File

@ -1,37 +1,37 @@
#/*!
# * \file volk_gnsssdr_8ic_x2_dot_prod_8ic.orc
# * \brief ORC implementation: multiplies two 16 bits vectors and accumulates them
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that multiplies two 16 bits vectors (8 bits the real part
# * and 8 bits the imaginary part) and accumulates them
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * (at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
#
# ORC implementation: multiplies two 16 bits vectors and accumulates them
#
# Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
#
#
# ORC code that multiplies two 16 bits vectors (8 bits the real part
# and 8 bits the imaginary part) and accumulates them
#
#
# -------------------------------------------------------------------------
#
# Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
#
# GNSS-SDR is a software defined Global Navigation
# Satellite Systems receiver
#
# This file is part of GNSS-SDR.
#
# GNSS-SDR is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# GNSS-SDR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
# -------------------------------------------------------------------------
#
.function volk_gnsssdr_8ic_x2_dot_prod_8ic_a_orc_impl
.source 2 src1

View File

@ -1,37 +1,37 @@
#/*!
# * \file volk_gnsssdr_8ic_x2_multiply_8ic.orc
# * \brief ORC implementation: multiplies two 16 bits vectors
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that multiplies two 16 bits vectors (8 bits the real part
# * and 8 bits the imaginary part)
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * (at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
#
# ORC implementation: multiplies two 16 bits vectors
#
# Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
#
#
# ORC code that multiplies two 16 bits vectors (8 bits the real part
# and 8 bits the imaginary part)
#
#
# -------------------------------------------------------------------------
#
# Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
#
# GNSS-SDR is a software defined Global Navigation
# Satellite Systems receiver
#
# This file is part of GNSS-SDR.
#
# GNSS-SDR is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# GNSS-SDR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
# -------------------------------------------------------------------------
#
.function volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl
.source 2 src1

View File

@ -1,47 +1,46 @@
#/*!
# * \file volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3.orc
# * \brief ORC implementation: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that performs the carrier wipe-off mixing and the
# * Early, Prompt, and Late correlation with 16 bits vectors (8 bits the
# * real part and 8 bits the imaginary part):
# * - The carrier wipe-off is done by multiplying the input signal by the
# * carrier (multiplication of 16 bits vectors). It returns the input
# * signal in base band (BB)
# * - Early values are calculated by multiplying the input signal in BB by the
# * early code (multiplication of 16 bits vectors), accumulating the results
# * - Prompt values are calculated by multiplying the input signal in BB by the
# * prompt code (multiplication of 16 bits vectors), accumulating the results
# * - Late values are calculated by multiplying the input signal in BB by the
# * late code (multiplication of 16 bits vectors), accumulating the results
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * (at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
#
# ORC implementation: performs the carrier wipe-off mixing and the Early, Prompt, and Late correlation with 16 bits vectors
# Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
#
#
# ORC code that performs the carrier wipe-off mixing and the
# Early, Prompt, and Late correlation with 16 bits vectors (8 bits the
# real part and 8 bits the imaginary part):
# - The carrier wipe-off is done by multiplying the input signal by the
# carrier (multiplication of 16 bits vectors). It returns the input
# signal in base band (BB)
# - Early values are calculated by multiplying the input signal in BB by the
# early code (multiplication of 16 bits vectors), accumulating the results
# - Prompt values are calculated by multiplying the input signal in BB by the
# prompt code (multiplication of 16 bits vectors), accumulating the results
# - Late values are calculated by multiplying the input signal in BB by the
# late code (multiplication of 16 bits vectors), accumulating the results
#
#
# -------------------------------------------------------------------------
#
# Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
#
# GNSS-SDR is a software defined Global Navigation
# Satellite Systems receiver
#
# This file is part of GNSS-SDR.
#
# GNSS-SDR is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# GNSS-SDR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
# -------------------------------------------------------------------------
#
.function volk_gnsssdr_8ic_x5_cw_epl_corr_8ic_x3_first_a_orc_impl
.source 2 input

View File

@ -1,36 +1,35 @@
#/*!
# * \file volk_gnsssdr_8u_x2_multiply_8u.orc
# * \brief ORC implementation: multiplies unsigned char values
# * \authors <ul>
# * <li> Andrés Cecilia, 2014. a.cecilia.luque(at)gmail.com
# * </ul>
# *
# * ORC code that multiplies unsigned char values (8 bits data)
# *
# * -------------------------------------------------------------------------
# *
# * Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
# *
# * GNSS-SDR is a software defined Global Navigation
# * Satellite Systems receiver
# *
# * This file is part of GNSS-SDR.
# *
# * GNSS-SDR is free software: you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation, either version 3 of the License, or
# * (at your option) any later version.
# *
# * GNSS-SDR is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
# *
# * -------------------------------------------------------------------------
# */
#
# ORC implementation: multiplies unsigned char values
#
# Andres Cecilia, 2014. a.cecilia.luque(at)gmail.com
#
#
# ORC code that multiplies unsigned char values (8 bits data)
#
# -------------------------------------------------------------------------
#
# Copyright (C) 2010-2014 (see AUTHORS file for a list of contributors)
#
# GNSS-SDR is a software defined Global Navigation
# Satellite Systems receiver
#
# This file is part of GNSS-SDR.
#
# GNSS-SDR is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# GNSS-SDR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
#
# -------------------------------------------------------------------------
#
.function volk_gnsssdr_8u_x2_multiply_8u_a_orc_impl
.source 1 src1

View File

@ -1,71 +0,0 @@
########################################################################
# Patching original volk module
########################################################################
In order to fit the GNSS-SDR needs, the original volk module must be patched.
The folder containing this file has some patches to automate the process and
modify the files quickly. To apply them you will need to run the following command:
$ patch -p5 < /Path/Of/The/Patch/nameOfThePatch.patch
The number after “-p” may change, read the patch documentation for more help.
You may need this information if you want to recreate the volk_gnsssdr module again
or you want to update the volk_gnsssdr module with the improvements introduced by GNURadio.
########################################################################
########################################################################
# Operations applied by the patches and other information (not needed if you know how to apply the patches!!!)
########################################################################
########################################################################
To create the volk module you will need to follow the following steps:
In order to understand and follow the creation and setup of the volk_gnsssdr module I will use some absolute paths: /Users/andres/Github/gnuradio => a cloned repository of the GNURadio project. /Users/andres/Github/gnss-sdr => a cloned repository of the GNSS-SDR project.
########################################################################
#FIRST STEP: using volk_modtool to create a new volk module
########################################################################
GNURadio offers a tool called volk_modtool to create and manage new volk modules and their proto-kernels. The steps to create the volk_gnsssdr module are:
1) Export the PYTHONPATH, that indicates where volk_modtool is:
$ export PYTHONPATH=/Users/andres/Github/gnuradio/volk/python
2) Go to the folder where volk_modtool executable is: $ cd /Users/andres/Github/gnuradio/volk/python/volk_modtool
3) Execute volk_modtool indicating that we want to create a new volk module (-i): $ ./volk_modtool -i
4) volk_modtool will ask us about the name of the newly created module, the destination folder where you want to store it and the base module (the base module is the volk module inside the GNURadio project): name: gnsssdr destination: /Users/andres/Github/gnss-sdr/src/algorithms/libs base: /Users/andres/Github/gnuradio/volk
########################################################################
#SECOND STEP: add proto-kernels to the module
########################################################################
After creating the module you will need to add some proto-kernels to it. To accomplish it you will need to: 1) Copy your proto-kernels inside the /kernels folder. Copy the ORC implementations inside the /orc folder. Copy the macros implementations inside the /kernels/CommonMacros folder. (those folders are found in the root of the volk_gnsssdr module)
2) Add one profiling line for each of the proto-kernels inside the /apps/volk_gnsssdr_profile.cc file.
3) Add one test line for each of the proto-kernels inside the /lib/testqa.cc file.
########################################################################
#THIRD STEP: modifications to allow profiling of some proto-kernels with special parameters
########################################################################
Some of the proto-kernels that GNSS-SDR needs are not supported by the profiling environment of the volk_gnsssdr module. In order to profile them, some modifications need to be made to two files:
1) Modify /src/algorithms/libs/volk_gnsssdr/lib/qa_utils.cc: the first part of this file defines the parameters supported by the environment. The number after run_cast_test indicates the total number of parameters passed to the proto-kernel (input + output parameters). The other part indicates the type of the data passed. Inside func(....) you will need to add the same number of buffs[ ] as the number specified after run_cast_test.
2) Modify /src/algorithms/libs/volk_gnsssdr/lib/qa_utils.h: in the header you will need to add typedefs for the new definitions made in the .cc file. Take care: you will need to add the same number of void * as the number specified after run_cast_test.
########################################################################
#FOURTH STEP: optional modifications
########################################################################
1) Modify /src/algorithms/libs/volk_gnsssdr/lib/CMakeLists.txt in order to see kernel files, ORC files and macros when generating the IDE project.
2) To be able to use volk_gnsssdr and default volk functions at the same time in the same file you will need to modify the template files that the volk_gnsssdr module uses at build time to generate some headers.
The files modified are found inside /tmpl:
volk_gnsssdr.tmpl.h
volk_gnsssdr_typedefs.tmpl.h
volk_gnsssdr_machines.tmpl.h
volk_gnsssdr_cpu.tmpl.h
volk_gnsssdr_config_fixed.tmpl.h
The modifications consist of changing the defines of those files to different ones to allow the definition of the volk_gnsssdr functions although the default volk functions are already defined.
########################################################################
#FIFTH STEP: add volk_gnsssdr module to the GNSS-SDR project
########################################################################
In order to add the volk_gnsssdr module to the GNSS-SDR project the CMakeLists.txt global file needs to be edited.
########################################################################
#SIXTH STEP: using volk_gnsssdr functions
########################################################################
To use the proto-kernels inside volk_gnsssdr project two steps are needed: 1) in the CMakeLists.txt you will need to add ${VOLK_GNSSSDR_INCLUDE_DIRS} inside the include_directories function, and also add ${VOLK_GNSSSDR_LIBRARIES} inside the target_link_libraries function.
2) Add the line #include "volk_gnsssdr.h" at the top of the file.