1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2024-09-29 15:30:52 +00:00

Merge branch 'new_volk_module' of https://github.com/gnss-sdr/gnss-sdr.git into new_volk_module

This commit is contained in:
Javier Arribas 2016-01-20 18:24:25 +01:00
commit bb39ff43a0
14 changed files with 432 additions and 846 deletions

View File

@ -59,10 +59,10 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_generic(lv_16sc_t* r
memcpy(result, result_aux[0], sizeof(lv_16sc_t)*num_points);
volk_gnsssdr_free(rem_code_phase_chips);
for(unsigned int n = 0; n < num_out_vectors; n++)
{
volk_gnsssdr_free(result_aux[n]);
}
// for(unsigned int n = 0; n < num_out_vectors; n++)
// {
// volk_gnsssdr_free(result_aux[n]);
// }
volk_gnsssdr_free(result_aux);
}
@ -85,10 +85,10 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_a_sse2(lv_16sc_t* re
memcpy(result, result_aux[0], sizeof(lv_16sc_t)*num_points);
volk_gnsssdr_free(rem_code_phase_chips);
for(unsigned int n = 0; n < num_out_vectors; n++)
{
volk_gnsssdr_free(result_aux[n]);
}
// for(unsigned int n = 0; n < num_out_vectors; n++)
// {
// volk_gnsssdr_free(result_aux[n]);
// }
volk_gnsssdr_free(result_aux);
}
@ -112,10 +112,10 @@ static inline void volk_gnsssdr_16ic_resamplerxnpuppet_16ic_u_sse2(lv_16sc_t* re
memcpy(result, result_aux[0], sizeof(lv_16sc_t)*num_points);
volk_gnsssdr_free(rem_code_phase_chips);
for(unsigned int n = 0; n < num_out_vectors; n++)
{
volk_gnsssdr_free(result_aux[n]);
}
// for(unsigned int n = 0; n < num_out_vectors; n++)
// {
// volk_gnsssdr_free(result_aux[n]);
// }
volk_gnsssdr_free(result_aux);
}

View File

@ -34,8 +34,8 @@
#include <math.h>
#include "volk_gnsssdr/volk_gnsssdr_complex.h"
#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H
#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H
#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_H
#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_H
#ifdef LV_HAVE_SSE2
@ -168,11 +168,6 @@ static inline void volk_gnsssdr_32fc_convert_16ic_generic(lv_16sc_t* outputVecto
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_u_H */
#ifndef INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H
#define INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H
#ifdef LV_HAVE_SSE2
@ -281,28 +276,4 @@ static inline void volk_gnsssdr_32fc_convert_16ic_a_sse(lv_16sc_t* outputVector,
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Converts a vector of float pairs (32 bits each part) into a vector of 16-bit integer pairs, with saturation
    \param outputVector The 16 bit output data buffer; receives 2*num_points int16_t values
    \param inputVector The floating point input data buffer; read as 2*num_points consecutive floats and never modified
    \param num_points The number of data values (pairs) to be converted
*/
static inline void volk_gnsssdr_32fc_convert_16ic_a_generic(lv_16sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
    const float* inputVectorPtr = (const float*)inputVector;
    int16_t* outputVectorPtr = (int16_t*)outputVector;
    const float min_val = -32768;
    const float max_val = 32767;

    for(unsigned int i = 0; i < num_points*2; i++)
    {
        // Clamp a local copy: the input buffer is const and must not be written to.
        float aux = inputVectorPtr[i];
        if(aux > max_val)
        {
            aux = max_val;
        }
        else if(aux < min_val)
        {
            aux = min_val;
        }
        // rintf rounds to an integral value before the narrowing cast.
        outputVectorPtr[i] = (int16_t)rintf(aux);
    }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_a_H */
#endif /* INCLUDED_volk_gnsssdr_32fc_convert_16ic_H */

View File

@ -36,8 +36,8 @@
#include "volk_gnsssdr/volk_gnsssdr_complex.h"
#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H
#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H
#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_H
#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_H
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
@ -126,11 +126,6 @@ static inline void volk_gnsssdr_32fc_convert_8ic_generic(lv_8sc_t* outputVector,
}
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_u_H */
#ifndef INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H
#define INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H
#ifdef LV_HAVE_SSE2
@ -195,28 +190,4 @@ static inline void volk_gnsssdr_32fc_convert_8ic_a_sse2(lv_8sc_t* outputVector,
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Converts a vector of float pairs (32 bits each part) into a vector of 8-bit integer pairs, with saturation
    \param outputVector The 8 bit output data buffer; receives 2*num_points int8_t values
    \param inputVector The floating point input data buffer; read as 2*num_points consecutive floats and never modified
    \param num_points The number of data values (pairs) to be converted
*/
static inline void volk_gnsssdr_32fc_convert_8ic_a_generic(lv_8sc_t* outputVector, const lv_32fc_t* inputVector, unsigned int num_points)
{
    const float* inputVectorPtr = (const float*)inputVector;
    int8_t* outputVectorPtr = (int8_t*)outputVector;
    const float min_val = -128;
    const float max_val = 127;

    for(unsigned int i = 0; i < num_points*2; i++)
    {
        // Clamp a local copy: the input buffer is const and must not be written to.
        float aux = inputVectorPtr[i];
        if(aux > max_val)
        {
            aux = max_val;
        }
        else if(aux < min_val)
        {
            aux = min_val;
        }
        // rintf rounds to an integral value before the narrowing cast.
        outputVectorPtr[i] = (int8_t)rintf(aux);
    }
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_a_H */
#endif /* INCLUDED_volk_gnsssdr_32fc_convert_8ic_H */

View File

@ -134,15 +134,6 @@ static inline void volk_gnsssdr_64f_accumulator_64f_generic(double* result,const
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_64f_accumulator_64f_u_H */
#ifndef INCLUDED_volk_gnsssdr_64f_accumulator_64f_a_H
#define INCLUDED_volk_gnsssdr_64f_accumulator_64f_a_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_AVX
#include <immintrin.h>
@ -222,21 +213,4 @@ static inline void volk_gnsssdr_64f_accumulator_64f_a_sse3(double* result,const
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Sums every value of the input buffer into a single double
    \param result Receives the sum of the first num_points values (0 when num_points is 0)
    \param inputBuffer The buffer of data to be accumulated
    \param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_64f_accumulator_64f_a_generic(double* result,const double* inputBuffer, unsigned int num_points)
{
    double sum = 0;
    unsigned int k = 0;

    // Accumulate front to back so the floating point summation order is fixed.
    while(k < num_points)
    {
        sum += inputBuffer[k];
        ++k;
    }

    *result = sum;
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_64f_accumulator_64f_a_H */
#endif /* INCLUDED_volk_gnsssdr_64f_accumulator_64f_H */

View File

@ -32,11 +32,12 @@
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H
#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H
#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_H
#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
@ -99,16 +100,6 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_generic(char* result, const c
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_u_H */
#ifndef INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H
#define INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
/*!
@ -149,26 +140,6 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_a_sse3(char* result, const ch
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Sums every value of the input buffer into a single char
    \param result Receives the sum of the first num_points values, kept in a char accumulator (0 when num_points is 0)
    \param inputBuffer The buffer of data to be accumulated
    \param num_points The number of values in inputBuffer to be accumulated
*/
static inline void volk_gnsssdr_8i_accumulator_s8i_a_generic(char* result, const char* inputBuffer, unsigned int num_points)
{
    char sum = 0;
    unsigned int k = 0;

    while(k < num_points)
    {
        // The running total is narrowed back to char after every addition.
        sum = (char)(sum + inputBuffer[k]);
        ++k;
    }

    *result = sum;
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
\brief Accumulates the values in the input buffer
@ -190,5 +161,5 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_u_orc(char* result, const cha
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_a_H */
#endif /* INCLUDED_volk_gnsssdr_8i_accumulator_s8i_H */

View File

@ -32,237 +32,8 @@
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H
#define INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
    \brief Returns the index of the max value in src0
    \param target Receives the lowest index holding the max value of src0; left untouched when num_points is 0
    \param src0 The buffer of data to be analysed (read with unaligned loads)
    \param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, const char* src0, unsigned int num_points)
{
    if(num_points > 0)
    {
        const unsigned int sse_iters = num_points / 32;

        const char* basePtr = src0;
        const char* inputPtr = src0;
        char max = src0[0];
        unsigned int index = 0;
        __VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
        __m256i ones, compareResults, currentValues;
        __m128i compareResultslo, compareResultshi, maxValues, lo, hi;

        ones = _mm256_set1_epi8(0xFF);
        maxValues = _mm_set1_epi8(max);

        for(unsigned int number = 0; number < sse_iters; number++)
        {
            currentValues = _mm256_lddqu_si256((const __m256i*)inputPtr);

            // Compare each 128-bit half of the chunk against the running maximum.
            lo = _mm256_castsi256_si128(currentValues);
            hi = _mm256_extractf128_si256(currentValues,1);
            compareResultslo = _mm_cmpgt_epi8(maxValues, lo);
            compareResultshi = _mm_cmpgt_epi8(maxValues, hi);

            // _mm256_set_m128i is not defined in some versions of immintrin.h, so rebuild the 256-bit value by hand.
            compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1);

            // Unless every byte is strictly below the running maximum, rescan this chunk in scalar code.
            if (!_mm256_testc_si256(compareResults, ones))
            {
                _mm256_storeu_si256((__m256i*)currentValuesBuffer, currentValues);

                for(unsigned int i = 0; i < 32; i++)
                {
                    if(currentValuesBuffer[i] > max)
                    {
                        index = (unsigned int)(inputPtr - basePtr) + i;
                        max = currentValuesBuffer[i];
                    }
                }
                maxValues = _mm_set1_epi8(max);
            }

            inputPtr += 32;
        }

        // Scalar pass over the leftover elements, starting where the vector loop stopped
        // (not at the beginning of the buffer, otherwise the tail is never examined).
        for(unsigned int i = sse_iters * 32; i < num_points; ++i)
        {
            if(src0[i] > max)
            {
                index = i;
                max = src0[i];
            }
        }
        target[0] = index;
    }
}
#endif /*LV_HAVE_AVX*/
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
    \brief Returns the index of the max value in src0
    \param target Receives the lowest index holding the max value of src0; left untouched when num_points is 0
    \param src0 The buffer of data to be analysed (read with unaligned loads)
    \param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, const char* src0, unsigned int num_points)
{
    if(num_points > 0)
    {
        const unsigned int sse_iters = num_points / 16;

        const char* basePtr = src0;
        const char* inputPtr = src0;
        char max = src0[0];
        unsigned int index = 0;
        __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
        __m128i maxValues, compareResults, currentValues;

        maxValues = _mm_set1_epi8(max);

        for(unsigned int number = 0; number < sse_iters; number++)
        {
            currentValues = _mm_lddqu_si128((const __m128i*)inputPtr);
            compareResults = _mm_cmpgt_epi8(maxValues, currentValues);

            // Unless every byte is strictly below the running maximum, rescan this chunk in scalar code.
            if (!_mm_test_all_ones(compareResults))
            {
                _mm_storeu_si128((__m128i*)currentValuesBuffer, currentValues);

                for(unsigned int i = 0; i < 16; i++)
                {
                    if(currentValuesBuffer[i] > max)
                    {
                        index = (unsigned int)(inputPtr - basePtr) + i;
                        max = currentValuesBuffer[i];
                    }
                }
                maxValues = _mm_set1_epi8(max);
            }

            inputPtr += 16;
        }

        // Scalar pass over the leftover elements, starting where the vector loop stopped
        // (not at the beginning of the buffer, otherwise the tail is never examined).
        for(unsigned int i = sse_iters * 16; i < num_points; ++i)
        {
            if(src0[i] > max)
            {
                index = i;
                max = src0[i];
            }
        }
        target[0] = index;
    }
}
#endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_SSE2
#include<emmintrin.h>
/*!
    \brief Returns the index of the max value in src0
    \param target Receives the lowest index holding the max value of src0; left untouched when num_points is 0
    \param src0 The buffer of data to be analysed (read with unaligned loads)
    \param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, const char* src0, unsigned int num_points)
{
    if(num_points > 0)
    {
        const unsigned int sse_iters = num_points / 16;

        const char* basePtr = src0;
        const char* inputPtr = src0;
        char max = src0[0];
        unsigned int index = 0;
        unsigned short mask;
        __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
        __m128i maxValues, compareResults, currentValues;

        maxValues = _mm_set1_epi8(max);

        for(unsigned int number = 0; number < sse_iters; number++)
        {
            currentValues = _mm_loadu_si128((const __m128i*)inputPtr);
            compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
            // One bit per byte lane: set where the running maximum is strictly greater.
            mask = _mm_movemask_epi8(compareResults);

            if (mask != 0xFFFF)
            {
                _mm_storeu_si128((__m128i*)currentValuesBuffer, currentValues);
                // After inversion a set bit marks a lane that is not below the running maximum.
                mask = ~mask;
                unsigned int i = 0;
                while (mask > 0)
                {
                    if ((mask & 1) == 1)
                    {
                        if(currentValuesBuffer[i] > max)
                        {
                            index = (unsigned int)(inputPtr - basePtr) + i;
                            max = currentValuesBuffer[i];
                        }
                    }
                    i++;
                    mask >>= 1;
                }
                maxValues = _mm_set1_epi8(max);
            }

            inputPtr += 16;
        }

        // Scalar pass over the leftover elements, starting where the vector loop stopped
        // (not at the beginning of the buffer, otherwise the tail is never examined).
        for(unsigned int i = sse_iters * 16; i < num_points; ++i)
        {
            if(src0[i] > max)
            {
                index = i;
                max = src0[i];
            }
        }
        target[0] = index;
    }
}
#endif /*LV_HAVE_SSE2*/
#ifdef LV_HAVE_GENERIC
/*!
    \brief Finds the position of the largest value in src0
    \param target Receives the lowest index holding the largest value; left untouched when num_points is 0
    \param src0 The buffer of data to be analysed
    \param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, const char* src0, unsigned int num_points)
{
    if(num_points == 0)
    {
        return;
    }

    unsigned int best_index = 0;
    char best_value = src0[0];
    unsigned int pos = 1;

    // Strict comparison keeps the first occurrence when the maximum repeats.
    while(pos < num_points)
    {
        const char candidate = src0[pos];
        if(candidate > best_value)
        {
            best_value = candidate;
            best_index = pos;
        }
        ++pos;
    }

    target[0] = best_index;
}
#endif /*LV_HAVE_GENERIC*/
#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_u_H*/
#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H
#define INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H
#ifndef INCLUDED_volk_gnsssdr_8i_index_max_16u_H
#define INCLUDED_volk_gnsssdr_8i_index_max_16u_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
@ -276,62 +47,64 @@ static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, c
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, const char* src0, unsigned int num_points) {
if(num_points > 0){
const unsigned int sse_iters = num_points / 32;
char* basePtr = (char*)src0;
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
__VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
__m256i ones, compareResults, currentValues;
__m128i compareResultslo, compareResultshi, maxValues, lo, hi;
ones = _mm256_set1_epi8(0xFF);
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
static inline void volk_gnsssdr_8i_index_max_16u_u_avx(unsigned int* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)
{
currentValues = _mm256_load_si256((__m256i*)inputPtr);
lo = _mm256_castsi256_si128(currentValues);
hi = _mm256_extractf128_si256(currentValues,1);
compareResultslo = _mm_cmpgt_epi8(maxValues, lo);
compareResultshi = _mm_cmpgt_epi8(maxValues, hi);
//compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h
compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1);
if (!_mm256_testc_si256(compareResults, ones))
{
_mm256_store_si256((__m256i*)&currentValuesBuffer, currentValues);
for(unsigned int i = 0; i < 32; i++)
const unsigned int sse_iters = num_points / 32;
char* basePtr = (char*)src0;
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
__VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
__m256i ones, compareResults, currentValues;
__m128i compareResultslo, compareResultshi, maxValues, lo, hi;
ones = _mm256_set1_epi8(0xFF);
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
{
if(currentValuesBuffer[i] > max)
{
index = inputPtr - basePtr + i;
max = currentValuesBuffer[i];
}
currentValues = _mm256_lddqu_si256((__m256i*)inputPtr);
lo = _mm256_castsi256_si128(currentValues);
hi = _mm256_extractf128_si256(currentValues,1);
compareResultslo = _mm_cmpgt_epi8(maxValues, lo);
compareResultshi = _mm_cmpgt_epi8(maxValues, hi);
//compareResults = _mm256_set_m128i(compareResultshi , compareResultslo); //not defined in some versions of immintrin.h
compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo),(compareResultshi),1);
if (!_mm256_testc_si256(compareResults, ones))
{
_mm256_storeu_si256((__m256i*)&currentValuesBuffer, currentValues);
for(unsigned int i = 0; i < 32; i++)
{
if(currentValuesBuffer[i] > max)
{
index = inputPtr - basePtr + i;
max = currentValuesBuffer[i];
}
}
maxValues = _mm_set1_epi8(max);
}
inputPtr += 32;
}
maxValues = _mm_set1_epi8(max);
}
inputPtr += 32;
for(unsigned int i = 0; i<(num_points % 32); ++i)
{
if(src0[i] > max)
{
index = i;
max = src0[i];
}
}
target[0] = index;
}
for(unsigned int i = 0; i<(num_points % 32); ++i)
{
if(src0[i] > max)
{
index = i;
max = src0[i];
}
}
target[0] = index;
}
}
#endif /*LV_HAVE_AVX*/
@ -344,53 +117,282 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, con
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, const char* src0, unsigned int num_points) {
if(num_points > 0){
const unsigned int sse_iters = num_points / 16;
char* basePtr = (char*)src0;
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
static inline void volk_gnsssdr_8i_index_max_16u_u_sse4_1(unsigned int* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)
{
currentValues = _mm_load_si128((__m128i*)inputPtr);
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
if (!_mm_test_all_ones(compareResults))
{
_mm_store_si128((__m128i*)&currentValuesBuffer, currentValues);
for(unsigned int i = 0; i < 16; i++)
const unsigned int sse_iters = num_points / 16;
char* basePtr = (char*)src0;
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
{
if(currentValuesBuffer[i] > max)
{
index = inputPtr - basePtr + i;
max = currentValuesBuffer[i];
}
currentValues = _mm_lddqu_si128((__m128i*)inputPtr);
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
if (!_mm_test_all_ones(compareResults))
{
_mm_storeu_si128((__m128i*)&currentValuesBuffer, currentValues);
for(unsigned int i = 0; i < 16; i++)
{
if(currentValuesBuffer[i] > max)
{
index = inputPtr - basePtr + i;
max = currentValuesBuffer[i];
}
}
maxValues = _mm_set1_epi8(max);
}
inputPtr += 16;
}
maxValues = _mm_set1_epi8(max);
}
inputPtr += 16;
for(unsigned int i = 0; i<(num_points % 16); ++i)
{
if(src0[i] > max)
{
index = i;
max = src0[i];
}
}
target[0] = index;
}
for(unsigned int i = 0; i<(num_points % 16); ++i)
}
#endif /*LV_HAVE_SSE4_1*/
#ifdef LV_HAVE_SSE2
#include<emmintrin.h>
/*!
    \brief Returns the index of the max value in src0
    \param target Receives the lowest index holding the max value of src0; left untouched when num_points is 0
    \param src0 The buffer of data to be analysed (read with unaligned loads)
    \param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_u_sse2(unsigned int* target, const char* src0, unsigned int num_points)
{
    if(num_points > 0)
    {
        const unsigned int sse_iters = num_points / 16;

        const char* basePtr = src0;
        const char* inputPtr = src0;
        char max = src0[0];
        unsigned int index = 0;
        unsigned short mask;
        __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
        __m128i maxValues, compareResults, currentValues;

        maxValues = _mm_set1_epi8(max);

        for(unsigned int number = 0; number < sse_iters; number++)
        {
            currentValues = _mm_loadu_si128((const __m128i*)inputPtr);
            compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
            // One bit per byte lane: set where the running maximum is strictly greater.
            mask = _mm_movemask_epi8(compareResults);

            if (mask != 0xFFFF)
            {
                _mm_storeu_si128((__m128i*)currentValuesBuffer, currentValues);
                // After inversion a set bit marks a lane that is not below the running maximum.
                mask = ~mask;
                unsigned int i = 0;
                while (mask > 0)
                {
                    if ((mask & 1) == 1)
                    {
                        if(currentValuesBuffer[i] > max)
                        {
                            index = (unsigned int)(inputPtr - basePtr) + i;
                            max = currentValuesBuffer[i];
                        }
                    }
                    i++;
                    mask >>= 1;
                }
                maxValues = _mm_set1_epi8(max);
            }

            inputPtr += 16;
        }

        // Scalar pass over the leftover elements, starting where the vector loop stopped
        // (not at the beginning of the buffer, otherwise the tail is never examined).
        for(unsigned int i = sse_iters * 16; i < num_points; ++i)
        {
            if(src0[i] > max)
            {
                index = i;
                max = src0[i];
            }
        }
        target[0] = index;
    }
}
#endif /*LV_HAVE_SSE2*/
#ifdef LV_HAVE_GENERIC
/*!
    \brief Finds the position of the largest value in src0
    \param target Receives the lowest index holding the largest value; left untouched when num_points is 0
    \param src0 The buffer of data to be analysed
    \param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_generic(unsigned int* target, const char* src0, unsigned int num_points)
{
    if(num_points == 0)
    {
        return;
    }

    unsigned int best_index = 0;
    char best_value = src0[0];
    unsigned int pos = 1;

    // Strict comparison keeps the first occurrence when the maximum repeats.
    while(pos < num_points)
    {
        const char candidate = src0[pos];
        if(candidate > best_value)
        {
            best_value = candidate;
            best_index = pos;
        }
        ++pos;
    }

    target[0] = best_index;
}
#endif /*LV_HAVE_GENERIC*/
#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
    \brief Returns the index of the max value in src0
    \param target Receives the lowest index holding the max value of src0; left untouched when num_points is 0
    \param src0 The buffer of data to be analysed (read with aligned 256-bit loads)
    \param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_a_avx(unsigned int* target, const char* src0, unsigned int num_points)
{
    if(num_points > 0)
    {
        const unsigned int sse_iters = num_points / 32;

        const char* basePtr = src0;
        const char* inputPtr = src0;
        char max = src0[0];
        unsigned int index = 0;
        __VOLK_ATTR_ALIGNED(32) char currentValuesBuffer[32];
        __m256i ones, compareResults, currentValues;
        __m128i compareResultslo, compareResultshi, maxValues, lo, hi;

        ones = _mm256_set1_epi8(0xFF);
        maxValues = _mm_set1_epi8(max);

        for(unsigned int number = 0; number < sse_iters; number++)
        {
            currentValues = _mm256_load_si256((const __m256i*)inputPtr);

            // Compare each 128-bit half of the chunk against the running maximum.
            lo = _mm256_castsi256_si128(currentValues);
            hi = _mm256_extractf128_si256(currentValues,1);
            compareResultslo = _mm_cmpgt_epi8(maxValues, lo);
            compareResultshi = _mm_cmpgt_epi8(maxValues, hi);

            // _mm256_set_m128i is not defined in some versions of immintrin.h, so rebuild the 256-bit value by hand.
            compareResults = _mm256_insertf128_si256(_mm256_castsi128_si256(compareResultslo), (compareResultshi), 1);

            // Unless every byte is strictly below the running maximum, rescan this chunk in scalar code.
            if (!_mm256_testc_si256(compareResults, ones))
            {
                _mm256_store_si256((__m256i*)currentValuesBuffer, currentValues);

                for(unsigned int i = 0; i < 32; i++)
                {
                    if(currentValuesBuffer[i] > max)
                    {
                        index = (unsigned int)(inputPtr - basePtr) + i;
                        max = currentValuesBuffer[i];
                    }
                }
                maxValues = _mm_set1_epi8(max);
            }

            inputPtr += 32;
        }

        // Scalar pass over the leftover elements, starting where the vector loop stopped
        // (not at the beginning of the buffer, otherwise the tail is never examined).
        for(unsigned int i = sse_iters * 32; i < num_points; ++i)
        {
            if(src0[i] > max)
            {
                index = i;
                max = src0[i];
            }
        }
        target[0] = index;
    }
}
#endif /*LV_HAVE_AVX*/
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
/*!
    \brief Returns the index of the max value in src0
    \param target Receives the lowest index holding the max value of src0; left untouched when num_points is 0
    \param src0 The buffer of data to be analysed (read with aligned 128-bit loads)
    \param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target, const char* src0, unsigned int num_points)
{
    if(num_points > 0)
    {
        const unsigned int sse_iters = num_points / 16;

        const char* basePtr = src0;
        const char* inputPtr = src0;
        char max = src0[0];
        unsigned int index = 0;
        __VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
        __m128i maxValues, compareResults, currentValues;

        maxValues = _mm_set1_epi8(max);

        for(unsigned int number = 0; number < sse_iters; number++)
        {
            currentValues = _mm_load_si128((const __m128i*)inputPtr);
            compareResults = _mm_cmpgt_epi8(maxValues, currentValues);

            // Unless every byte is strictly below the running maximum, rescan this chunk in scalar code.
            if (!_mm_test_all_ones(compareResults))
            {
                _mm_store_si128((__m128i*)currentValuesBuffer, currentValues);

                for(unsigned int i = 0; i < 16; i++)
                {
                    if(currentValuesBuffer[i] > max)
                    {
                        index = (unsigned int)(inputPtr - basePtr) + i;
                        max = currentValuesBuffer[i];
                    }
                }
                maxValues = _mm_set1_epi8(max);
            }

            inputPtr += 16;
        }

        // Scalar pass over the leftover elements, starting where the vector loop stopped
        // (not at the beginning of the buffer, otherwise the tail is never examined).
        for(unsigned int i = sse_iters * 16; i < num_points; ++i)
        {
            if(src0[i] > max)
            {
                index = i;
                max = src0[i];
            }
        }
        target[0] = index;
    }
}
#endif /*LV_HAVE_SSE4_1*/
@ -403,89 +405,65 @@ static inline void volk_gnsssdr_8i_index_max_16u_a_sse4_1(unsigned int* target,
\param src0 The buffer of data to be analysed
\param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, const char* src0, unsigned int num_points) {
if(num_points > 0){
const unsigned int sse_iters = num_points / 16;
char* basePtr = (char*)src0;
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
unsigned short mask;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
static inline void volk_gnsssdr_8i_index_max_16u_a_sse2(unsigned int* target, const char* src0, unsigned int num_points)
{
if(num_points > 0)
{
currentValues = _mm_load_si128((__m128i*)inputPtr);
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
mask = _mm_movemask_epi8(compareResults);
if (mask != 0xFFFF)
{
_mm_store_si128((__m128i*)&currentValuesBuffer, currentValues);
mask = ~mask;
unsigned int i = 0;
while (mask > 0)
const unsigned int sse_iters = num_points / 16;
char* basePtr = (char*)src0;
char* inputPtr = (char*)src0;
char max = src0[0];
unsigned int index = 0;
unsigned short mask;
__VOLK_ATTR_ALIGNED(16) char currentValuesBuffer[16];
__m128i maxValues, compareResults, currentValues;
maxValues = _mm_set1_epi8(max);
for(unsigned int number = 0; number < sse_iters; number++)
{
if ((mask & 1) == 1)
{
if(currentValuesBuffer[i] > max)
currentValues = _mm_load_si128((__m128i*)inputPtr);
compareResults = _mm_cmpgt_epi8(maxValues, currentValues);
mask = _mm_movemask_epi8(compareResults);
if (mask != 0xFFFF)
{
index = inputPtr - basePtr + i;
max = currentValuesBuffer[i];
_mm_store_si128((__m128i*)&currentValuesBuffer, currentValues);
mask = ~mask;
unsigned int i = 0;
while (mask > 0)
{
if ((mask & 1) == 1)
{
if(currentValuesBuffer[i] > max)
{
index = inputPtr - basePtr + i;
max = currentValuesBuffer[i];
}
}
i++;
mask >>= 1;
}
maxValues = _mm_set1_epi8(max);
}
}
i++;
mask >>= 1;
inputPtr += 16;
}
maxValues = _mm_set1_epi8(max);
}
inputPtr += 16;
for(unsigned int i = 0; i<(num_points % 16); ++i)
{
if(src0[i] > max)
{
index = i;
max = src0[i];
}
}
target[0] = index;
}
for(unsigned int i = 0; i<(num_points % 16); ++i)
{
if(src0[i] > max)
{
index = i;
max = src0[i];
}
}
target[0] = index;
}
}
#endif /*LV_HAVE_SSE2*/
#ifdef LV_HAVE_GENERIC
/*!
    \brief Finds the position of the largest value in src0
    \param target Receives the lowest index holding the largest value; left untouched when num_points is 0
    \param src0 The buffer of data to be analysed
    \param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_index_max_16u_a_generic(unsigned int* target, const char* src0, unsigned int num_points)
{
    if(num_points == 0)
    {
        return;
    }

    unsigned int winner = 0;
    char peak = src0[0];

    // Strict comparison keeps the first occurrence when the maximum repeats.
    for(unsigned int k = 1; k != num_points; ++k)
    {
        if(!(src0[k] > peak))
        {
            continue;
        }
        peak = src0[k];
        winner = k;
    }

    target[0] = winner;
}
#endif /*LV_HAVE_GENERIC*/
#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_a_H*/
#endif /*INCLUDED_volk_gnsssdr_8i_index_max_16u_H*/

View File

@ -32,11 +32,13 @@
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_u_H
#define INCLUDED_volk_gnsssdr_8i_max_s8i_u_H
#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_H
#define INCLUDED_volk_gnsssdr_8i_max_s8i_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
@ -179,15 +181,8 @@ static inline void volk_gnsssdr_8i_max_s8i_generic(char* target, const char* src
#endif /*LV_HAVE_GENERIC*/
#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_u_H*/
#ifndef INCLUDED_volk_gnsssdr_8i_max_s8i_a_H
#define INCLUDED_volk_gnsssdr_8i_max_s8i_a_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>
@ -304,29 +299,5 @@ static inline void volk_gnsssdr_8i_max_s8i_a_sse2(char* target, const char* src0
#endif /*LV_HAVE_SSE2*/
#ifdef LV_HAVE_GENERIC
/*!
    \brief Finds the largest value in src0
    \param target Receives the largest value of src0; left untouched when num_points is 0
    \param src0 The buffer of data to be analysed
    \param num_points The number of values in src0 to be analysed
*/
static inline void volk_gnsssdr_8i_max_s8i_a_generic(char* target, const char* src0, unsigned int num_points)
{
    if(num_points == 0)
    {
        return;
    }

    char peak = src0[0];
    unsigned int pos = 1;

    while(pos < num_points)
    {
        const char candidate = src0[pos];
        peak = (candidate > peak) ? candidate : peak;
        ++pos;
    }

    target[0] = peak;
}
#endif /*LV_HAVE_GENERIC*/
#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_a_H*/
#endif /*INCLUDED_volk_gnsssdr_8i_max_s8i_H*/

View File

@ -32,10 +32,11 @@
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H
#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H
#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_H
#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
@ -99,14 +100,6 @@ static inline void volk_gnsssdr_8i_x2_add_8i_generic(char* cVector, const char*
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_u_H */
#ifndef INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H
#define INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
@ -148,27 +141,6 @@ static inline void volk_gnsssdr_8i_x2_add_8i_a_sse2(char* cVector, const char* a
}
#endif /* LV_HAVE_SSE2 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Adds two char vectors element by element and stores the sums in a third vector
    \param cVector The vector where the results will be stored
    \param aVector One of the vectors to be added
    \param bVector One of the vectors to be added
    \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_gnsssdr_8i_x2_add_8i_a_generic(char* cVector, const char* aVector, const char* bVector, unsigned int num_points)
{
    for(unsigned int k = 0; k < num_points; ++k)
    {
        // The sum is computed after integer promotion and narrowed back to char on store.
        cVector[k] = aVector[k] + bVector[k];
    }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
@ -185,4 +157,4 @@ static inline void volk_gnsssdr_8i_x2_add_8i_u_orc(char* cVector, const char* aV
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_a_H */
#endif /* INCLUDED_volk_gnsssdr_8i_x2_add_8i_H */

View File

@ -33,10 +33,11 @@
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H
#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H
#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_H
#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_AVX
@ -174,15 +175,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_generic(lv_8sc_t* cVector, con
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_u_H */
#ifndef INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H
#define INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_AVX
#include <immintrin.h>
@ -299,25 +291,6 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_a_sse3(lv_8sc_t* cVector, cons
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Applies lv_conj to every element of aVector and stores the results in cVector
    \param cVector The vector where the results will be stored
    \param aVector Vector to be conjugated
    \param num_points The number of values in aVector to be conjugated and stored into cVector
*/
static inline void volk_gnsssdr_8ic_conjugate_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, unsigned int num_points)
{
    unsigned int k = 0;

    while(k < num_points)
    {
        cVector[k] = lv_conj(aVector[k]);
        ++k;
    }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
@ -333,4 +306,4 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_u_orc(lv_8sc_t* cVector, const
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_a_H */
#endif /* INCLUDED_volk_gnsssdr_8ic_conjugate_8ic_H */

View File

@ -34,10 +34,11 @@
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_u_H
#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_u_H
#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_H
#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_H
#include <inttypes.h>
#include <stdio.h>
#include <math.h>
#ifdef LV_HAVE_SSSE3
@ -166,15 +167,6 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_generic(char* magnitude
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_32f_u_H */
#ifndef INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_a_H
#define INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_a_H
#include <inttypes.h>
#include <stdio.h>
#include <math.h>
#ifdef LV_HAVE_SSSE3
#include <tmmintrin.h>
@ -281,26 +273,6 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_sse3(char* magnitudeV
//}
//#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Calculates the magnitude squared of each value in complexVector and stores the results in magnitudeVector
    \param magnitudeVector The vector where the real output values will be stored
    \param complexVector The vector containing the complex input values
    \param num_points The number of complex values in complexVector to be processed and stored into magnitudeVector
*/
static inline void volk_gnsssdr_8ic_magnitude_squared_8i_a_generic(char* magnitudeVector, const lv_8sc_t* complexVector, unsigned int num_points)
{
    // Read the input as a stream of chars, two per sample: the first is used
    // as the real part, the second as the imaginary part. The cast keeps the
    // const qualifier of the input.
    const char* samplePtr = (const char*)complexVector;

    for (unsigned int idx = 0; idx < num_points; ++idx)
        {
            const char re = samplePtr[0];
            const char im = samplePtr[1];
            samplePtr += 2;

            // The products are computed in int; the sum is narrowed to char on the store
            magnitudeVector[idx] = (re * re) + (im * im);
        }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
@ -316,4 +288,4 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_u_orc(char* magnitudeVe
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_32fc_magnitude_32f_a_H */
#endif /* INCLUDED_volk_gnsssdr_8ic_magnitude_squared_8i_H */

View File

@ -33,10 +33,11 @@
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_u_H
#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_u_H
#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_H
#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
@ -143,16 +144,6 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_generic(lv_8sc_t* cVector,
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_u_H */
#ifndef INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_a_H
#define INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <float.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
@ -215,46 +206,6 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_sse3(lv_8sc_t* cVector,
}
#endif /* LV_HAVE_SSE3 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Multiplies every element of the input vector by a complex scalar and stores the results in the output vector
    \param cVector The vector where the results will be stored
    \param aVector The vector to be multiplied
    \param scalar The complex scalar that multiplies each element of aVector
    \param num_points The number of complex values in aVector to be multiplied by scalar and stored into cVector
*/
static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t scalar, unsigned int num_points)
{
    // Samples are processed strictly in order, one product per output element
    for (unsigned int idx = 0; idx < num_points; ++idx)
        {
            cVector[idx] = aVector[idx] * scalar;
        }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
@ -271,4 +222,4 @@ static inline void volk_gnsssdr_8ic_s8ic_multiply_8ic_u_orc(lv_8sc_t* cVector, c
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_32fc_x2_multiply_32fc_a_H */
#endif /* INCLUDED_volk_gnsssdr_8ic_s8ic_multiply_8ic_H */

View File

@ -33,9 +33,10 @@
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H
#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_H
#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_H
#include <stdio.h>
#include <string.h>
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
@ -251,69 +252,6 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_sse4_1(lv_8sc_t* result, c
#endif /*LV_HAVE_SSE4_1*/
#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_u_H*/
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H
#define INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H
#include <volk_gnsssdr/volk_gnsssdr_common.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#include <stdio.h>
#include <string.h>
#ifdef LV_HAVE_GENERIC
/*!
    \brief Multiplies the two input complex vectors element by element and accumulates the products, storing the single accumulated value in result
    \param result Pointer to the single complex value where the accumulated result will be stored (any previous content is overwritten)
    \param input One of the vectors to be multiplied and accumulated
    \param taps One of the vectors to be multiplied and accumulated
    \param num_points The number of complex values in input and taps to be multiplied together and accumulated into result
*/
static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_a_generic(lv_8sc_t* result, const lv_8sc_t* input, const lv_8sc_t* taps, unsigned int num_points)
{
    // Byte-wise views of the buffers: each complex sample is accessed as two
    // consecutive chars, the first used as real part and the second as imaginary part.
    // The input views keep their const qualifier.
    char* res = (char*)result;
    const char* in = (const char*)input;
    const char* tp = (const char*)taps;

    // Samples are consumed two at a time; a trailing odd sample is handled at the end
    const unsigned int n_2_ccomplex_blocks = num_points / 2;

    // Two independent {real, imag} accumulators, one per sample of each pair.
    // They are plain chars, so every partial sum is narrowed back to char.
    char sum0[2] = {0, 0};
    char sum1[2] = {0, 0};

    for (unsigned int i = 0; i < n_2_ccomplex_blocks; ++i)
        {
            sum0[0] += in[0] * tp[0] - in[1] * tp[1];
            sum0[1] += in[0] * tp[1] + in[1] * tp[0];
            sum1[0] += in[2] * tp[2] - in[3] * tp[3];
            sum1[1] += in[2] * tp[3] + in[3] * tp[2];

            in += 4;
            tp += 4;
        }

    // Merge both accumulators into the output value
    res[0] = sum0[0] + sum1[0];
    res[1] = sum0[1] + sum1[1];

    // Cleanup if we had an odd number of points: add the last product
    if (num_points & 1)
        {
            *result += input[num_points - 1] * taps[num_points - 1];
        }
}
#endif /*LV_HAVE_GENERIC*/
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
@ -500,4 +438,4 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_u_orc(lv_8sc_t* result, cons
}
#endif /* LV_HAVE_ORC */
#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_a_H*/
#endif /*INCLUDED_volk_gnsssdr_8ic_x2_dot_prod_8ic_H*/

View File

@ -33,10 +33,11 @@
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H
#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_H
#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_SSE2
@ -180,15 +181,6 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_generic(lv_8sc_t* cVector, c
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_u_H */
#ifndef INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H
#define INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H
#include <inttypes.h>
#include <stdio.h>
#include <volk_gnsssdr/volk_gnsssdr_complex.h>
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>
@ -310,27 +302,6 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector,
}
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Multiplies the two input complex vectors element by element and stores the products in the third vector
    \param cVector The vector where the results will be stored
    \param aVector One of the vectors to be multiplied
    \param bVector One of the vectors to be multiplied
    \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_generic(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
{
    // cVector[i] = aVector[i] * bVector[i], processed in order from index 0
    unsigned int idx = 0;
    while (idx < num_points)
        {
            cVector[idx] = aVector[idx] * bVector[idx];
            ++idx;
        }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
@ -347,4 +318,4 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_u_orc(lv_8sc_t* cVector, con
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_a_H */
#endif /* INCLUDED_volk_gnsssdr_8ic_x2_multiply_8ic_H */

View File

@ -32,10 +32,11 @@
* -------------------------------------------------------------------------
*/
#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H
#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H
#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_H
#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
@ -112,14 +113,6 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_generic(unsigned char* cChar,
}
#endif /* LV_HAVE_GENERIC */
#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_u_H */
#ifndef INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H
#define INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H
#include <inttypes.h>
#include <stdio.h>
#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>
@ -176,26 +169,6 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_a_sse3(unsigned char* cChar, c
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
    \brief Multiplies the two input unsigned char vectors element by element and stores the products in the third unsigned char vector
    \param cChar The unsigned char vector where the results will be stored
    \param aChar One of the unsigned char vectors to be multiplied
    \param bChar One of the unsigned char vectors to be multiplied
    \param num_points The number of unsigned char values in aChar and bChar to be multiplied together and stored into cChar
*/
static inline void volk_gnsssdr_8u_x2_multiply_8u_a_generic(unsigned char* cChar, const unsigned char* aChar, const unsigned char* bChar, unsigned int num_points)
{
    // The product is computed in int and reduced to unsigned char on the store
    unsigned int idx = 0;
    while (idx < num_points)
        {
            cChar[idx] = aChar[idx] * bChar[idx];
            ++idx;
        }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC
/*!
@ -212,4 +185,4 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_u_orc(unsigned char* cVector,
}
#endif /* LV_HAVE_ORC */
#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_a_H */
#endif /* INCLUDED_volk_gnsssdr_8u_x2_multiply_8u_H */