gnss-sdr/src/algorithms/libs/gps_sdr_x86.cc

494 lines
9.8 KiB
C++

/*! \file gps_sdr_x86.cc
Emulate SIMD functionality with plain scalar x86 code, for older processors
*/
/************************************************************************************************
Copyright 2008 Gregory W Heckler
This file is part of the GPS Software Defined Radio (GPS-SDR)
The GPS-SDR is free software; you can redistribute it and/or modify it under the terms of the
GNU General Public License as published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GPS-SDR is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License along with GPS-SDR; if not,
write to the:
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
************************************************************************************************/
#include "gps_sdr_x86.h"
#include "gr_complex.h"
#include <complex>
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief Element-wise in-place addition: _A[k] += _B[k] for k in [0, _cnt).
 * \param _A   Left operand array; each element is overwritten with the sum.
 * \param _B   Right operand array; only read.
 * \param _cnt Number of elements to process (nothing happens if it is <= 0).
 */
void x86_add(int16 *_A, int16 *_B, int32 _cnt)
{
    int16 *dst = _A;
    int16 *src = _B;

    /* Count down the remaining elements while walking both arrays in step. */
    for(int32 remaining = _cnt; remaining > 0; --remaining)
    {
        *dst += *src;
        ++dst;
        ++src;
    }
}
/*----------------------------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief Element-wise in-place subtraction: _A[k] -= _B[k] for k in [0, _cnt).
 * \param _A   Minuend array; each element is overwritten with the difference.
 * \param _B   Subtrahend array; only read.
 * \param _cnt Number of elements to process (nothing happens if it is <= 0).
 */
void x86_sub(int16 *_A, int16 *_B, int32 _cnt)
{
    int32 k = 0;

    while(k < _cnt)
    {
        int16 &target = _A[k];
        target -= _B[k];
        ++k;
    }
}
/*----------------------------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief Element-wise in-place multiplication: _A[k] *= _B[k] for k in [0, _cnt).
 *
 * The product is stored back into the int16 element, so it is narrowed to the
 * element type on assignment.
 *
 * \param _A   Left factor array; each element is overwritten with the product.
 * \param _B   Right factor array; only read.
 * \param _cnt Number of elements to process (nothing happens if it is <= 0).
 */
void x86_mul(int16 *_A, int16 *_B, int32 _cnt)
{
    int16 *dst = _A;
    int16 *src = _B;

    for(int32 remaining = _cnt; remaining > 0; --remaining, ++dst, ++src)
    {
        *dst *= *src;
    }
}
/*----------------------------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief Multiply every element of _A by the single value *_B, then right-shift it.
 *
 * For each element the product is first stored back into the element and the
 * shift is then applied to that stored value.
 *
 * \param _A     Array scaled in place.
 * \param _B     Pointer to the scalar multiplier; it is dereferenced on every
 *               iteration rather than cached.
 * \param _cnt   Number of elements to process (nothing happens if it is <= 0).
 * \param _shift Right-shift count applied after the multiplication.
 */
void x86_muls(int16 *_A, int16 *_B, int32 _cnt, int32 _shift)
{
    int32 k = 0;

    while(k < _cnt)
    {
        int16 &sample = _A[k];
        sample *= *_B;      /* scale by the scalar ... */
        sample >>= _shift;  /* ... then shift the stored product */
        ++k;
    }
}
/*----------------------------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief Dot product of two int16 arrays, accumulated in an int32.
 * \param _A   First operand array; only read.
 * \param _B   Second operand array; only read.
 * \param _cnt Number of element pairs to multiply and sum.
 * \return Sum of _A[k]*_B[k] for k in [0, _cnt); 0 when _cnt <= 0.
 */
int32 x86_dot(int16 *_A, int16 *_B, int32 _cnt)
{
    int32 sum = 0;
    const int16 *lhs = _A;
    const int16 *rhs = _B;

    for(int32 remaining = _cnt; remaining > 0; --remaining, ++lhs, ++rhs)
    {
        /* Widen both factors before multiplying so the product is formed in int32. */
        sum += static_cast<int32>(*lhs) * static_cast<int32>(*rhs);
    }

    return sum;
}
/*----------------------------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief In-place complex conjugate: negates the q component of each element.
 * \param _A   Array of complex samples; the i component is left untouched.
 * \param _cnt Number of elements to process (nothing happens if it is <= 0).
 */
void x86_conj(CPX *_A, int32 _cnt)
{
    CPX *sample = _A;

    for(int32 remaining = _cnt; remaining > 0; --remaining, ++sample)
    {
        sample->q = -sample->q;
    }
}
/*----------------------------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief In-place element-wise complex multiplication: _A[k] = _A[k] * _B[k].
 *
 * The products are formed in int32 and then cast back to int16 when stored,
 * without any scaling.
 *
 * \param _A   Left factor array; overwritten with the products.
 * \param _B   Right factor array; only read.
 * \param _cnt Number of elements to process (nothing happens if it is <= 0).
 */
void x86_cmul(CPX *_A, CPX *_B, int32 _cnt)
{
    for(int32 k = 0; k < _cnt; ++k)
    {
        /* Read all four components first so the result can safely overwrite _A[k]. */
        const int32 a_re = _A[k].i;
        const int32 a_im = _A[k].q;
        const int32 b_re = _B[k].i;
        const int32 b_im = _B[k].q;

        const int32 prod_re = a_re*b_re - a_im*b_im;
        const int32 prod_im = a_re*b_im + a_im*b_re;

        _A[k].i = (int16)(prod_re);
        _A[k].q = (int16)(prod_im);
    }
}
/*----------------------------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief In-place complex multiplication with rounding and right shift.
 *
 * For each k in [0, _cnt): the complex product _A[k]*_B[k] is formed in int32,
 * the rounding constant 2^(_shift-1) is added to both components, the result
 * is shifted right by _shift bits and cast back to int16 into _A[k].
 *
 * \param _A     Left factor array; overwritten with the scaled products.
 * \param _B     Right factor array; only read.
 * \param _cnt   Number of elements to process (nothing happens if it is <= 0).
 * \param _shift Number of bits to shift right. A value <= 0 means "no scaling":
 *               no rounding constant is added and no shift is applied. The
 *               value must be smaller than the bit width of int32.
 */
void x86_cmuls(CPX *_A, CPX *_B, int32 _cnt, int32 _shift)
{
    /* Shifting by a negative count is undefined behaviour, and the rounding
       constant 1 << (_shift-1) hits exactly that when _shift is 0, so
       non-positive shifts are mapped to a plain unscaled multiplication. */
    const int32 shift = (_shift > 0) ? _shift : 0;
    const int32 round = (shift > 0) ? (1 << (shift - 1)) : 0;

    for(int32 lcv = 0; lcv < _cnt; lcv++)
    {
        /* Read all four components first so the result can safely overwrite _A[lcv]. */
        const int32 ai = _A[lcv].i;
        const int32 aq = _A[lcv].q;
        const int32 bi = _B[lcv].i;
        const int32 bq = _B[lcv].q;

        int32 ti = ai*bi - aq*bq;
        int32 tq = ai*bq + aq*bi;

        /* Round to nearest by adding half an output LSB before shifting. */
        ti += round;
        tq += round;
        ti >>= shift;
        tq >>= shift;

        _A[lcv].i = (int16)ti;
        _A[lcv].q = (int16)tq;
    }
}
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief Element-wise product of two single-precision complex arrays:
 *        _C[k] = _A[k] * _B[k] for k in [0, _cnt).
 * \param _A   First factor array; only read.
 * \param _B   Second factor array; only read.
 * \param _C   Output array receiving the products.
 * \param _cnt Number of elements to process (nothing happens if it is <= 0).
 */
void x86_cmulsc_complex(std::complex<float> *_A, std::complex<float> *_B, std::complex<float> *_C, int32 _cnt)
{
    int k = 0;

    while(k < _cnt)
    {
        /* Form the product before storing it, so the output may overlap an input element. */
        const std::complex<float> product = _A[k] * _B[k];
        _C[k] = product;
        ++k;
    }
}
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief Complex multiplication with rounding and right shift, written to a
 *        separate output array.
 *
 * For each k in [0, _cnt): the complex product _A[k]*_B[k] is formed in int32,
 * the rounding constant 2^(_shift-1) is added to both components, the result
 * is shifted right by _shift bits and cast back to int16 into _C[k].
 *
 * \param _A     First factor array; only read.
 * \param _B     Second factor array; only read.
 * \param _C     Output array receiving the scaled products.
 * \param _cnt   Number of elements to process (nothing happens if it is <= 0).
 * \param _shift Number of bits to shift right. A value <= 0 means "no scaling":
 *               no rounding constant is added and no shift is applied. The
 *               value must be smaller than the bit width of int32.
 */
void x86_cmulsc(CPX *_A, CPX *_B, CPX *_C, int32 _cnt, int32 _shift)
{
    /* Shifting by a negative count is undefined behaviour, and the rounding
       constant 1 << (_shift-1) hits exactly that when _shift is 0, so
       non-positive shifts are mapped to a plain unscaled multiplication. */
    const int32 shift = (_shift > 0) ? _shift : 0;
    const int32 round = (shift > 0) ? (1 << (shift - 1)) : 0;

    for(int32 lcv = 0; lcv < _cnt; lcv++)
    {
        /* Read the inputs before writing, so _C may refer to the same element as _A or _B. */
        const int32 ai = _A[lcv].i;
        const int32 aq = _A[lcv].q;
        const int32 bi = _B[lcv].i;
        const int32 bq = _B[lcv].q;

        int32 ti = ai*bi - aq*bq;
        int32 tq = ai*bq + aq*bi;

        /* Round to nearest by adding half an output LSB before shifting. */
        ti += round;
        tq += round;
        ti >>= shift;
        tq >>= shift;

        _C[lcv].i = (int16)ti;
        _C[lcv].q = (int16)tq;
    }
}
/*----------------------------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief Multiply samples by a mixing table and accumulate both components.
 *
 * Over k in [0, _cnt) the function sums
 *   I: _A[k].i*_B[k].i + _A[k].q*_B[k].nq
 *   Q: _A[k].i*_B[k].q + _A[k].q*_B[k].ni
 * in int32 and stores the totals through the output pointers once the loop
 * has finished.
 * NOTE(review): ni/nq presumably hold the negated i/q of the mixing entry -
 * confirm against the MIX declaration.
 *
 * \param _A      Input sample array; only read.
 * \param _B      Mixing table with one entry per sample; only read.
 * \param _cnt    Number of samples to process.
 * \param _iaccum Receives the I total (0 when _cnt <= 0).
 * \param _qaccum Receives the Q total (0 when _cnt <= 0).
 */
void x86_cacc(CPX *_A, MIX *_B, int32 _cnt, int32 *_iaccum, int32 *_qaccum)
{
    int32 sum_i = 0;
    int32 sum_q = 0;

    for(int32 k = 0; k < _cnt; ++k)
    {
        const int32 re = _A[k].i;
        const int32 im = _A[k].q;

        const int32 term_i = re*_B[k].i + im*_B[k].nq;
        const int32 term_q = re*_B[k].q + im*_B[k].ni;

        sum_i += term_i;
        sum_q += term_q;
    }

    /* Outputs are written only after the whole array has been consumed. */
    *_iaccum = sum_i;
    *_qaccum = sum_q;
}
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief Squared magnitude of each complex sample, written over the input buffer.
 *
 * The buffer behind _A is also addressed as an array of float: result k
 * (real^2 + imag^2 of _A[k]) is stored in float slot k counted from the start
 * of the buffer. After the call the first _cnt floats hold the results and the
 * original complex samples are no longer intact.
 *
 * \param _A   Complex input array; overwritten as described above.
 * \param _cnt Number of complex samples to process.
 */
void x86_gr_complex_mag(gr_complex* _A, int32 _cnt)
{
    float* out = (float*)_A;

    for(int k = 0; k < _cnt; ++k)
    {
        /* The right-hand side is fully evaluated before the store, and slot k
           never lies beyond sample k, so samples not yet processed stay intact. */
        const gr_complex& z = _A[k];
        out[k] = z.real()*z.real() + z.imag()*z.imag();
    }
}
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief Squared magnitude (i*i + q*q) of each complex sample, written over
 *        the input buffer as int32 values.
 *
 * The buffer behind _A is also addressed as an array of int32: result k is
 * stored in int32 slot k counted from the start of the buffer.
 * NOTE(review): this presumably relies on CPX occupying the same storage as
 * one int32, so that result k replaces sample k - confirm against the CPX
 * declaration.
 *
 * \param _A   Complex input array; overwritten as described above.
 * \param _cnt Number of complex samples to process.
 */
void x86_cmag(CPX *_A, int32 _cnt)
{
    int32 *out = (int32 *)_A;

    for(int32 k = 0; k < _cnt; ++k)
    {
        /* The sum is computed from _A[k] before slot k is overwritten. */
        out[k] = _A[k].i*_A[k].i + _A[k].q*_A[k].q;
    }
}
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief Find the largest value in a float array and the position of its
 *        first occurrence.
 *
 * The search starts from a running maximum of 0.0 at index 0 and only accepts
 * strictly greater values. Consequently an empty array, or one with no element
 * above 0.0, reports index 0 and magnitude 0.0.
 *
 * \param _A     Input array; only read.
 * \param _index Receives the index of the first maximal element.
 * \param _magt  Receives the maximal value.
 * \param _cnt   Number of elements to examine.
 */
void x86_float_max(float* _A, unsigned int* _index, float* _magt, unsigned int _cnt)
{
    unsigned int index = 0;
    float mag = 0.0f;

    /* The index is unsigned like _cnt: a signed int counter would be compared
       after an implicit conversion and would overflow for counts above INT_MAX. */
    for(unsigned int i = 0; i < _cnt; i++)
    {
        if(_A[i] > mag)
        {
            index = i;
            mag = _A[i];
        }
    }

    *_index = index;
    *_magt = mag;
}
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief Find the largest value in an unsigned array and the position of its
 *        first occurrence.
 *
 * The running maximum starts at 0 with index 0 and is replaced only by
 * strictly greater values, so an empty or all-zero array reports index 0 and
 * magnitude 0.
 *
 * \param _A     Input array; only read.
 * \param _index Receives the index of the first maximal element.
 * \param _magt  Receives the maximal value.
 * \param _cnt   Number of elements to examine.
 */
void x86_max(unsigned int *_A, unsigned int *_index, unsigned int *_magt, unsigned int _cnt)
{
    const unsigned int *cursor = _A;
    unsigned int peak = 0;
    unsigned int peak_pos = 0;
    unsigned int pos = 0;

    while(pos < _cnt)
    {
        const unsigned int candidate = *cursor++;
        if(candidate > peak)
        {
            peak = candidate;
            peak_pos = pos;
        }
        ++pos;
    }

    /* Results are published only after the scan is complete. */
    *_index = peak_pos;
    *_magt = peak;
}
/*----------------------------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief Accumulate the samples in A three times, each time with the sign of
 *        every sample chosen by one of the arrays E, P and L.
 *
 * For each k in [0, cnt) and each of E, P, L: if the .i component of that
 * array's k-th entry is negative, A[k] is subtracted from the matching
 * accumulator, otherwise it is added. Only the .i component of E/P/L is
 * inspected. The accumulators are of type CPX, so the running sums are held
 * in CPX's component type.
 * NOTE(review): E/P/L are presumably the early/prompt/late code replicas -
 * confirm with the callers.
 *
 * \param A     Input sample array; only read.
 * \param E     Sign source for accum[0].
 * \param P     Sign source for accum[1].
 * \param L     Sign source for accum[2].
 * \param cnt   Number of samples to process.
 * \param accum Output array of at least three elements: [0] from E, [1] from P,
 *              [2] from L. All are zero when cnt <= 0.
 */
void x86_prn_accum(CPX *A, CPX *E, CPX *P, CPX *L, int32 cnt, CPX *accum)
{
    /* Treat the three sign sources uniformly: slot 0 = E, 1 = P, 2 = L. */
    CPX *const sign_src[3] = { E, P, L };
    CPX sums[3];

    for(int32 j = 0; j < 3; ++j)
    {
        sums[j].i = 0;
        sums[j].q = 0;
    }

    for(int32 k = 0; k < cnt; ++k)
    {
        for(int32 j = 0; j < 3; ++j)
        {
            if(sign_src[j][k].i < 0)
            {
                sums[j].i -= A[k].i;
                sums[j].q -= A[k].q;
            }
            else
            {
                sums[j].i += A[k].i;
                sums[j].q += A[k].q;
            }
        }
    }

    /* Publish the totals only after all samples have been consumed. */
    for(int32 j = 0; j < 3; ++j)
    {
        accum[j].i = sums[j].i;
        accum[j].q = sums[j].q;
    }
}
/*----------------------------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------------------------*/
/*!
 * \brief Accumulate the samples in A weighted by three MIX tables E, P and L.
 *
 * For each k in [0, cnt) and each table T in {E, P, L}:
 *   i-sum += A[k].i * T[k].i
 *   q-sum += A[k].q * T[k].ni
 * Only the .i and .ni fields of the tables are used. The sums are kept in
 * local CPX_ACCUM accumulators and copied to the output after the loop.
 * NOTE(review): E/P/L are presumably the early/prompt/late code replicas -
 * confirm with the callers.
 *
 * \param A     Input sample array; only read.
 * \param E     Weight table for accum[0].
 * \param P     Weight table for accum[1].
 * \param L     Weight table for accum[2].
 * \param cnt   Number of samples to process.
 * \param accum Output array of at least three elements: [0] from E, [1] from P,
 *              [2] from L. All are zero when cnt <= 0.
 */
void x86_prn_accum_new(CPX *A, MIX *E, MIX *P, MIX *L, int32 cnt, CPX_ACCUM *accum)
{
    /* Treat the three weight tables uniformly: slot 0 = E, 1 = P, 2 = L. */
    MIX *const table[3] = { E, P, L };
    CPX_ACCUM sums[3];

    for(int32 j = 0; j < 3; ++j)
    {
        sums[j].i = 0;
        sums[j].q = 0;
    }

    for(int32 k = 0; k < cnt; ++k)
    {
        for(int32 j = 0; j < 3; ++j)
        {
            sums[j].i += A[k].i*table[j][k].i;
            sums[j].q += A[k].q*table[j][k].ni;
        }
    }

    /* Publish the totals only after all samples have been consumed. */
    for(int32 j = 0; j < 3; ++j)
    {
        accum[j].i = sums[j].i;
        accum[j].q = sums[j].q;
    }
}
/*----------------------------------------------------------------------------------------------*/
//int32 x86_acc(int16 *_A, int32 _cnt)
//{
//
// int32 accum = 0;
// int16 *a = _A;
// int32 lcv;
//
// for(lcv = 0; lcv < _cnt; lcv++)
// accum += a[lcv];
//
// return(accum);
//}
//void x86_crot(CPX *_A, CPX *_B, int32 _cnt)
//{
//
// int32 lcv;
// int32 ai, aq;
// int32 bi, bq;
// int32 ti, tq;
//
// bi = _B->i;
// bq = _B->q;
//
// for(lcv = 0; lcv < _cnt; lcv++)
// {
//
// ai = _A[lcv].i;
// aq = _A[lcv].q;
//
// ti = ai*bi-aq*bq;
// tq = ai*bq+aq*bi;
//
// _A[lcv].i = (int16)ti;
// _A[lcv].q = (int16)tq;
// }
//}
//
//void x86_crot(CPX *_A, CPX *_B, int32 _shift, int32 _cnt)
//{
//
// int32 lcv;
// int32 ai, aq;
// int32 bi, bq;
// int32 ti, tq;
// int32 shift;
// int32 round;
//
// shift = _shift;
// round = 1 << (shift-1);
//
// bi = _B->i;
// bq = _B->q;
//
// for(lcv = 0; lcv < _cnt; lcv++)
// {
//
// ai = _A[lcv].i;
// aq = _A[lcv].q;
//
// ti = ai*bi-aq*bq;
// tq = ai*bq+aq*bi;
//
// ti += round;
// tq += round;
//
// ti >>= shift;
// tq >>= shift;
//
// _A[lcv].i = (int16)ti;
// _A[lcv].q = (int16)tq;
// }
//
//}