From 2be266cc71858b7e88fb87e931b0a089e2452311 Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Sat, 19 Mar 2016 21:41:19 +0100 Subject: [PATCH 1/6] adding sincos kernel --- .../volk_gnsssdr/CMakeLists.txt | 1 + .../volk_gnsssdr/volk_gnsssdr_sine_table.h | 1058 +++++++++++++++++ .../volk_gnsssdr_32f_sincos_32fc.h | 715 +++++++++++ .../volk_gnsssdr/lib/kernel_tests.h | 1 + 4 files changed, 1775 insertions(+) create mode 100644 src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_sine_table.h create mode 100644 src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_sincos_32fc.h diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/CMakeLists.txt b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/CMakeLists.txt index 29f60368e..f796d7b26 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/CMakeLists.txt +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/CMakeLists.txt @@ -176,6 +176,7 @@ install(FILES ${PROJECT_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_config_fixed.h ${PROJECT_BINARY_DIR}/include/volk_gnsssdr/volk_gnsssdr_typedefs.h ${PROJECT_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_malloc.h + ${PROJECT_SOURCE_DIR}/include/volk_gnsssdr/volk_gnsssdr_sine_table.h DESTINATION include/volk_gnsssdr COMPONENT "volk_gnsssdr_devel" ) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_sine_table.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_sine_table.h new file mode 100644 index 000000000..90bd78569 --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/include/volk_gnsssdr/volk_gnsssdr_sine_table.h @@ -0,0 +1,1058 @@ +/*! 
+ * \file volk_gnsssdr_sine_table.h + * \brief Sine table + * \author Carles Fernandez-Prades, 2015 cfernandez(at)cttc.es + * + * Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors) + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see <https://www.gnu.org/licenses/>. + */ + + +#ifndef INCLUDED_VOLK_GNSSSDR_SINE_TABLE_H +#define INCLUDED_VOLK_GNSSSDR_SINE_TABLE_H + +/* From gnuradio/gnuradio-runtime/lib/math/sine_table.h + * max_error = 2.353084136763606e-06 */ +static const float sine_table_10bits[1 << 10][2] = { + +{ 2.925817799165007e-09, 7.219194364267018e-09 }, + { 2.925707643778599e-09, 2.526699001579799e-07 }, + { 2.925487337153070e-09, 1.191140162167675e-06 }, + { 2.925156887582842e-09, 3.284585035595589e-06 }, + { 2.924716307509151e-09, 6.994872605695784e-06 }, + { 2.924165613519592e-09, 1.278374920658798e-05 }, + { 2.923504826347475e-09, 2.111280464718590e-05 }, + { 2.922733970871080e-09, 3.244343744537165e-05 }, + { 2.921853076112655e-09, 4.723682007436170e-05 }, + { 2.920862175237416e-09, 6.595386421935634e-05 }, + { 2.919761305552202e-09, 8.905518605213658e-05 }, + { 2.918550508504146e-09, 1.170010715193098e-04 }, + { 2.917229829679050e-09, 1.502514416517192e-04 }, + { 2.915799318799769e-09, 1.892658178912071e-04 }, + { 2.914259029724184e-09, 2.345032874456615e-04 }, + { 2.912609020443340e-09, 2.864224686607020e-04 }, + { 2.910849353079123e-09, 3.454814764261432e-04 }, + { 
2.908980093882049e-09, 4.121378876027343e-04 }, + { 2.907001313228646e-09, 4.868487064877691e-04 }, + { 2.904913085618902e-09, 5.700703303049837e-04 }, + { 2.902715489673383e-09, 6.622585147355725e-04 }, + { 2.900408608130373e-09, 7.638683394782519e-04 }, + { 2.897992527842612e-09, 8.753541738578119e-04 }, + { 2.895467339774186e-09, 9.971696424604937e-04 }, + { 2.892833138996999e-09, 1.129767590823255e-03 }, + { 2.890090024687216e-09, 1.273600051161478e-03 }, + { 2.887238100121550e-09, 1.429118208142094e-03 }, + { 2.884277472673313e-09, 1.596772364709564e-03 }, + { 2.881208253808507e-09, 1.777011907950626e-03 }, + { 2.878030559081432e-09, 1.970285275029487e-03 }, + { 2.874744508130554e-09, 2.177039919152579e-03 }, + { 2.871350224673798e-09, 2.397722275614272e-03 }, + { 2.867847836504030e-09, 2.632777727878843e-03 }, + { 2.864237475484149e-09, 2.882650573737405e-03 }, + { 2.860519277542297e-09, 3.147783991507308e-03 }, + { 2.856693382666432e-09, 3.428620006328931e-03 }, + { 2.852759934899389e-09, 3.725599456482154e-03 }, + { 2.848719082333207e-09, 4.039161959812243e-03 }, + { 2.844570977103752e-09, 4.369745880190706e-03 }, + { 2.840315775384800e-09, 4.717788294077374e-03 }, + { 2.835953637382310e-09, 5.083724957128360e-03 }, + { 2.831484727328322e-09, 5.467990270896617e-03 }, + { 2.826909213474759e-09, 5.871017249604038e-03 }, + { 2.822227268087134e-09, 6.293237486988512e-03 }, + { 2.817439067438018e-09, 6.735081123237729e-03 }, + { 2.812544791800534e-09, 7.196976811989608e-03 }, + { 2.807544625441273e-09, 7.679351687456759e-03 }, + { 2.802438756613836e-09, 8.182631331563162e-03 }, + { 2.797227377551135e-09, 8.707239741274575e-03 }, + { 2.791910684458716e-09, 9.253599295902304e-03 }, + { 2.786488877507140e-09, 9.822130724578715e-03 }, + { 2.780962160824228e-09, 1.041325307382490e-02 }, + { 2.775330742487884e-09, 1.102738367513773e-02 }, + { 2.769594834517682e-09, 1.166493811278924e-02 }, + { 2.763754652867477e-09, 1.232633019159818e-02 }, + { 2.757810417416620e-09, 
1.301197190494069e-02 }, + { 2.751762351962413e-09, 1.372227340270610e-02 }, + { 2.745610684210923e-09, 1.445764295952962e-02 }, + { 2.739355645769094e-09, 1.521848694296229e-02 }, + { 2.732997472135539e-09, 1.600520978188769e-02 }, + { 2.726536402691907e-09, 1.681821393496225e-02 }, + { 2.719972680693777e-09, 1.765789985920713e-02 }, + { 2.713306553261610e-09, 1.852466597868779e-02 }, + { 2.706538271371373e-09, 1.941890865333146e-02 }, + { 2.699668089844909e-09, 2.034102214787814e-02 }, + { 2.692696267340880e-09, 2.129139860085272e-02 }, + { 2.685623066344263e-09, 2.227042799383416e-02 }, + { 2.678448753157212e-09, 2.327849812064098e-02 }, + { 2.671173597888530e-09, 2.431599455681316e-02 }, + { 2.663797874443630e-09, 2.538330062913108e-02 }, + { 2.656321860514457e-09, 2.648079738524795e-02 }, + { 2.648745837568575e-09, 2.760886356354952e-02 }, + { 2.641070090839117e-09, 2.876787556300114e-02 }, + { 2.633294909313421e-09, 2.995820741329835e-02 }, + { 2.625420585722845e-09, 3.118023074495535e-02 }, + { 2.617447416531143e-09, 3.243431475972608e-02 }, + { 2.609375701923643e-09, 3.372082620101990e-02 }, + { 2.601205745795833e-09, 3.504012932452527e-02 }, + { 2.592937855741933e-09, 3.639258586895711e-02 }, + { 2.584572343043400e-09, 3.777855502693250e-02 }, + { 2.576109522656942e-09, 3.919839341605197e-02 }, + { 2.567549713203028e-09, 4.065245505002102e-02 }, + { 2.558893236953688e-09, 4.214109131001403e-02 }, + { 2.550140419820252e-09, 4.366465091617666e-02 }, + { 2.541291591341445e-09, 4.522347989919473e-02 }, + { 2.532347084670572e-09, 4.681792157215026e-02 }, + { 2.523307236563343e-09, 4.844831650239501e-02 }, + { 2.514172387364900e-09, 5.011500248369893e-02 }, + { 2.504942880997064e-09, 5.181831450849345e-02 }, + { 2.495619064945627e-09, 5.355858474024022e-02 }, + { 2.486201290246928e-09, 5.533614248606705e-02 }, + { 2.476689911475047e-09, 5.715131416942842e-02 }, + { 2.467085286727668e-09, 5.900442330315692e-02 }, + { 2.457387777613798e-09, 6.089579046229943e-02 
}, + { 2.447597749239101e-09, 6.282573325755320e-02 }, + { 2.437715570192557e-09, 6.479456630859221e-02 }, + { 2.427741612532542e-09, 6.680260121764925e-02 }, + { 2.417676251773166e-09, 6.885014654319160e-02 }, + { 2.407519866869294e-09, 7.093750777401114e-02 }, + { 2.397272840203310e-09, 7.306498730310884e-02 }, + { 2.386935557569868e-09, 7.523288440214027e-02 }, + { 2.376508408161815e-09, 7.744149519577415e-02 }, + { 2.365991784555363e-09, 7.969111263635709e-02 }, + { 2.355386082695641e-09, 8.198202647865405e-02 }, + { 2.344691701881232e-09, 8.431452325495814e-02 }, + { 2.333909044749407e-09, 8.668888625021409e-02 }, + { 2.323038517261246e-09, 8.910539547731611e-02 }, + { 2.312080528685971e-09, 9.156432765274414e-02 }, + { 2.301035491585642e-09, 9.406595617227698e-02 }, + { 2.289903821799651e-09, 9.661055108691619e-02 }, + { 2.278685938428940e-09, 9.919837907903295e-02 }, + { 2.267382263820762e-09, 1.018297034385580e-01 }, + { 2.255993223551837e-09, 1.045047840397028e-01 }, + { 2.244519246413220e-09, 1.072238773174577e-01 }, + { 2.232960764393620e-09, 1.099872362446146e-01 }, + { 2.221318212663309e-09, 1.127951103088245e-01 }, + { 2.209592029557811e-09, 1.156477454898748e-01 }, + { 2.197782656561395e-09, 1.185453842371912e-01 }, + { 2.185890538290176e-09, 1.214882654476019e-01 }, + { 2.173916122475606e-09, 1.244766244431883e-01 }, + { 2.161859859947797e-09, 1.275106929493488e-01 }, + { 2.149722204618256e-09, 1.305906990731841e-01 }, + { 2.137503613462743e-09, 1.337168672820376e-01 }, + { 2.125204546504321e-09, 1.368894183821595e-01 }, + { 2.112825466795944e-09, 1.401085694976751e-01 }, + { 2.100366840402933e-09, 1.433745340497602e-01 }, + { 2.087829136385612e-09, 1.466875217359607e-01 }, + { 2.075212826781308e-09, 1.500477385098620e-01 }, + { 2.062518386587093e-09, 1.534553865607503e-01 }, + { 2.049746293741359e-09, 1.569106642937665e-01 }, + { 2.036897029106193e-09, 1.604137663100403e-01 }, + { 2.023971076449323e-09, 1.639648833871233e-01 }, + { 
2.010968922425217e-09, 1.675642024598467e-01 }, + { 1.997891056557933e-09, 1.712119066008896e-01 }, + { 1.984737971221581e-09, 1.749081750021970e-01 }, + { 1.971510161622434e-09, 1.786531829561379e-01 }, + { 1.958208125780130e-09, 1.824471018371070e-01 }, + { 1.944832364508511e-09, 1.862900990834311e-01 }, + { 1.931383381397782e-09, 1.901823381790926e-01 }, + { 1.917861682794392e-09, 1.941239786363039e-01 }, + { 1.904267777782611e-09, 1.981151759777950e-01 }, + { 1.890602178165317e-09, 2.021560817195309e-01 }, + { 1.876865398444616e-09, 2.062468433536743e-01 }, + { 1.863057955802572e-09, 2.103876043317229e-01 }, + { 1.849180370081465e-09, 2.145785040479915e-01 }, + { 1.835233163764673e-09, 2.188196778231083e-01 }, + { 1.821216861956509e-09, 2.231112568880342e-01 }, + { 1.807131992362945e-09, 2.274533683680190e-01 }, + { 1.792979085271234e-09, 2.318461352671018e-01 }, + { 1.778758673530482e-09, 2.362896764525300e-01 }, + { 1.764471292530943e-09, 2.407841066397789e-01 }, + { 1.750117480184598e-09, 2.453295363773890e-01 }, + { 1.735697776904342e-09, 2.499260720324433e-01 }, + { 1.721212725583874e-09, 2.545738157760434e-01 }, + { 1.706662871577097e-09, 2.592728655691494e-01 }, + { 1.692048762677849e-09, 2.640233151485341e-01 }, + { 1.677370949099090e-09, 2.688252540131204e-01 }, + { 1.662629983452104e-09, 2.736787674105404e-01 }, + { 1.647826420726167e-09, 2.785839363237506e-01 }, + { 1.632960818266680e-09, 2.835408374583758e-01 }, + { 1.618033735755429e-09, 2.885495432295704e-01 }, + { 1.603045735188609e-09, 2.936101217498361e-01 }, + { 1.587997380855918e-09, 2.987226368167127e-01 }, + { 1.572889239319430e-09, 3.038871479007593e-01 }, + { 1.557721879392051e-09, 3.091037101339017e-01 }, + { 1.542495872116447e-09, 3.143723742978435e-01 }, + { 1.527211790743024e-09, 3.196931868130269e-01 }, + { 1.511870210708909e-09, 3.250661897274744e-01 }, + { 1.496471709615926e-09, 3.304914207062036e-01 }, + { 1.481016867208896e-09, 3.359689130207621e-01 }, + { 1.465506265353924e-09, 
3.414986955389885e-01 }, + { 1.449940488016384e-09, 3.470807927151147e-01 }, + { 1.434320121238994e-09, 3.527152245800635e-01 }, + { 1.418645753119802e-09, 3.584020067320109e-01 }, + { 1.402917973789838e-09, 3.641411503272979e-01 }, + { 1.387137375391042e-09, 3.699326620714776e-01 }, + { 1.371304552054134e-09, 3.757765442106153e-01 }, + { 1.355420099875958e-09, 3.816727945230153e-01 }, + { 1.339484616897137e-09, 3.876214063110671e-01 }, + { 1.323498703079580e-09, 3.936223683933865e-01 }, + { 1.307462960283922e-09, 3.996756650972121e-01 }, + { 1.291377992246768e-09, 4.057812762511174e-01 }, + { 1.275244404558188e-09, 4.119391771778626e-01 }, + { 1.259062804638585e-09, 4.181493386877248e-01 }, + { 1.242833801715929e-09, 4.244117270719281e-01 }, + { 1.226558006803155e-09, 4.307263040962509e-01 }, + { 1.210236032674760e-09, 4.370930269951803e-01 }, + { 1.193868493843725e-09, 4.435118484661861e-01 }, + { 1.177456006538695e-09, 4.499827166641340e-01 }, + { 1.160999188680582e-09, 4.565055751961679e-01 }, + { 1.144498659859216e-09, 4.630803631168164e-01 }, + { 1.127955041310214e-09, 4.697070149232604e-01 }, + { 1.111368955891417e-09, 4.763854605510119e-01 }, + { 1.094741028059551e-09, 4.831156253697562e-01 }, + { 1.078071883846871e-09, 4.898974301794375e-01 }, + { 1.061362150836978e-09, 4.967307912069362e-01 }, + { 1.044612458142151e-09, 5.036156201023686e-01 }, + { 1.027823436378632e-09, 5.105518239364775e-01 }, + { 1.010995717643647e-09, 5.175393051975563e-01 }, + { 9.941299354913699e-10, 5.245779617890562e-01 }, + { 9.772267249089968e-10, 5.316676870274011e-01 }, + { 9.602867222926046e-10, 5.388083696401416e-01 }, + { 9.433105654240147e-10, 5.459998937639375e-01 }, + { 9.262988934458084e-10, 5.532421389435711e-01 }, + { 9.092523468378193e-10, 5.605349801305876e-01 }, + { 8.921715673928355e-10, 5.678782876825250e-01 }, + { 8.750571981926701e-10, 5.752719273622372e-01 }, + { 8.579098835836508e-10, 5.827157603377209e-01 }, + { 8.407302691522673e-10, 5.902096431821322e-01 
}, + { 8.235190017016133e-10, 5.977534278737073e-01 }, + { 8.062767292259225e-10, 6.053469617967722e-01 }, + { 7.890041008871165e-10, 6.129900877421282e-01 }, + { 7.717017669898175e-10, 6.206826439083659e-01 }, + { 7.543703789572603e-10, 6.284244639030392e-01 }, + { 7.370105893063053e-10, 6.362153767444958e-01 }, + { 7.196230516231919e-10, 6.440552068636356e-01 }, + { 7.022084205389746e-10, 6.519437741060674e-01 }, + { 6.847673517046416e-10, 6.598808937346672e-01 }, + { 6.673005017664976e-10, 6.678663764322770e-01 }, + { 6.498085283416530e-10, 6.759000283046127e-01 }, + { 6.322920899929834e-10, 6.839816508836737e-01 }, + { 6.147518462045659e-10, 6.921110411311926e-01 }, + { 5.971884573565851e-10, 7.002879914425926e-01 }, + { 5.796025847007168e-10, 7.085122896509806e-01 }, + { 5.619948903351406e-10, 7.167837190315758e-01 }, + { 5.443660371796048e-10, 7.251020583063744e-01 }, + { 5.267166889504394e-10, 7.334670816491009e-01 }, + { 5.090475101356742e-10, 7.418785586903696e-01 }, + { 4.913591659698399e-10, 7.503362545232619e-01 }, + { 4.736523224091392e-10, 7.588399297089872e-01 }, + { 4.559276461062478e-10, 7.673893402829834e-01 }, + { 4.381858043851147e-10, 7.759842377612828e-01 }, + { 4.204274652161870e-10, 7.846243691469355e-01 }, + { 4.026532971908398e-10, 7.933094769370790e-01 }, + { 3.848639694963359e-10, 8.020392991300200e-01 }, + { 3.670601518910503e-10, 8.108135692324444e-01 }, + { 3.492425146784233e-10, 8.196320162675177e-01 }, + { 3.314117286825031e-10, 8.284943647824689e-01 }, + { 3.135684652223755e-10, 8.374003348569865e-01 }, + { 2.957133960867535e-10, 8.463496421118015e-01 }, + { 2.778471935089361e-10, 8.553419977173513e-01 }, + { 2.599705301412391e-10, 8.643771084029740e-01 }, + { 2.420840790301135e-10, 8.734546764660205e-01 }, + { 2.241885135902046e-10, 8.825743997817682e-01 }, + { 2.062845075795238e-10, 8.917359718130367e-01 }, + { 1.883727350736140e-10, 9.009390816205823e-01 }, + { 1.704538704408269e-10, 9.101834138731877e-01 }, + { 
1.525285883160648e-10, 9.194686488588080e-01 }, + { 1.345975635762696e-10, 9.287944624950824e-01 }, + { 1.166614713141648e-10, 9.381605263410157e-01 }, + { 9.872098681369190e-11, 9.475665076080466e-01 }, + { 8.077678552380464e-11, 9.570120691722380e-01 }, + { 6.282954303364090e-11, 9.664968695860140e-01 }, + { 4.487993504668797e-11, 9.760205630906909e-01 }, + { 2.692863735553042e-11, 9.855827996289697e-01 }, + { 8.976325816439114e-12, 9.951832248577780e-01 }, + { -8.976323676304494e-12, 1.004821480161519e+00 }, + { -2.692863521550168e-11, 1.014497202665280e+00 }, + { -4.487993290681805e-11, 1.024210025248670e+00 }, + { -6.282954089398273e-11, 1.033959576559617e+00 }, + { -8.077678338451706e-11, 1.043745481028715e+00 }, + { -9.872098467477489e-11, 1.053567358883467e+00 }, + { -1.166614691757772e-10, 1.063424826163223e+00 }, + { -1.345975614383584e-10, 1.073317494734013e+00 }, + { -1.525285861788948e-10, 1.083244972303963e+00 }, + { -1.704538683042922e-10, 1.093206862438572e+00 }, + { -1.883727329379793e-10, 1.103202764576806e+00 }, + { -2.062845054446831e-10, 1.113232274046796e+00 }, + { -2.241885114563697e-10, 1.123294982082432e+00 }, + { -2.420840768973375e-10, 1.133390475839767e+00 }, + { -2.599705280096278e-10, 1.143518338413855e+00 }, + { -2.778471913784365e-10, 1.153678148855860e+00 }, + { -2.957133939575774e-10, 1.163869482190458e+00 }, + { -3.135684630945758e-10, 1.174091909433296e+00 }, + { -3.314117265561857e-10, 1.184344997608959e+00 }, + { -3.492425125535882e-10, 1.194628309769018e+00 }, + { -3.670601497678034e-10, 1.204941405010466e+00 }, + { -3.848639673748360e-10, 1.215283838494269e+00 }, + { -4.026532950710339e-10, 1.225655161464298e+00 }, + { -4.204274630982869e-10, 1.236054921266445e+00 }, + { -4.381858022691734e-10, 1.246482661367958e+00 }, + { -4.559276439922654e-10, 1.256937921377146e+00 }, + { -4.736523202972214e-10, 1.267420237063216e+00 }, + { -4.913591638600925e-10, 1.277929140376502e+00 }, + { -5.090475080282032e-10, 1.288464159468706e+00 
}, + { -5.267166868452449e-10, 1.299024818713528e+00 }, + { -5.443660350768455e-10, 1.309610638727845e+00 }, + { -5.619948882348695e-10, 1.320221136392390e+00 }, + { -5.796025826029868e-10, 1.330855824873457e+00 }, + { -5.971884552615020e-10, 1.341514213644420e+00 }, + { -6.147518441122357e-10, 1.352195808507556e+00 }, + { -6.322920879034590e-10, 1.362900111616144e+00 }, + { -6.498085262549874e-10, 1.373626621496939e+00 }, + { -6.673004996827436e-10, 1.384374833072571e+00 }, + { -6.847673496239581e-10, 1.395144237684605e+00 }, + { -7.022084184613616e-10, 1.405934323116231e+00 }, + { -7.196230495488082e-10, 1.416744573616104e+00 }, + { -7.370105872352039e-10, 1.427574469921397e+00 }, + { -7.543703768894941e-10, 1.438423489281758e+00 }, + { -7.717017649255453e-10, 1.449291105483472e+00 }, + { -7.890040988262324e-10, 1.460176788873383e+00 }, + { -8.062767271686383e-10, 1.471080006383765e+00 }, + { -8.235189996479819e-10, 1.482000221556656e+00 }, + { -8.407302671024475e-10, 1.492936894569018e+00 }, + { -8.579098815375368e-10, 1.503889482257845e+00 }, + { -8.750571961505266e-10, 1.514857438145604e+00 }, + { -8.921715653546624e-10, 1.525840212465756e+00 }, + { -9.092523448036167e-10, 1.536837252188703e+00 }, + { -9.262988914157881e-10, 1.547848001047890e+00 }, + { -9.433105633981766e-10, 1.558871899565883e+00 }, + { -9.602867202711075e-10, 1.569908385081254e+00 }, + { -9.772267228916820e-10, 1.580956891774897e+00 }, + { -9.941299334786078e-10, 1.592016850697478e+00 }, + { -1.010995715635332e-09, 1.603087689796053e+00 }, + { -1.027823434374870e-09, 1.614168833942028e+00 }, + { -1.044612456143047e-09, 1.625259704958335e+00 }, + { -1.061362148842745e-09, 1.636359721647526e+00 }, + { -1.078071881857297e-09, 1.647468299819543e+00 }, + { -1.094741026074900e-09, 1.658584852320419e+00 }, + { -1.111368953911690e-09, 1.669708789060341e+00 }, + { -1.127955039335462e-09, 1.680839517042381e+00 }, + { -1.144498657889600e-09, 1.691976440391624e+00 }, + { -1.160999186716154e-09, 
1.703118960383971e+00 }, + { -1.177456004579561e-09, 1.714266475475616e+00 }, + { -1.193868491889832e-09, 1.725418381332405e+00 }, + { -1.210236030726319e-09, 1.736574070859850e+00 }, + { -1.226558004860220e-09, 1.747732934232508e+00 }, + { -1.242833799778447e-09, 1.758894358924547e+00 }, + { -1.259062802706714e-09, 1.770057729740021e+00 }, + { -1.275244402631982e-09, 1.781222428842935e+00 }, + { -1.291377990326492e-09, 1.792387835788660e+00 }, + { -1.307462958369363e-09, 1.803553327553897e+00 }, + { -1.323498701170897e-09, 1.814718278568759e+00 }, + { -1.339484614994490e-09, 1.825882060747428e+00 }, + { -1.355420097979292e-09, 1.837044043519582e+00 }, + { -1.371304550163662e-09, 1.848203593862598e+00 }, + { -1.387137373506711e-09, 1.859360076332671e+00 }, + { -1.402917971911754e-09, 1.870512853097495e+00 }, + { -1.418645751248018e-09, 1.881661283967967e+00 }, + { -1.434320119373722e-09, 1.892804726431080e+00 }, + { -1.449940486157623e-09, 1.903942535681972e+00 }, + { -1.465506263501516e-09, 1.915074064656886e+00 }, + { -1.481016865363264e-09, 1.926198664066737e+00 }, + { -1.496471707776859e-09, 1.937315682428795e+00 }, + { -1.511870208876724e-09, 1.948424466101625e+00 }, + { -1.527211788917509e-09, 1.959524359317042e+00 }, + { -1.542495870297867e-09, 1.970614704215133e+00 }, + { -1.557721877580406e-09, 1.981694840876775e+00 }, + { -1.572889237514880e-09, 1.992764107358707e+00 }, + { -1.587997379058514e-09, 2.003821839726753e+00 }, + { -1.603045733398246e-09, 2.014867372090665e+00 }, + { -1.618033733972424e-09, 2.025900036638798e+00 }, + { -1.632960816490822e-09, 2.036919163671778e+00 }, + { -1.647826418957721e-09, 2.047924081638631e+00 }, + { -1.662629981691070e-09, 2.058914117170269e+00 }, + { -1.677370947345626e-09, 2.069888595116115e+00 }, + { -1.692048760931849e-09, 2.080846838577820e+00 }, + { -1.706662869838827e-09, 2.091788168946183e+00 }, + { -1.721212723853279e-09, 2.102711905935372e+00 }, + { -1.735697775181424e-09, 2.113617367619504e+00 }, + { 
-1.750117478469621e-09, 2.124503870468520e+00 }, + { -1.764471290823748e-09, 2.135370729383332e+00 }, + { -1.778758671831281e-09, 2.146217257733207e+00 }, + { -1.792979083579974e-09, 2.157042767390815e+00 }, + { -1.807131990679890e-09, 2.167846568770014e+00 }, + { -1.821216860281448e-09, 2.178627970860822e+00 }, + { -1.835233162097977e-09, 2.189386281268046e+00 }, + { -1.849180368423027e-09, 2.200120806246095e+00 }, + { -1.863057954152340e-09, 2.210830850737588e+00 }, + { -1.876865396802907e-09, 2.221515718409926e+00 }, + { -1.890602176531920e-09, 2.232174711691990e+00 }, + { -1.904267776157843e-09, 2.242807131812679e+00 }, + { -1.917861681178094e-09, 2.253412278837029e+00 }, + { -1.931383379790273e-09, 2.263989451705295e+00 }, + { -1.944832362909578e-09, 2.274537948269257e+00 }, + { -1.958208124189984e-09, 2.285057065331676e+00 }, + { -1.971510160041235e-09, 2.295546098682665e+00 }, + { -1.984737969649064e-09, 2.306004343138794e+00 }, + { -1.997891054994522e-09, 2.316431092581699e+00 }, + { -2.010968920870647e-09, 2.326825639994779e+00 }, + { -2.023971074903858e-09, 2.337187277503834e+00 }, + { -2.036897027569834e-09, 2.347515296413520e+00 }, + { -2.049746292214264e-09, 2.357808987247877e+00 }, + { -2.062518385069210e-09, 2.368067639787542e+00 }, + { -2.075212825272584e-09, 2.378290543109652e+00 }, + { -2.087829134886364e-09, 2.388476985626922e+00 }, + { -2.100366838912949e-09, 2.398626255125417e+00 }, + { -2.112825465315542e-09, 2.408737638805759e+00 }, + { -2.125204545033289e-09, 2.418810423320288e+00 }, + { -2.137503612001452e-09, 2.428843894814472e+00 }, + { -2.149722203166389e-09, 2.438837338964302e+00 }, + { -2.161859858505829e-09, 2.448790041018174e+00 }, + { -2.173916121043380e-09, 2.458701285834241e+00 }, + { -2.185890536867478e-09, 2.468570357921585e+00 }, + { -2.197782655148702e-09, 2.478396541480230e+00 }, + { -2.209592028154913e-09, 2.488179120439544e+00 }, + { -2.221318211270522e-09, 2.497917378500214e+00 }, + { -2.232960763010574e-09, 
2.507610599172123e+00 }, + { -2.244519245040444e-09, 2.517258065817044e+00 }, + { -2.255993222189014e-09, 2.526859061686102e+00 }, + { -2.267382262468209e-09, 2.536412869962689e+00 }, + { -2.278685937086658e-09, 2.545918773800664e+00 }, + { -2.289903820467374e-09, 2.555376056366064e+00 }, + { -2.301035490263848e-09, 2.564784000877677e+00 }, + { -2.312080527374447e-09, 2.574141890646339e+00 }, + { -2.323038515960257e-09, 2.583449009117307e+00 }, + { -2.333909043458635e-09, 2.592704639909166e+00 }, + { -2.344691700601153e-09, 2.601908066856634e+00 }, + { -2.355386081425938e-09, 2.611058574048749e+00 }, + { -2.365991783296513e-09, 2.620155445872768e+00 }, + { -2.376508406913500e-09, 2.629197967052127e+00 }, + { -2.386935556332088e-09, 2.638185422689490e+00 }, + { -2.397272838976436e-09, 2.647117098307332e+00 }, + { -2.407519865653114e-09, 2.655992279887846e+00 }, + { -2.417676250567891e-09, 2.664810253915885e+00 }, + { -2.427741611338014e-09, 2.673570307418169e+00 }, + { -2.437715569009093e-09, 2.682271728006635e+00 }, + { -2.447597748066437e-09, 2.690913803917100e+00 }, + { -2.457387776452357e-09, 2.699495824053297e+00 }, + { -2.467085285577292e-09, 2.708017078025636e+00 }, + { -2.476689910335470e-09, 2.716476856194105e+00 }, + { -2.486201289118733e-09, 2.724874449709689e+00 }, + { -2.495619063828443e-09, 2.733209150554255e+00 }, + { -2.504942879891263e-09, 2.741480251583985e+00 }, + { -2.514172386270163e-09, 2.749687046568741e+00 }, + { -2.523307235480146e-09, 2.757828830235740e+00 }, + { -2.532347083598520e-09, 2.765904898308531e+00 }, + { -2.541291590280960e-09, 2.773914547551261e+00 }, + { -2.550140418771202e-09, 2.781857075807392e+00 }, + { -2.558893235915887e-09, 2.789731782043156e+00 }, + { -2.567549712176927e-09, 2.797537966388929e+00 }, + { -2.576109521642196e-09, 2.805274930179221e+00 }, + { -2.584572342040407e-09, 2.812941975996573e+00 }, + { -2.592937854750428e-09, 2.820538407710556e+00 }, + { -2.601205744816134e-09, 2.828063530521908e+00 }, + { 
-2.609375700955458e-09, 2.835516651001539e+00 }, + { -2.617447415574869e-09, 2.842897077134583e+00 }, + { -2.625420584778350e-09, 2.850204118359573e+00 }, + { -2.633294908380520e-09, 2.857437085611509e+00 }, + { -2.641070089918234e-09, 2.864595291363663e+00 }, + { -2.648745836659391e-09, 2.871678049666939e+00 }, + { -2.656321859617343e-09, 2.878684676194483e+00 }, + { -2.663797873558322e-09, 2.885614488280000e+00 }, + { -2.671173597015318e-09, 2.892466804962122e+00 }, + { -2.678448752295859e-09, 2.899240947023252e+00 }, + { -2.685623065495139e-09, 2.905936237033475e+00 }, + { -2.692696266503800e-09, 2.912551999389617e+00 }, + { -2.699668089019767e-09, 2.919087560358171e+00 }, + { -2.706538270558513e-09, 2.925542248116882e+00 }, + { -2.713306552460767e-09, 2.931915392794031e+00 }, + { -2.719972679905295e-09, 2.938206326512581e+00 }, + { -2.726536401915442e-09, 2.944414383428562e+00 }, + { -2.732997471371516e-09, 2.950538899775061e+00 }, + { -2.739355645017194e-09, 2.956579213900666e+00 }, + { -2.745610683471516e-09, 2.962534666313284e+00 }, + { -2.751762351235315e-09, 2.968404599718795e+00 }, + { -2.757810416701751e-09, 2.974188359063684e+00 }, + { -2.763754652165128e-09, 2.979885291576143e+00 }, + { -2.769594833827588e-09, 2.985494746805227e+00 }, + { -2.775330741810390e-09, 2.991016076664491e+00 }, + { -2.780962160159068e-09, 2.996448635469842e+00 }, + { -2.786488876854607e-09, 3.001791779983262e+00 }, + { -2.791910683818570e-09, 3.007044869450794e+00 }, + { -2.797227376923695e-09, 3.012207265645876e+00 }, + { -2.802438755998943e-09, 3.017278332907412e+00 }, + { -2.807544624838820e-09, 3.022257438182037e+00 }, + { -2.812544791210840e-09, 3.027143951064684e+00 }, + { -2.817439066860792e-09, 3.031937243837070e+00 }, + { -2.822227267522746e-09, 3.036636691510884e+00 }, + { -2.826909212922864e-09, 3.041241671864994e+00 }, + { -2.831484726789317e-09, 3.045751565488710e+00 }, + { -2.835953636855826e-09, 3.050165755818853e+00 }, + { -2.840315774871260e-09, 
3.054483629182857e+00 }, + { -2.844570976602957e-09, 3.058704574835744e+00 }, + { -2.848719081844986e-09, 3.062827985002047e+00 }, + { -2.852759934424164e-09, 3.066853254915581e+00 }, + { -2.856693382203833e-09, 3.070779782857041e+00 }, + { -2.860519277092708e-09, 3.074606970196721e+00 }, + { -2.864237475047239e-09, 3.078334221430809e+00 }, + { -2.867847836080156e-09, 3.081960944223928e+00 }, + { -2.871350224262603e-09, 3.085486549445314e+00 }, + { -2.874744507732462e-09, 3.088910451211251e+00 }, + { -2.878030558696270e-09, 3.092232066921130e+00 }, + { -2.881208253436038e-09, 3.095450817298478e+00 }, + { -2.884277472313999e-09, 3.098566126429974e+00 }, + { -2.887238099774968e-09, 3.101577421802070e+00 }, + { -2.890090024353816e-09, 3.104484134342861e+00 }, + { -2.892833138676371e-09, 3.107285698457308e+00 }, + { -2.895467339466766e-09, 3.109981552069083e+00 }, + { -2.897992527547963e-09, 3.112571136655481e+00 }, + { -2.900408607848946e-09, 3.115053897289195e+00 }, + { -2.902715489404992e-09, 3.117429282673042e+00 }, + { -2.904913085363323e-09, 3.119696745180238e+00 }, + { -2.907001312986328e-09, 3.121855740892224e+00 }, + { -2.908980093652563e-09, 3.123905729634218e+00 }, + { -2.910849352862924e-09, 3.125846175016163e+00 }, + { -2.912609020239985e-09, 3.127676544466606e+00 }, + { -2.914259029534118e-09, 3.129396309273659e+00 }, + { -2.915799318622574e-09, 3.131004944618667e+00 }, + { -2.917229829515169e-09, 3.132501929616775e+00 }, + { -2.918550508353347e-09, 3.133886747350606e+00 }, + { -2.919761305414294e-09, 3.135158884909254e+00 }, + { -2.920862175112829e-09, 3.136317833424958e+00 }, + { -2.921853076000972e-09, 3.137363088107359e+00 }, + { -2.922733970772719e-09, 3.138294148283254e+00 }, + { -2.923504826262027e-09, 3.139110517429204e+00 }, + { -2.924165613447473e-09, 3.139811703211207e+00 }, + { -2.924716307449950e-09, 3.140397217517018e+00 }, + { -2.925156887536978e-09, 3.140866576495489e+00 }, + { -2.925487337120335e-09, 3.141219300588825e+00 }, + { 
-2.925707643758784e-09, 3.141454914570261e+00 }, + { -2.925817799158535e-09, 3.141572947579352e+00 }, + { -2.925817799171455e-09, 3.141572933154836e+00 }, + { -2.925707643798390e-09, 3.141454409272987e+00 }, + { -2.925487337185779e-09, 3.141216918378770e+00 }, + { -2.925156887628892e-09, 3.140860007424112e+00 }, + { -2.924716307568119e-09, 3.140383227898687e+00 }, + { -2.924165613591896e-09, 3.139786135867868e+00 }, + { -2.923504826432903e-09, 3.139068292003385e+00 }, + { -2.922733970969412e-09, 3.138229261619561e+00 }, + { -2.921853076224321e-09, 3.137268614707029e+00 }, + { -2.920862175361976e-09, 3.136185925964038e+00 }, + { -2.919761305690083e-09, 3.134980774833275e+00 }, + { -2.918550508654911e-09, 3.133652745531368e+00 }, + { -2.917229829843137e-09, 3.132201427085629e+00 }, + { -2.915799318976726e-09, 3.130626413363146e+00 }, + { -2.914259029914435e-09, 3.128927303107136e+00 }, + { -2.912609020646661e-09, 3.127103699965947e+00 }, + { -2.910849353295315e-09, 3.125155212527586e+00 }, + { -2.908980094111509e-09, 3.123081454351802e+00 }, + { -2.907001313470937e-09, 3.120882043999591e+00 }, + { -2.904913085874448e-09, 3.118556605068443e+00 }, + { -2.902715489941767e-09, 3.116104766219928e+00 }, + { -2.900408608411958e-09, 3.113526161214776e+00 }, + { -2.897992528137022e-09, 3.110820428940251e+00 }, + { -2.895467340081818e-09, 3.107987213444579e+00 }, + { -2.892833139317615e-09, 3.105026163964191e+00 }, + { -2.890090025020589e-09, 3.101936934956479e+00 }, + { -2.887238100468092e-09, 3.098719186130021e+00 }, + { -2.884277473032614e-09, 3.095372582472161e+00 }, + { -2.881208254180937e-09, 3.091896794282404e+00 }, + { -2.878030559466594e-09, 3.088291497198199e+00 }, + { -2.874744508528832e-09, 3.084556372228054e+00 }, + { -2.871350225084755e-09, 3.080691105776848e+00 }, + { -2.867847836928063e-09, 3.076695389678615e+00 }, + { -2.864237475921086e-09, 3.072568921221621e+00 }, + { -2.860519277991847e-09, 3.068311403179147e+00 }, + { -2.856693383129018e-09, 
3.063922543837792e+00 }, + { -2.852759935374575e-09, 3.059402057023109e+00 }, + { -2.848719082821403e-09, 3.054749662130841e+00 }, + { -2.844570977604520e-09, 3.049965084150782e+00 }, + { -2.840315775898525e-09, 3.045048053697736e+00 }, + { -2.835953637908582e-09, 3.039998307034967e+00 }, + { -2.831484727867511e-09, 3.034815586104635e+00 }, + { -2.826909214026628e-09, 3.029499638550941e+00 }, + { -2.822227268651470e-09, 3.024050217748861e+00 }, + { -2.817439068015245e-09, 3.018467082830179e+00 }, + { -2.812544792390175e-09, 3.012749998707001e+00 }, + { -2.807544626043751e-09, 3.006898736100911e+00 }, + { -2.802438757228650e-09, 3.000913071564665e+00 }, + { -2.797227378178760e-09, 2.994792787510961e+00 }, + { -2.791910685098702e-09, 2.988537672233504e+00 }, + { -2.786488878159805e-09, 2.982147519935565e+00 }, + { -2.780962161489413e-09, 2.975622130750641e+00 }, + { -2.775330743165298e-09, 2.968961310769028e+00 }, + { -2.769594835207775e-09, 2.962164872061613e+00 }, + { -2.763754653569747e-09, 2.955232632701135e+00 }, + { -2.757810418131543e-09, 2.948164416789036e+00 }, + { -2.751762352689432e-09, 2.940960054474719e+00 }, + { -2.745610684950541e-09, 2.933619381982341e+00 }, + { -2.739355646520809e-09, 2.926142241629213e+00 }, + { -2.732997472899722e-09, 2.918528481852205e+00 }, + { -2.726536403468318e-09, 2.910777957226018e+00 }, + { -2.719972681482232e-09, 2.902890528487386e+00 }, + { -2.713306554062453e-09, 2.894866062556452e+00 }, + { -2.706538272184154e-09, 2.886704432555728e+00 }, + { -2.699668090670078e-09, 2.878405517834426e+00 }, + { -2.692696268177908e-09, 2.869969203985464e+00 }, + { -2.685623067193599e-09, 2.861395382869544e+00 }, + { -2.678448754018380e-09, 2.852683952631486e+00 }, + { -2.671173598761847e-09, 2.843834817723832e+00 }, + { -2.663797875328991e-09, 2.834847888922988e+00 }, + { -2.656321861411517e-09, 2.825723083350459e+00 }, + { -2.648745838477759e-09, 2.816460324492298e+00 }, + { -2.641070091759922e-09, 2.807059542215146e+00 }, + { 
-2.633294910246296e-09, 2.797520672788269e+00 }, + { -2.625420586667340e-09, 2.787843658897949e+00 }, + { -2.617447417487602e-09, 2.778028449668942e+00 }, + { -2.609375702891616e-09, 2.768075000678399e+00 }, + { -2.601205746775692e-09, 2.757983273976943e+00 }, + { -2.592937856733464e-09, 2.747753238101915e+00 }, + { -2.584572344046340e-09, 2.737384868096553e+00 }, + { -2.576109523671634e-09, 2.726878145526201e+00 }, + { -2.567549714229129e-09, 2.716233058492422e+00 }, + { -2.558893237991435e-09, 2.705449601651722e+00 }, + { -2.550140420869302e-09, 2.694527776227857e+00 }, + { -2.541291592402089e-09, 2.683467590030445e+00 }, + { -2.532347085742440e-09, 2.672269057466213e+00 }, + { -2.523307237646751e-09, 2.660932199557362e+00 }, + { -2.514172388459584e-09, 2.649457043952206e+00 }, + { -2.504942882102813e-09, 2.637843624941622e+00 }, + { -2.495619066062810e-09, 2.626091983472908e+00 }, + { -2.486201291375123e-09, 2.614202167160335e+00 }, + { -2.476689912614465e-09, 2.602174230302269e+00 }, + { -2.467085287878098e-09, 2.590008233889805e+00 }, + { -2.457387778775451e-09, 2.577704245623143e+00 }, + { -2.447597750411553e-09, 2.565262339920002e+00 }, + { -2.437715571376127e-09, 2.552682597931055e+00 }, + { -2.427741613727123e-09, 2.539965107548168e+00 }, + { -2.417676252978335e-09, 2.527109963417675e+00 }, + { -2.407519868085581e-09, 2.514117266951687e+00 }, + { -2.397272841430131e-09, 2.500987126335739e+00 }, + { -2.386935558807595e-09, 2.487719656543254e+00 }, + { -2.376508409410024e-09, 2.474314979341178e+00 }, + { -2.365991785814531e-09, 2.460773223303822e+00 }, + { -2.355386083965131e-09, 2.447094523817833e+00 }, + { -2.344691703161363e-09, 2.433279023095734e+00 }, + { -2.333909046040126e-09, 2.419326870180582e+00 }, + { -2.323038518562289e-09, 2.405238220956597e+00 }, + { -2.312080529997549e-09, 2.391013238157397e+00 }, + { -2.301035492907384e-09, 2.376652091371587e+00 }, + { -2.289903823131822e-09, 2.362154957053137e+00 }, + { -2.278685939771276e-09, 
2.347522018525197e+00 }, + { -2.267382265173420e-09, 2.332753465990296e+00 }, + { -2.255993224914501e-09, 2.317849496533128e+00 }, + { -2.244519247786155e-09, 2.302810314130351e+00 }, + { -2.232960765776561e-09, 2.287636129652823e+00 }, + { -2.221318214056095e-09, 2.272327160873552e+00 }, + { -2.209592030960763e-09, 2.256883632472565e+00 }, + { -2.197782657974034e-09, 2.241305776039511e+00 }, + { -2.185890539712767e-09, 2.225593830081461e+00 }, + { -2.173916123907886e-09, 2.209748040023618e+00 }, + { -2.161859861389976e-09, 2.193768658216360e+00 }, + { -2.149722206070124e-09, 2.177655943935795e+00 }, + { -2.137503614923981e-09, 2.161410163388424e+00 }, + { -2.125204547975352e-09, 2.145031589714984e+00 }, + { -2.112825468276292e-09, 2.128520502989477e+00 }, + { -2.100366841892917e-09, 2.111877190225612e+00 }, + { -2.087829137884807e-09, 2.095101945374541e+00 }, + { -2.075212828290086e-09, 2.078195069329960e+00 }, + { -2.062518388104923e-09, 2.061156869925600e+00 }, + { -2.049746295268559e-09, 2.043987661939897e+00 }, + { -2.036897030642658e-09, 2.026687767092888e+00 }, + { -2.023971077994576e-09, 2.009257514048162e+00 }, + { -2.010968923979840e-09, 1.991697238413571e+00 }, + { -1.997891058121344e-09, 1.974007282737320e+00 }, + { -1.984737972794098e-09, 1.956187996511354e+00 }, + { -1.971510163203686e-09, 1.938239736166060e+00 }, + { -1.958208127370276e-09, 1.920162865072273e+00 }, + { -1.944832366107339e-09, 1.901957753535934e+00 }, + { -1.931383383005451e-09, 1.883624778799427e+00 }, + { -1.917861684410531e-09, 1.865164325035177e+00 }, + { -1.904267779407432e-09, 1.846576783346324e+00 }, + { -1.890602179798714e-09, 1.827862551760622e+00 }, + { -1.876865400086483e-09, 1.809022035228338e+00 }, + { -1.863057957452539e-09, 1.790055645617624e+00 }, + { -1.849180371740008e-09, 1.770963801711725e+00 }, + { -1.835233165431475e-09, 1.751746929201178e+00 }, + { -1.821216863631569e-09, 1.732405460681919e+00 }, + { -1.807131994045840e-09, 1.712939835648088e+00 }, + { 
-1.792979086962494e-09, 1.693350500488565e+00 }, + { -1.778758675229683e-09, 1.673637908477153e+00 }, + { -1.764471294238191e-09, 1.653802519770021e+00 }, + { -1.750117481899733e-09, 1.633844801396848e+00 }, + { -1.735697778626995e-09, 1.613765227254186e+00 }, + { -1.721212727314574e-09, 1.593564278099856e+00 }, + { -1.706662873315474e-09, 1.573242441540939e+00 }, + { -1.692048764423848e-09, 1.552800212030258e+00 }, + { -1.677370950852395e-09, 1.532238090855187e+00 }, + { -1.662629985213192e-09, 1.511556586131055e+00 }, + { -1.647826422494560e-09, 1.490756212788764e+00 }, + { -1.632960820042537e-09, 1.469837492568651e+00 }, + { -1.618033737538645e-09, 1.448800954008929e+00 }, + { -1.603045736978760e-09, 1.427647132435469e+00 }, + { -1.587997382653428e-09, 1.406376569953373e+00 }, + { -1.572889241124034e-09, 1.384989815432507e+00 }, + { -1.557721881203696e-09, 1.363487424499449e+00 }, + { -1.542495873934815e-09, 1.341869959524515e+00 }, + { -1.527211792568486e-09, 1.320137989611176e+00 }, + { -1.511870212541253e-09, 1.298292090581491e+00 }, + { -1.496471711454994e-09, 1.276332844965754e+00 }, + { -1.481016869054634e-09, 1.254260841988828e+00 }, + { -1.465506267206068e-09, 1.232076677556547e+00 }, + { -1.449940489875303e-09, 1.209780954243628e+00 }, + { -1.434320123104372e-09, 1.187374281276747e+00 }, + { -1.418645754991533e-09, 1.164857274523495e+00 }, + { -1.402917975667710e-09, 1.142230556475749e+00 }, + { -1.387137377275425e-09, 1.119494756236361e+00 }, + { -1.371304553944712e-09, 1.096650509501278e+00 }, + { -1.355420101772623e-09, 1.073698458546610e+00 }, + { -1.339484618799891e-09, 1.050639252211352e+00 }, + { -1.323498704988051e-09, 1.027473545880543e+00 }, + { -1.307462962198534e-09, 1.004202001471034e+00 }, + { -1.291377994167204e-09, 9.808252874104182e-01 }, + { -1.275244406484394e-09, 9.573440786237052e-01 }, + { -1.259062806570190e-09, 9.337590565128454e-01 }, + { -1.242833803653464e-09, 9.100709089414796e-01 }, + { -1.226558008746195e-09, 
8.862803302125812e-01 }, + { -1.210236034623253e-09, 8.623880210538113e-01 }, + { -1.193868495797618e-09, 8.383946885959868e-01 }, + { -1.177456008497777e-09, 8.143010463544786e-01 }, + { -1.160999190645010e-09, 7.901078142102129e-01 }, + { -1.144498661828833e-09, 7.658157183877095e-01 }, + { -1.127955043284965e-09, 7.414254914366063e-01 }, + { -1.111368957870986e-09, 7.169378722095157e-01 }, + { -1.094741030044308e-09, 6.923536058430697e-01 }, + { -1.078071885836393e-09, 6.676734437331688e-01 }, + { -1.061362152831423e-09, 6.428981435165511e-01 }, + { -1.044612460141255e-09, 6.180284690466404e-01 }, + { -1.027823438382183e-09, 5.930651903718045e-01 }, + { -1.010995719652015e-09, 5.680090837138436e-01 }, + { -9.941299375042378e-10, 5.428609314418970e-01 }, + { -9.772267269262058e-10, 5.176215220520872e-01 }, + { -9.602867243141016e-10, 4.922916501421032e-01 }, + { -9.433105674499058e-10, 4.668721163885412e-01 }, + { -9.262988954758817e-10, 4.413637275202624e-01 }, + { -9.092523488719689e-10, 4.157672962958654e-01 }, + { -8.921715694311144e-10, 3.900836414778084e-01 }, + { -8.750572002347607e-10, 3.643135878065193e-01 }, + { -8.579098856296589e-10, 3.384579659762392e-01 }, + { -8.407302712022458e-10, 3.125176126069478e-01 }, + { -8.235190037551917e-10, 2.864933702193017e-01 }, + { -8.062767312831008e-10, 2.603860872080448e-01 }, + { -7.890041029479477e-10, 2.341966178147619e-01 }, + { -7.717017690542486e-10, 2.079258220999725e-01 }, + { -7.543703810250266e-10, 1.815745659161734e-01 }, + { -7.370105913774597e-10, 1.551437208801425e-01 }, + { -7.196230536974697e-10, 1.286341643433767e-01 }, + { -7.022084226165876e-10, 1.020467793657360e-01 }, + { -6.847673537853251e-10, 7.538245468350446e-02 }, + { -6.673005038502516e-10, 4.864208468284503e-02 }, + { -6.498085304282128e-10, 2.182656936863137e-02 }, + { -6.322920920826137e-10, -5.063185663820913e-03 }, + { -6.147518482969490e-10, -3.202626926150343e-02 }, + { -5.971884594516681e-10, -5.906176474160862e-02 }, + { 
-5.796025867984469e-10, -8.616874992366363e-02 }, + { -5.619948924353588e-10, -1.133462971605448e-01 }, + { -5.443660392823640e-10, -1.405934733692621e-01 }, + { -5.267166910556339e-10, -1.679093400638023e-01 }, + { -5.090475122431451e-10, -1.952929533862739e-01 }, + { -4.913591680795342e-10, -2.227433641394564e-01 }, + { -4.736523245210571e-10, -2.502596178194491e-01 }, + { -4.559276482202303e-10, -2.778407546490776e-01 }, + { -4.381858065011618e-10, -3.054858096104932e-01 }, + { -4.204274673340870e-10, -3.331938124792702e-01 }, + { -4.026532993105397e-10, -3.609637878577768e-01 }, + { -3.848639716178888e-10, -3.887947552098022e-01 }, + { -3.670601540142443e-10, -4.166857288948674e-01 }, + { -3.492425168032583e-10, -4.446357182029681e-01 }, + { -3.314117308088734e-10, -4.726437273896633e-01 }, + { -3.135684673501752e-10, -5.007087557112619e-01 }, + { -2.957133982159296e-10, -5.288297974607742e-01 }, + { -2.778471956393828e-10, -5.570058420037128e-01 }, + { -2.599705322729564e-10, -5.852358738143247e-01 }, + { -2.420840811628366e-10, -6.135188725122560e-01 }, + { -2.241885157240923e-10, -6.418538128986450e-01 }, + { -2.062845097142585e-10, -6.702396649949099e-01 }, + { -1.883727372093546e-10, -6.986753940779493e-01 }, + { -1.704538725773087e-10, -7.271599607197149e-01 }, + { -1.525285904532877e-10, -7.556923208240308e-01 }, + { -1.345975657140748e-10, -7.842714256651911e-01 }, + { -1.166614734526054e-10, -8.128962219265712e-01 }, + { -9.872098895260891e-11, -8.415656517393372e-01 }, + { -8.077678766314517e-11, -8.702786527215916e-01 }, + { -6.282954517324612e-11, -8.990341580176152e-01 }, + { -4.487993718655790e-11, -9.278310963373758e-01 }, + { -2.692863949561210e-11, -9.566683919968972e-01 }, + { -8.976327956520795e-12, -9.855449649582175e-01 }, + { 8.976321536169872e-12, -1.014459730869357e+00 }, + { 2.692863307547294e-11, -1.043411601105914e+00 }, + { 4.487993076694813e-11, -1.072399482811314e+00 }, + { 6.282953875437751e-11, -1.101422278938424e+00 }, + { 
8.077678124517653e-11, -1.130478888291020e+00 }, + { 9.872098253591082e-11, -1.159568205565684e+00 }, + { 1.166614670373367e-10, -1.188689121393192e+00 }, + { 1.345975593005002e-10, -1.217840522381901e+00 }, + { 1.525285840416718e-10, -1.247021291159495e+00 }, + { 1.704538661678104e-10, -1.276230306415868e+00 }, + { 1.883727308022916e-10, -1.305466442946703e+00 }, + { 2.062845033098954e-10, -1.334728571696106e+00 }, + { 2.241885093225349e-10, -1.364015559800721e+00 }, + { 2.420840747645085e-10, -1.393326270633325e+00 }, + { 2.599705258779635e-10, -1.422659563847049e+00 }, + { 2.778471892479898e-10, -1.452014295419243e+00 }, + { 2.957133918284542e-10, -1.481389317696831e+00 }, + { 3.135684609667761e-10, -1.510783479440191e+00 }, + { 3.314117244297624e-10, -1.540195625869043e+00 }, + { 3.492425104288060e-10, -1.569624598707558e+00 }, + { 3.670601476445565e-10, -1.599069236228850e+00 }, + { 3.848639652533361e-10, -1.628528373302631e+00 }, + { 4.026532929512281e-10, -1.658000841439269e+00 }, + { 4.204274609803869e-10, -1.687485468837799e+00 }, + { 4.381858001531792e-10, -1.716981080430596e+00 }, + { 4.559276418782829e-10, -1.746486497931567e+00 }, + { 4.736523181853565e-10, -1.776000539882225e+00 }, + { 4.913591617503452e-10, -1.805522021699094e+00 }, + { 5.090475059206794e-10, -1.835049755721194e+00 }, + { 5.267166847401562e-10, -1.864582551257262e+00 }, + { 5.443660329740862e-10, -1.894119214633676e+00 }, + { 5.619948861345454e-10, -1.923658549242818e+00 }, + { 5.796025805053097e-10, -1.953199355591180e+00 }, + { 5.971884531664190e-10, -1.982740431347091e+00 }, + { 6.147518420199055e-10, -2.012280571390674e+00 }, + { 6.322920858139346e-10, -2.041818567861395e+00 }, + { 6.498085241682158e-10, -2.071353210208005e+00 }, + { 6.673004975990425e-10, -2.100883285238127e+00 }, + { 6.847673475432746e-10, -2.130407577166309e+00 }, + { 7.022084163838545e-10, -2.159924867664933e+00 }, + { 7.196230474743716e-10, -2.189433935913779e+00 }, + { 7.370105851640495e-10, 
-2.218933558650552e+00 }, + { 7.543703748217808e-10, -2.248422510220072e+00 }, + { 7.717017628611672e-10, -2.277899562625407e+00 }, + { 7.890040967654542e-10, -2.307363485579104e+00 }, + { 8.062767251113011e-10, -2.336813046552684e+00 }, + { 8.235189975944034e-10, -2.366247010829556e+00 }, + { 8.407302650525749e-10, -2.395664141553858e+00 }, + { 8.579098794915287e-10, -2.425063199784153e+00 }, + { 8.750571941082773e-10, -2.454442944543319e+00 }, + { 8.921715633164894e-10, -2.483802132872044e+00 }, + { 9.092523427695200e-10, -2.513139519878584e+00 }, + { 9.262988893857148e-10, -2.542453858792682e+00 }, + { 9.433105613723914e-10, -2.571743901017465e+00 }, + { 9.602867182493987e-10, -2.601008396180870e+00 }, + { 9.772267208744730e-10, -2.630246092190425e+00 }, + { 9.941299314658458e-10, -2.659455735283526e+00 }, + { 1.010995713627070e-09, -2.688636070081818e+00 }, + { 1.027823432371055e-09, -2.717785839644439e+00 }, + { 1.044612454143997e-09, -2.746903785521352e+00 }, + { 1.061362146848353e-09, -2.775988647805256e+00 }, + { 1.078071879867828e-09, -2.805039165187255e+00 }, + { 1.094741024090249e-09, -2.834054075009077e+00 }, + { 1.111368951931856e-09, -2.863032113318052e+00 }, + { 1.127955037360817e-09, -2.891972014920939e+00 }, + { 1.144498655920037e-09, -2.920872513436805e+00 }, + { 1.160999184751779e-09, -2.949732341353290e+00 }, + { 1.177456002620215e-09, -2.978550230079517e+00 }, + { 1.193868489936097e-09, -3.007324910002949e+00 }, + { 1.210236028777826e-09, -3.036055110540183e+00 }, + { 1.226558002917232e-09, -3.064739560196251e+00 }, + { 1.242833797841123e-09, -3.093376986616735e+00 }, + { 1.259062800774685e-09, -3.121966116643377e+00 }, + { 1.275244400705935e-09, -3.150505676371791e+00 }, + { 1.291377988406056e-09, -3.178994391202159e+00 }, + { 1.307462956454857e-09, -3.207430985899192e+00 }, + { 1.323498699262108e-09, -3.235814184645077e+00 }, + { 1.339484613091842e-09, -3.264142711097884e+00 }, + { 1.355420096082785e-09, -3.292415288443373e+00 }, + { 
1.371304548273191e-09, -3.320630639454825e+00 }, + { 1.387137371622433e-09, -3.348787486547389e+00 }, + { 1.402917970033511e-09, -3.376884551834256e+00 }, + { 1.418645749376393e-09, -3.404920557184582e+00 }, + { 1.434320117508396e-09, -3.432894224276359e+00 }, + { 1.449940484298756e-09, -3.460804274656981e+00 }, + { 1.465506261649108e-09, -3.488649429796768e+00 }, + { 1.481016863517580e-09, -3.516428411149154e+00 }, + { 1.496471705937951e-09, -3.544139940202303e+00 }, + { 1.511870207044433e-09, -3.571782738540999e+00 }, + { 1.527211787092206e-09, -3.599355527901174e+00 }, + { 1.542495868479076e-09, -3.626857030226671e+00 }, + { 1.557721875768920e-09, -3.654285967729458e+00 }, + { 1.572889235710329e-09, -3.681641062941412e+00 }, + { 1.587997377261005e-09, -3.708921038776707e+00 }, + { 1.603045731607830e-09, -3.736124618586623e+00 }, + { 1.618033732189314e-09, -3.763250526218862e+00 }, + { 1.632960814715177e-09, -3.790297486071938e+00 }, + { 1.647826417189275e-09, -3.817264223155802e+00 }, + { 1.662629979930247e-09, -3.844149463148589e+00 }, + { 1.677370945591844e-09, -3.870951932452996e+00 }, + { 1.692048759186008e-09, -3.897670358257890e+00 }, + { 1.706662868100504e-09, -3.924303468590212e+00 }, + { 1.721212722122685e-09, -3.950849992378278e+00 }, + { 1.735697773458400e-09, -3.977308659506432e+00 }, + { 1.750117476754591e-09, -4.003678200876669e+00 }, + { 1.764471289116712e-09, -4.029957348461003e+00 }, + { 1.778758670132079e-09, -4.056144835364877e+00 }, + { 1.792979081888926e-09, -4.082239395882965e+00 }, + { 1.807131988996465e-09, -4.108239765556996e+00 }, + { 1.821216858606652e-09, -4.134144681236933e+00 }, + { 1.835233160431175e-09, -4.159952881133585e+00 }, + { 1.849180366764537e-09, -4.185663104882633e+00 }, + { 1.863057952502055e-09, -4.211274093599509e+00 }, + { 1.876865395161145e-09, -4.236784589940537e+00 }, + { 1.890602174898734e-09, -4.262193338157148e+00 }, + { 1.904267774533022e-09, -4.287499084158302e+00 }, + { 1.917861679562008e-09, 
-4.312700575567174e+00 }, + { 1.931383378182392e-09, -4.337796561778708e+00 }, + { 1.944832361310856e-09, -4.362785794021793e+00 }, + { 1.958208122599839e-09, -4.387667025411434e+00 }, + { 1.971510158459931e-09, -4.412439011013396e+00 }, + { 1.984737968076495e-09, -4.437100507898339e+00 }, + { 1.997891053431005e-09, -4.461650275204912e+00 }, + { 2.010968919316289e-09, -4.486087074191693e+00 }, + { 2.023971073358447e-09, -4.510409668301784e+00 }, + { 2.036897026033634e-09, -4.534616823217992e+00 }, + { 2.049746290686799e-09, -4.558707306921882e+00 }, + { 2.062518383551274e-09, -4.582679889754607e+00 }, + { 2.075212823764071e-09, -4.606533344469879e+00 }, + { 2.087829133387063e-09, -4.630266446298172e+00 }, + { 2.100366837422912e-09, -4.653877973001258e+00 }, + { 2.112825463835087e-09, -4.677366704934605e+00 }, + { 2.125204543562522e-09, -4.700731425099899e+00 }, + { 2.137503610540056e-09, -4.723970919208608e+00 }, + { 2.149722201714786e-09, -4.747083975738060e+00 }, + { 2.161859857063438e-09, -4.770069385989595e+00 }, + { 2.173916119610994e-09, -4.792925944149308e+00 }, + { 2.185890535445098e-09, -4.815652447340950e+00 }, + { 2.197782653735957e-09, -4.838247695689436e+00 }, + { 2.209592026751962e-09, -4.860710492376411e+00 }, + { 2.221318209877576e-09, -4.883039643700314e+00 }, + { 2.232960761627846e-09, -4.905233959130168e+00 }, + { 2.244519243667616e-09, -4.927292251368517e+00 }, + { 2.255993220826402e-09, -4.949213336406265e+00 }, + { 2.267382261115285e-09, -4.970996033581527e+00 }, + { 2.278685935744269e-09, -4.992639165639563e+00 }, + { 2.289903819135414e-09, -5.014141558784778e+00 }, + { 2.301035488942000e-09, -5.035502042744443e+00 }, + { 2.312080526062763e-09, -5.056719450823151e+00 }, + { 2.323038514659161e-09, -5.077792619963239e+00 }, + { 2.333909042168180e-09, -5.098720390796817e+00 }, + { 2.344691699320969e-09, -5.119501607709159e+00 }, + { 2.355386080156553e-09, -5.140135118892792e+00 }, + { 2.365991782037187e-09, -5.160619776404897e+00 }, + { 
2.376508405665132e-09, -5.180954436227641e+00 }, + { 2.386935555094626e-09, -5.201137958319343e+00 }, + { 2.397272837749508e-09, -5.221169206676762e+00 }, + { 2.407519864436774e-09, -5.241047049389645e+00 }, + { 2.417676249362563e-09, -5.260770358700167e+00 }, + { 2.427741610143750e-09, -5.280338011053974e+00 }, + { 2.437715567825576e-09, -5.299748887163106e+00 }, + { 2.447597746894037e-09, -5.319001872058887e+00 }, + { 2.457387775290440e-09, -5.338095855149190e+00 }, + { 2.467085284426756e-09, -5.357029730277389e+00 }, + { 2.476689909196263e-09, -5.375802395772283e+00 }, + { 2.486201287990485e-09, -5.394412754510426e+00 }, + { 2.495619062711154e-09, -5.412859713968929e+00 }, + { 2.504942878785408e-09, -5.431142186284682e+00 }, + { 2.514172385175743e-09, -5.449259088303476e+00 }, + { 2.523307234396791e-09, -5.467209341642627e+00 }, + { 2.532347082526785e-09, -5.484991872743321e+00 }, + { 2.541291589219998e-09, -5.502605612925014e+00 }, + { 2.550140417722072e-09, -5.520049498445633e+00 }, + { 2.558893234878378e-09, -5.537322470548212e+00 }, + { 2.567549711150773e-09, -5.554423475524196e+00 }, + { 2.576109520627371e-09, -5.571351464763084e+00 }, + { 2.584572341037361e-09, -5.588105394812198e+00 }, + { 2.592937853759161e-09, -5.604684227423386e+00 }, + { 2.601205743836355e-09, -5.621086929615246e+00 }, + { 2.609375699987564e-09, -5.637312473723475e+00 }, + { 2.617447414618146e-09, -5.653359837454964e+00 }, + { 2.625420583833750e-09, -5.669228003945694e+00 }, + { 2.633294907447937e-09, -5.684915961806963e+00 }, + { 2.641070088997271e-09, -5.700422705186584e+00 }, + { 2.648745835750128e-09, -5.715747233817712e+00 }, + { 2.656321858720176e-09, -5.730888553077074e+00 }, + { 2.663797872673252e-09, -5.745845674030161e+00 }, + { 2.671173596142054e-09, -5.760617613492118e+00 }, + { 2.678448751434797e-09, -5.775203394076705e+00 }, + { 2.685623064645538e-09, -5.789602044248679e+00 }, + { 2.692696265666640e-09, -5.803812598380606e+00 }, + { 2.699668088194915e-09, 
-5.817834096797069e+00 }, + { 2.706538269745573e-09, -5.831665585834668e+00 }, + { 2.713306551659817e-09, -5.845306117889361e+00 }, + { 2.719972679116734e-09, -5.858754751472542e+00 }, + { 2.726536401139295e-09, -5.872010551255358e+00 }, + { 2.732997470607439e-09, -5.885072588127400e+00 }, + { 2.739355644265558e-09, -5.897939939244211e+00 }, + { 2.745610682731633e-09, -5.910611688078208e+00 }, + { 2.751762350508137e-09, -5.923086924473290e+00 }, + { 2.757810415987146e-09, -5.935364744687794e+00 }, + { 2.763754651462700e-09, -5.947444251452243e+00 }, + { 2.769594833137415e-09, -5.959324554015538e+00 }, + { 2.775330741132843e-09, -5.971004768198829e+00 }, + { 2.780962159494174e-09, -5.982484016437981e+00 }, + { 2.786488876202047e-09, -5.993761427840588e+00 }, + { 2.791910683178690e-09, -6.004836138231525e+00 }, + { 2.797227376295779e-09, -6.015707290202086e+00 }, + { 2.802438755383971e-09, -6.026374033162623e+00 }, + { 2.807544624236659e-09, -6.036835523383457e+00 }, + { 2.812544790621093e-09, -6.047090924050914e+00 }, + { 2.817439066283459e-09, -6.057139405311101e+00 }, + { 2.822227266958278e-09, -6.066980144322601e+00 }, + { 2.826909212371261e-09, -6.076612325295799e+00 }, + { 2.831484726250221e-09, -6.086035139548830e+00 }, + { 2.835953636329660e-09, -6.095247785550617e+00 }, + { 2.840315774357203e-09, -6.104249468967751e+00 }, + { 2.844570976102082e-09, -6.113039402715685e+00 }, + { 2.848719081357095e-09, -6.121616806996519e+00 }, + { 2.852759933948860e-09, -6.129980909353977e+00 }, + { 2.856693381741114e-09, -6.138130944714082e+00 }, + { 2.860519276643053e-09, -6.146066155436312e+00 }, + { 2.864237474610633e-09, -6.153785791350256e+00 }, + { 2.867847835656203e-09, -6.161289109809551e+00 }, + { 2.871350223851726e-09, -6.168575375732642e+00 }, + { 2.874744507333867e-09, -6.175643861647406e+00 }, + { 2.878030558310989e-09, -6.182493847739853e+00 }, + { 2.881208253063899e-09, -6.189124621889823e+00 }, + { 2.884277471954592e-09, -6.195535479723423e+00 }, + { 
2.887238099428306e-09, -6.201725724651554e+00 }, + { 2.890090024020323e-09, -6.207694667918394e+00 }, + { 2.892833138356060e-09, -6.213441628635915e+00 }, + { 2.895467339159240e-09, -6.218965933835304e+00 }, + { 2.897992527253659e-09, -6.224266918505075e+00 }, + { 2.900408607567016e-09, -6.229343925633495e+00 }, + { 2.902715489136496e-09, -6.234196306254763e+00 }, + { 2.904913085108075e-09, -6.238823419482017e+00 }, + { 2.907001312743911e-09, -6.243224632557377e+00 }, + { 2.908980093422997e-09, -6.247399320887848e+00 }, + { 2.910849352646620e-09, -6.251346868091392e+00 }, + { 2.912609020036956e-09, -6.255066666028537e+00 }, + { 2.914259029343965e-09, -6.258558114851525e+00 }, + { 2.915799318445710e-09, -6.261820623039620e+00 }, + { 2.917229829350759e-09, -6.264853607438842e+00 }, + { 2.918550508202463e-09, -6.267656493305673e+00 }, + { 2.919761305276718e-09, -6.270228714337005e+00 }, + { 2.920862174988150e-09, -6.272569712717951e+00 }, + { 2.921853075889193e-09, -6.274678939154603e+00 }, + { 2.922733970674264e-09, -6.276555852917634e+00 }, + { 2.923504826176907e-09, -6.278199921870962e+00 }, + { 2.924165613375264e-09, -6.279610622518139e+00 }, + { 2.924716307391075e-09, -6.280787440034993e+00 }, + { 2.925156887490598e-09, -6.281729868306345e+00 }, + { 2.925487337087508e-09, -6.282437409966992e+00 }, + { 2.925707643739298e-09, -6.282909576428774e+00 }, + { 2.925817799151970e-09, -6.283145887925411e+00 }, +}; + +#endif diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_sincos_32fc.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_sincos_32fc.h new file mode 100644 index 000000000..32532f4ab --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_32f_sincos_32fc.h @@ -0,0 +1,715 @@ +/*! + * \file volk_gnsssdr_32f_sincos_32fc.h + * \brief VOLK_GNSSSDR kernel: Computes the sine and cosine of a vector of floats. 
+ * \authors
+ *
+ * VOLK_GNSSSDR kernel that computes the sine and cosine of a vector of floats.
+ *
+ * -------------------------------------------------------------------------
+ *
+ * Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors)
+ *
+ * GNSS-SDR is a software defined Global Navigation
+ * Satellite Systems receiver
+ *
+ * This file is part of GNSS-SDR.
+ *
+ * GNSS-SDR is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GNSS-SDR is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+/*!
+ * \page volk_gnsssdr_32f_sincos_32fc
+ *
+ * \b Overview
+ *
+ * Computes the sine and cosine of a vector of floats, providing the output in a complex vector (cosine, sine)
+ *
+ * Dispatcher Prototype
+ * \code
+ * void volk_gnsssdr_32f_sincos_32fc(lv_32fc_t* out, const float* in, unsigned int num_points)
+ * \endcode
+ *
+ * \b Inputs
+ * \li in: Vector of floats, in radians.
+ * \li num_points: Number of components in \p in to be computed.
+ *
+ * \b Outputs
+ * \li out: Vector of the form lv_32fc_t out[n] = lv_cmake(cos(in[n]), sin(in[n]))
+ *
+ */
+
+#ifndef INCLUDED_volk_gnsssdr_32f_sincos_32fc_H
+#define INCLUDED_volk_gnsssdr_32f_sincos_32fc_H
+
+#include
+#include
+#include
+
+#ifdef LV_HAVE_SSE4_1
+#include
+/* Adapted from the original VOLK.
+ * In turn based on algorithms from:
+ * Naoki Shibata, "Efficient Evaluation Methods of Elementary Functions Suitable for SIMD Computation,"
+ * Computer Science Research and Development, May 2010, Volume 25, Issue 1, pp 25-32. DOI 10.1007/s00450-010-0108-2 */
+/*!
+ * \brief Unaligned SSE4.1 kernel: out[n] = lv_cmake(cos(in[n]), sin(in[n])).
+ *
+ * Four inputs are processed per iteration using unaligned loads and stores;
+ * the remaining num_points % 4 samples are computed with cos() and sin().
+ *
+ * \param out        Output vector, num_points complex samples (cosine, sine).
+ * \param in         Input vector, num_points angles in radians.
+ * \param num_points Number of samples to process.
+ */
+static inline void volk_gnsssdr_32f_sincos_32fc_u_sse4_1(lv_32fc_t* out, const float* in, unsigned int num_points)
+{
+    lv_32fc_t* bPtr = out;
+    const float* aPtr = in;
+
+    unsigned int number = 0;
+    unsigned int quarterPoints = num_points / 4;
+    unsigned int i = 0;
+
+    __m128 aVal, s, m4pi, pio4A, pio4B, cp1, cp2, cp3, cp4, cp5, feights, ffours, ftwos, fones, fzeroes;
+    __m128 sine, cosine, condition1, condition2, condition3, cplxValue;
+    __m128i q, r, ones, twos, fours;
+
+    m4pi = _mm_set1_ps(1.273239545);   /* 4/pi */
+    pio4A = _mm_set1_ps(0.78515625);   /* pi/4 split in two parts: pio4A + pio4B */
+    pio4B = _mm_set1_ps(0.241876e-3);
+    feights = _mm_set1_ps(8.0);
+    ffours = _mm_set1_ps(4.0);
+    ftwos = _mm_set1_ps(2.0);
+    fones = _mm_set1_ps(1.0);
+    fzeroes = _mm_setzero_ps();
+    ones = _mm_set1_epi32(1);
+    twos = _mm_set1_epi32(2);
+    fours = _mm_set1_epi32(4);
+
+    /* Series coefficients of 2 * (1 - cos(x)) in powers of x^2: 1, 1/12, 1/360, ... (signs applied below) */
+    cp1 = _mm_set1_ps(1.0);
+    cp2 = _mm_set1_ps(0.83333333e-1);
+    cp3 = _mm_set1_ps(0.2777778e-2);
+    cp4 = _mm_set1_ps(0.49603e-4);
+    cp5 = _mm_set1_ps(0.551e-6);
+
+    for(;number < quarterPoints; number++)
+        {
+            aVal = _mm_loadu_ps(aPtr);
+            __builtin_prefetch(aPtr + 8);
+            /* s = |aVal|: subtract 2 * aVal on the negative lanes only */
+            s = _mm_sub_ps(aVal, _mm_and_ps(_mm_mul_ps(aVal, ftwos), _mm_cmplt_ps(aVal, fzeroes)));
+            /* q = floor(s * 4 / pi); r = q rounded up to the next even integer */
+            q = _mm_cvtps_epi32(_mm_floor_ps(_mm_mul_ps(s, m4pi)));
+            r = _mm_add_epi32(q, _mm_and_si128(q, ones));
+
+            /* Remove r * pi/4 in two steps to limit the rounding error */
+            s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4A));
+            s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4B));
+
+            s = _mm_div_ps(s, feights);    // The constant is 2^N, for 3 times argument reduction
+            s = _mm_mul_ps(s, s);
+            // Evaluate Taylor series
+            s = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(s, cp5), cp4), s), cp3), s), cp2), s), cp1), s);
+
+            /* Undo the division by 8: each pass doubles the angle of s = 2 * (1 - cos) */
+            for(i = 0; i < 3; i++)
+                {
+                    s = _mm_mul_ps(s, _mm_sub_ps(ffours, s));
+                }
+            s = _mm_div_ps(s, ftwos);
+
+            /* s = 1 - cos, hence sqrt((2 - s) * s) = |sin| and 1 - s = cos of the reduced angle */
+            sine = _mm_sqrt_ps(_mm_mul_ps(_mm_sub_ps(ftwos, s), s));
+            cosine = _mm_sub_ps(fones, s);
+
+            /* condition1: swap sine and cosine; condition2: negate sine; condition3: negate cosine */
+            condition1 = _mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, ones), twos)), fzeroes);
+            /* The two lane masks must be XORed. Comparing them with _mm_cmpneq_ps
+             * does not work: an all-ones mask is a NaN bit pattern and NaN != NaN is
+             * true, so the comparison acts as an OR and gives the wrong sine sign for
+             * negative inputs whose octant index q has bit 2 set. */
+            condition2 = _mm_xor_ps(_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(q, fours)), fzeroes), _mm_cmplt_ps(aVal, fzeroes));
+            condition3 = _mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, twos), fours)), fzeroes);
+
+            cplxValue = sine;
+            sine = _mm_add_ps(sine, _mm_and_ps(_mm_sub_ps(cosine, sine), condition1));
+            sine = _mm_sub_ps(sine, _mm_and_ps(_mm_mul_ps(sine, ftwos), condition2));
+
+            cosine = _mm_add_ps(cosine, _mm_and_ps(_mm_sub_ps(cplxValue, cosine), condition1));
+            cosine = _mm_sub_ps(cosine, _mm_and_ps(_mm_mul_ps(cosine, ftwos), condition3));
+
+            /* Interleave as (cosine, sine) pairs: two complex samples per store */
+            cplxValue = _mm_unpacklo_ps(cosine, sine);
+
+            _mm_storeu_ps((float*)bPtr, cplxValue);
+            bPtr += 2;
+            cplxValue = _mm_unpackhi_ps(cosine, sine);
+            _mm_storeu_ps((float*)bPtr, cplxValue);
+            bPtr += 2;
+            aPtr += 4;
+        }
+
+    /* Scalar tail for the samples that do not fill a whole vector */
+    number = quarterPoints * 4;
+    for(;number < num_points; number++)
+        {
+            float _in = *aPtr++;
+            *bPtr++ = lv_cmake(cos(_in), sin(_in));
+        }
+}
+
+#endif /* LV_HAVE_SSE4_1 for unaligned */
+
+
+#ifdef LV_HAVE_SSE4_1
+#include
+/* Adapted from the original VOLK.
+ * In turn based on algorithms from:
+ * Naoki Shibata, "Efficient Evaluation Methods of Elementary Functions Suitable for SIMD Computation,"
+ * Computer Science Research and Development, May 2010, Volume 25, Issue 1, pp 25-32.
DOI 10.1007/s00450-010-0108-2 */
+/*!
+ * \brief Aligned SSE4.1 kernel: out[n] = lv_cmake(cos(in[n]), sin(in[n])).
+ *
+ * Four inputs are processed per iteration using _mm_load_ps / _mm_store_ps,
+ * so both \p in and \p out must be 16-byte aligned; the remaining
+ * num_points % 4 samples are computed with cos() and sin().
+ *
+ * \param out        Output vector, num_points complex samples (cosine, sine).
+ * \param in         Input vector, num_points angles in radians.
+ * \param num_points Number of samples to process.
+ */
+static inline void volk_gnsssdr_32f_sincos_32fc_a_sse4_1(lv_32fc_t* out, const float* in, unsigned int num_points)
+{
+    lv_32fc_t* bPtr = out;
+    const float* aPtr = in;
+
+    unsigned int number = 0;
+    unsigned int quarterPoints = num_points / 4;
+    unsigned int i = 0;
+
+    __m128 aVal, s, m4pi, pio4A, pio4B, cp1, cp2, cp3, cp4, cp5, feights, ffours, ftwos, fones, fzeroes;
+    __m128 sine, cosine, condition1, condition2, condition3, cplxValue;
+    __m128i q, r, ones, twos, fours;
+
+    m4pi = _mm_set1_ps(1.273239545);   /* 4/pi */
+    pio4A = _mm_set1_ps(0.78515625);   /* pi/4 split in two parts: pio4A + pio4B */
+    pio4B = _mm_set1_ps(0.241876e-3);
+    feights = _mm_set1_ps(8.0);
+    ffours = _mm_set1_ps(4.0);
+    ftwos = _mm_set1_ps(2.0);
+    fones = _mm_set1_ps(1.0);
+    fzeroes = _mm_setzero_ps();
+    ones = _mm_set1_epi32(1);
+    twos = _mm_set1_epi32(2);
+    fours = _mm_set1_epi32(4);
+
+    /* Series coefficients of 2 * (1 - cos(x)) in powers of x^2: 1, 1/12, 1/360, ... (signs applied below) */
+    cp1 = _mm_set1_ps(1.0);
+    cp2 = _mm_set1_ps(0.83333333e-1);
+    cp3 = _mm_set1_ps(0.2777778e-2);
+    cp4 = _mm_set1_ps(0.49603e-4);
+    cp5 = _mm_set1_ps(0.551e-6);
+
+    for(;number < quarterPoints; number++)
+        {
+            aVal = _mm_load_ps(aPtr);
+            __builtin_prefetch(aPtr + 8);
+            /* s = |aVal|: subtract 2 * aVal on the negative lanes only */
+            s = _mm_sub_ps(aVal, _mm_and_ps(_mm_mul_ps(aVal, ftwos), _mm_cmplt_ps(aVal, fzeroes)));
+            /* q = floor(s * 4 / pi); r = q rounded up to the next even integer */
+            q = _mm_cvtps_epi32(_mm_floor_ps(_mm_mul_ps(s, m4pi)));
+            r = _mm_add_epi32(q, _mm_and_si128(q, ones));
+
+            /* Remove r * pi/4 in two steps to limit the rounding error */
+            s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4A));
+            s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4B));
+
+            s = _mm_div_ps(s, feights);    // The constant is 2^N, for 3 times argument reduction
+            s = _mm_mul_ps(s, s);
+            // Evaluate Taylor series
+            s = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(s, cp5), cp4), s), cp3), s), cp2), s), cp1), s);
+
+            /* Undo the division by 8: each pass doubles the angle of s = 2 * (1 - cos) */
+            for(i = 0; i < 3; i++)
+                {
+                    s = _mm_mul_ps(s, _mm_sub_ps(ffours, s));
+                }
+            s = _mm_div_ps(s, ftwos);
+
+            /* s = 1 - cos, hence sqrt((2 - s) * s) = |sin| and 1 - s = cos of the reduced angle */
+            sine = _mm_sqrt_ps(_mm_mul_ps(_mm_sub_ps(ftwos, s), s));
+            cosine = _mm_sub_ps(fones, s);
+
+            /* condition1: swap sine and cosine; condition2: negate sine; condition3: negate cosine */
+            condition1 = _mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, ones), twos)), fzeroes);
+            /* The two lane masks must be XORed. Comparing them with _mm_cmpneq_ps
+             * does not work: an all-ones mask is a NaN bit pattern and NaN != NaN is
+             * true, so the comparison acts as an OR and gives the wrong sine sign for
+             * negative inputs whose octant index q has bit 2 set. */
+            condition2 = _mm_xor_ps(_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(q, fours)), fzeroes), _mm_cmplt_ps(aVal, fzeroes));
+            condition3 = _mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, twos), fours)), fzeroes);
+
+            cplxValue = sine;
+            sine = _mm_add_ps(sine, _mm_and_ps(_mm_sub_ps(cosine, sine), condition1));
+            sine = _mm_sub_ps(sine, _mm_and_ps(_mm_mul_ps(sine, ftwos), condition2));
+
+            cosine = _mm_add_ps(cosine, _mm_and_ps(_mm_sub_ps(cplxValue, cosine), condition1));
+            cosine = _mm_sub_ps(cosine, _mm_and_ps(_mm_mul_ps(cosine, ftwos), condition3));
+
+            /* Interleave as (cosine, sine) pairs: two complex samples per store */
+            cplxValue = _mm_unpacklo_ps(cosine, sine);
+
+            _mm_store_ps((float*)bPtr, cplxValue);
+            bPtr += 2;
+            cplxValue = _mm_unpackhi_ps(cosine, sine);
+            _mm_store_ps((float*)bPtr, cplxValue);
+            bPtr += 2;
+            aPtr += 4;
+        }
+
+    /* Scalar tail for the samples that do not fill a whole vector */
+    number = quarterPoints * 4;
+    for(;number < num_points; number++)
+        {
+            float _in = *aPtr++;
+            *bPtr++ = lv_cmake(cos(_in), sin(_in));
+        }
+}
+
+#endif /* LV_HAVE_SSE4_1 for aligned */
+
+
+#ifdef LV_HAVE_SSE2
+#include
+/* Adapted from http://gruntthepeon.free.fr/ssemath/sse_mathfun.h, original code from Julien Pommier */
+/* Based on algorithms from the cephes library http://www.netlib.org/cephes/ */
+static inline void volk_gnsssdr_32f_sincos_32fc_a_sse2(lv_32fc_t* out, const float* in, unsigned int num_points)
+{
+    lv_32fc_t* bPtr = out;
+    const float* aPtr = in;
+
+    const unsigned int sse_iters = num_points / 4;
+    unsigned int number = 0;
+    float _in;
+
+    __m128 sine, cosine, aux, x;
+    __m128 xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y;
+
+    __m128i emm0, emm2, emm4;
+
+    /* declare some SSE constants */
+    static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 };
+    static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 };
+
+    static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = {
1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; + static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 }; + static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 }; + static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2}; + static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4}; + + static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; + static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; + static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; + static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; + static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; + static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; + static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; + static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; + static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; + static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f }; + static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 
1.0f, 1.0f }; + + for(;number < sse_iters; number++) + { + x = _mm_load_ps(aPtr); + __builtin_prefetch(aPtr + 8); + + sign_bit_sin = x; + /* take the absolute value */ + x = _mm_and_ps(x, *(__m128*)_ps_inv_sign_mask); + /* extract the sign bit (upper one) */ + sign_bit_sin = _mm_and_ps(sign_bit_sin, *(__m128*)_ps_sign_mask); + + /* scale by 4/Pi */ + y = _mm_mul_ps(x, *(__m128*)_ps_cephes_FOPI); + + /* store the integer part of y in emm2 */ + emm2 = _mm_cvttps_epi32(y); + + /* j=(j+1) & (~1) (see the cephes sources) */ + emm2 = _mm_add_epi32(emm2, *(__m128i *)_pi32_1); + emm2 = _mm_and_si128(emm2, *(__m128i *)_pi32_inv1); + y = _mm_cvtepi32_ps(emm2); + + emm4 = emm2; + + /* get the swap sign flag for the sine */ + emm0 = _mm_and_si128(emm2, *(__m128i *)_pi32_4); + emm0 = _mm_slli_epi32(emm0, 29); + __m128 swap_sign_bit_sin = _mm_castsi128_ps(emm0); + + /* get the polynom selection mask for the sine*/ + emm2 = _mm_and_si128(emm2, *(__m128i *)_pi32_2); + emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128()); + __m128 poly_mask = _mm_castsi128_ps(emm2); + + /* The magic pass: "Extended precision modular arithmetic” + x = ((x - y * DP1) - y * DP2) - y * DP3; */ + xmm1 = *(__m128*)_ps_minus_cephes_DP1; + xmm2 = *(__m128*)_ps_minus_cephes_DP2; + xmm3 = *(__m128*)_ps_minus_cephes_DP3; + xmm1 = _mm_mul_ps(y, xmm1); + xmm2 = _mm_mul_ps(y, xmm2); + xmm3 = _mm_mul_ps(y, xmm3); + x = _mm_add_ps(x, xmm1); + x = _mm_add_ps(x, xmm2); + x = _mm_add_ps(x, xmm3); + + emm4 = _mm_sub_epi32(emm4, *(__m128i *)_pi32_2); + emm4 = _mm_andnot_si128(emm4, *(__m128i *)_pi32_4); + emm4 = _mm_slli_epi32(emm4, 29); + __m128 sign_bit_cos = _mm_castsi128_ps(emm4); + + sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin); + + /* Evaluate the first polynom (0 <= x <= Pi/4) */ + __m128 z = _mm_mul_ps(x,x); + y = *(__m128*)_ps_coscof_p0; + + y = _mm_mul_ps(y, z); + y = _mm_add_ps(y, *(__m128*)_ps_coscof_p1); + y = _mm_mul_ps(y, z); + y = _mm_add_ps(y, *(__m128*)_ps_coscof_p2); + y = _mm_mul_ps(y, 
z); + y = _mm_mul_ps(y, z); + __m128 tmp = _mm_mul_ps(z, *(__m128*)_ps_0p5); + y = _mm_sub_ps(y, tmp); + y = _mm_add_ps(y, *(__m128*)_ps_1); + + /* Evaluate the second polynom (Pi/4 <= x <= 0) */ + + __m128 y2 = *(__m128*)_ps_sincof_p0; + y2 = _mm_mul_ps(y2, z); + y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p1); + y2 = _mm_mul_ps(y2, z); + y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p2); + y2 = _mm_mul_ps(y2, z); + y2 = _mm_mul_ps(y2, x); + y2 = _mm_add_ps(y2, x); + + /* select the correct result from the two polynoms */ + xmm3 = poly_mask; + __m128 ysin2 = _mm_and_ps(xmm3, y2); + __m128 ysin1 = _mm_andnot_ps(xmm3, y); + y2 = _mm_sub_ps(y2,ysin2); + y = _mm_sub_ps(y, ysin1); + + xmm1 = _mm_add_ps(ysin1,ysin2); + xmm2 = _mm_add_ps(y,y2); + + /* update the sign */ + sine = _mm_xor_ps(xmm1, sign_bit_sin); + cosine = _mm_xor_ps(xmm2, sign_bit_cos); + + /* write the output */ + aux = _mm_unpacklo_ps(cosine, sine); + _mm_store_ps((float*)bPtr, aux); + bPtr += 2; + aux = _mm_unpackhi_ps(cosine, sine); + _mm_store_ps((float*)bPtr, aux); + bPtr += 2; + + aPtr += 4; + } + + for(number = sse_iters * 4; number < num_points; number++) + { + _in = *aPtr++; + *bPtr++ = lv_cmake((float)cos(_in), (float)sin(_in) ); + } + +} +#endif /* LV_HAVE_SSE2 */ + + +#ifdef LV_HAVE_SSE2 +#include +/* Adapted from http://gruntthepeon.free.fr/ssemath/sse_mathfun.h, original code from Julien Pommier */ +/* Based on algorithms from the cephes library http://www.netlib.org/cephes/ */ +static inline void volk_gnsssdr_32f_sincos_32fc_u_sse2(lv_32fc_t* out, const float* in, unsigned int num_points) +{ + lv_32fc_t* bPtr = out; + const float* aPtr = in; + + const unsigned int sse_iters = num_points / 4; + unsigned int number = 0; + float _in; + + __m128 sine, cosine, aux, x; + __m128 xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y; + + __m128i emm0, emm2, emm4; + + /* declare some SSE constants */ + static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, 
~0x80000000, ~0x80000000 }; + static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; + + static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; + static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 }; + static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 }; + static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2}; + static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4}; + + static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; + static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; + static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; + static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; + static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; + static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; + static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; + static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; + static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { 
-1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; + static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f }; + static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f }; + + for(;number < sse_iters; number++) + { + x = _mm_loadu_ps(aPtr); + __builtin_prefetch(aPtr + 8); + + sign_bit_sin = x; + /* take the absolute value */ + x = _mm_and_ps(x, *(__m128*)_ps_inv_sign_mask); + /* extract the sign bit (upper one) */ + sign_bit_sin = _mm_and_ps(sign_bit_sin, *(__m128*)_ps_sign_mask); + + /* scale by 4/Pi */ + y = _mm_mul_ps(x, *(__m128*)_ps_cephes_FOPI); + + /* store the integer part of y in emm2 */ + emm2 = _mm_cvttps_epi32(y); + + /* j=(j+1) & (~1) (see the cephes sources) */ + emm2 = _mm_add_epi32(emm2, *(__m128i *)_pi32_1); + emm2 = _mm_and_si128(emm2, *(__m128i *)_pi32_inv1); + y = _mm_cvtepi32_ps(emm2); + + emm4 = emm2; + + /* get the swap sign flag for the sine */ + emm0 = _mm_and_si128(emm2, *(__m128i *)_pi32_4); + emm0 = _mm_slli_epi32(emm0, 29); + __m128 swap_sign_bit_sin = _mm_castsi128_ps(emm0); + + /* get the polynom selection mask for the sine*/ + emm2 = _mm_and_si128(emm2, *(__m128i *)_pi32_2); + emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128()); + __m128 poly_mask = _mm_castsi128_ps(emm2); + + /* The magic pass: "Extended precision modular arithmetic” + x = ((x - y * DP1) - y * DP2) - y * DP3; */ + xmm1 = *(__m128*)_ps_minus_cephes_DP1; + xmm2 = *(__m128*)_ps_minus_cephes_DP2; + xmm3 = *(__m128*)_ps_minus_cephes_DP3; + xmm1 = _mm_mul_ps(y, xmm1); + xmm2 = _mm_mul_ps(y, xmm2); + xmm3 = _mm_mul_ps(y, xmm3); + x = _mm_add_ps(x, xmm1); + x = _mm_add_ps(x, xmm2); + x = _mm_add_ps(x, xmm3); + + emm4 = _mm_sub_epi32(emm4, *(__m128i *)_pi32_2); + emm4 = _mm_andnot_si128(emm4, *(__m128i *)_pi32_4); + emm4 = _mm_slli_epi32(emm4, 29); + __m128 sign_bit_cos = _mm_castsi128_ps(emm4); + + sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin); + + /* Evaluate the first polynom (0 
<= x <= Pi/4) */ + __m128 z = _mm_mul_ps(x,x); + y = *(__m128*)_ps_coscof_p0; + + y = _mm_mul_ps(y, z); + y = _mm_add_ps(y, *(__m128*)_ps_coscof_p1); + y = _mm_mul_ps(y, z); + y = _mm_add_ps(y, *(__m128*)_ps_coscof_p2); + y = _mm_mul_ps(y, z); + y = _mm_mul_ps(y, z); + __m128 tmp = _mm_mul_ps(z, *(__m128*)_ps_0p5); + y = _mm_sub_ps(y, tmp); + y = _mm_add_ps(y, *(__m128*)_ps_1); + + /* Evaluate the second polynom (Pi/4 <= x <= 0) */ + + __m128 y2 = *(__m128*)_ps_sincof_p0; + y2 = _mm_mul_ps(y2, z); + y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p1); + y2 = _mm_mul_ps(y2, z); + y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p2); + y2 = _mm_mul_ps(y2, z); + y2 = _mm_mul_ps(y2, x); + y2 = _mm_add_ps(y2, x); + + /* select the correct result from the two polynoms */ + xmm3 = poly_mask; + __m128 ysin2 = _mm_and_ps(xmm3, y2); + __m128 ysin1 = _mm_andnot_ps(xmm3, y); + y2 = _mm_sub_ps(y2,ysin2); + y = _mm_sub_ps(y, ysin1); + + xmm1 = _mm_add_ps(ysin1,ysin2); + xmm2 = _mm_add_ps(y,y2); + + /* update the sign */ + sine = _mm_xor_ps(xmm1, sign_bit_sin); + cosine = _mm_xor_ps(xmm2, sign_bit_cos); + + /* write the output */ + aux = _mm_unpacklo_ps(cosine, sine); + _mm_storeu_ps((float*)bPtr, aux); + bPtr += 2; + aux = _mm_unpackhi_ps(cosine, sine); + _mm_storeu_ps((float*)bPtr, aux); + bPtr += 2; + + aPtr += 4; + } + + for(number = sse_iters * 4; number < num_points; number++) + { + _in = *aPtr++; + *bPtr++ = lv_cmake((float)cos(_in), (float)sin(_in) ); + } + +} +#endif /* LV_HAVE_SSE2 */ + + +#ifdef LV_HAVE_GENERIC + +static inline void volk_gnsssdr_32f_sincos_32fc_generic(lv_32fc_t* out, const float* in, unsigned int num_points) +{ + float _in; + for(unsigned int i = 0; i < num_points; i++) + { + _in = *in++; + *out++ = lv_cmake((float)cos(_in), (float)sin(_in) ); + } +} + +#endif /* LV_HAVE_GENERIC */ + + +#ifdef LV_HAVE_GENERIC +#include +#include +static inline void volk_gnsssdr_32f_sincos_32fc_generic_fxpt(lv_32fc_t* out, const float* in, unsigned int num_points) +{ + float _in, s, 
c; + int32_t x, sin_index, cos_index, d; + const float PI = 3.14159265358979323846; + const float TWO_TO_THE_31_DIV_PI = 2147483648.0 / PI; + const float TWO_PI = PI * 2; + const int32_t bitlength = 32; + const int32_t Nbits = 10; + const int32_t diffbits = bitlength - Nbits; + uint32_t ux; + + for(unsigned int i = 0; i < num_points; i++) + { + _in = *in++; + d = (int32_t)floor(_in / TWO_PI + 0.5); + _in -= d * TWO_PI; + x = (int32_t) ((float) _in * TWO_TO_THE_31_DIV_PI); + + ux = x; + sin_index = ux >> diffbits; + s = sine_table_10bits[sin_index][0] * (ux >> 1) + sine_table_10bits[sin_index][1]; + + ux = x + 0x40000000; + cos_index = ux >> diffbits; + c = sine_table_10bits[cos_index][0] * (ux >> 1) + sine_table_10bits[cos_index][1]; + + *out++ = lv_cmake((float)c, (float)s ); + } +} + +#endif /* LV_HAVE_GENERIC */ + + +#ifdef LV_HAVE_NEON +#include +/* Adapted from http://gruntthepeon.free.fr/ssemath/neon_mathfun.h, original code from Julien Pommier */ +/* Based on algorithms from the cephes library http://www.netlib.org/cephes/ */ +static inline void volk_gnsssdr_32f_sincos_32fc_neon(lv_32fc_t* out, const float* in, unsigned int num_points) +{ + lv_32fc_t* bPtr = out; + const float* aPtr = in; + const unsigned int neon_iters = num_points / 4; + + const float32_t c_minus_cephes_DP1 = -0.78515625; + const float32_t c_minus_cephes_DP2 = -2.4187564849853515625e-4; + const float32_t c_minus_cephes_DP3 = -3.77489497744594108e-8; + const float32_t c_sincof_p0 = -1.9515295891E-4; + const float32_t c_sincof_p1 = 8.3321608736E-3; + const float32_t c_sincof_p2 = -1.6666654611E-1; + const float32_t c_coscof_p0 = 2.443315711809948E-005; + const float32_t c_coscof_p1 = -1.388731625493765E-003; + const float32_t c_coscof_p2 = 4.166664568298827E-002; + const float32_t c_cephes_FOPI = 1.27323954473516; + + unsigned int number = 0; + float _in; + + float32x4_t x, xmm1, xmm2, xmm3, y, y1, y2, ys, yc, z; + float32x4x2_t result; + + uint32x4_t emm2, poly_mask, sign_mask_sin, 
sign_mask_cos; + + for(;number < neon_iters; number++) + { + x = vld1q_f32(aPtr); + __builtin_prefetch(aPtr + 8); + + sign_mask_sin = vcltq_f32(x, vdupq_n_f32(0)); + x = vabsq_f32(x); + + /* scale by 4/Pi */ + y = vmulq_f32(x, vdupq_n_f32(c_cephes_FOPI)); + + /* store the integer part of y in mm0 */ + emm2 = vcvtq_u32_f32(y); + /* j=(j+1) & (~1) (see the cephes sources) */ + emm2 = vaddq_u32(emm2, vdupq_n_u32(1)); + emm2 = vandq_u32(emm2, vdupq_n_u32(~1)); + y = vcvtq_f32_u32(emm2); + + /* get the polynom selection mask + there is one polynom for 0 <= x <= Pi/4 + and another one for Pi/4 init_test_list(volk_gnsssdr_test_params_t (VOLK_INIT_TEST(volk_gnsssdr_8ic_s8ic_multiply_8ic, test_params)) (VOLK_INIT_TEST(volk_gnsssdr_8u_x2_multiply_8u, test_params_more_iters)) (VOLK_INIT_TEST(volk_gnsssdr_64f_accumulator_64f, test_params)) + (VOLK_INIT_TEST(volk_gnsssdr_32f_sincos_32fc, test_params_inacc)) (VOLK_INIT_TEST(volk_gnsssdr_32fc_convert_8ic, test_params)) (VOLK_INIT_TEST(volk_gnsssdr_32fc_convert_16ic, test_params_more_iters)) (VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_dot_prod_16ic, test_params)) From 9c8fc9436e65e77341473f71405c267c9ec2b4bc Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Sun, 20 Mar 2016 01:45:01 +0100 Subject: [PATCH 2/6] Adding and integrating sincos kernel --- ...o_e5a_noncoherent_iq_acquisition_caf_cc.cc | 7 +- .../galileo_pcps_8ms_acquisition_cc.cc | 7 +- .../gnuradio_blocks/pcps_acquisition_cc.cc | 16 +- .../pcps_acquisition_fine_doppler_cc.cc | 11 +- .../gnuradio_blocks/pcps_acquisition_sc.cc | 15 +- .../pcps_assisted_acquisition_cc.cc | 4 +- .../pcps_cccwsr_acquisition_cc.cc | 8 +- .../pcps_multithread_acquisition_cc.cc | 7 +- .../pcps_opencl_acquisition_cc.cc | 10 +- .../pcps_quicksync_acquisition_cc.cc | 10 +- .../pcps_tong_acquisition_cc.cc | 7 +- .../volk_gnsssdr_s32f_sincos_32fc.h | 543 ++++++++++++++++++ .../volk_gnsssdr/lib/kernel_tests.h | 3 + 13 files changed, 591 insertions(+), 57 deletions(-) create mode 100644 
src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h diff --git a/src/algorithms/acquisition/gnuradio_blocks/galileo_e5a_noncoherent_iq_acquisition_caf_cc.cc b/src/algorithms/acquisition/gnuradio_blocks/galileo_e5a_noncoherent_iq_acquisition_caf_cc.cc index 77bdcd4d4..d037339a3 100644 --- a/src/algorithms/acquisition/gnuradio_blocks/galileo_e5a_noncoherent_iq_acquisition_caf_cc.cc +++ b/src/algorithms/acquisition/gnuradio_blocks/galileo_e5a_noncoherent_iq_acquisition_caf_cc.cc @@ -40,7 +40,7 @@ #include #include #include -#include "gnss_signal_processing.h" +#include #include "control_message_factory.h" using google::LogMessage; @@ -277,6 +277,7 @@ void galileo_e5a_noncoherentIQ_acquisition_caf_cc::init() d_gnss_synchro->Acq_samplestamp_samples = 0; d_mag = 0.0; d_input_power = 0.0; + const double GALILEO_TWO_PI = 6.283185307179600; // Count the number of bins d_num_doppler_bins = 0; @@ -293,7 +294,9 @@ void galileo_e5a_noncoherentIQ_acquisition_caf_cc::init() { d_grid_doppler_wipeoffs[doppler_index] = static_cast(volk_malloc(d_fft_size * sizeof(gr_complex), volk_get_alignment())); int doppler = -static_cast(d_doppler_max) + d_doppler_step * doppler_index; - complex_exp_gen_conj(d_grid_doppler_wipeoffs[doppler_index], d_freq + doppler, d_fs_in, d_fft_size); + float phase_step_rad = GALILEO_TWO_PI * (d_freq + doppler) / static_cast(d_fs_in); + + volk_gnsssdr_s32f_sincos_32fc(d_grid_doppler_wipeoffs[doppler_index], - phase_step_rad, d_fft_size); } /* CAF Filtering to resolve doppler ambiguity. 
Phase and quadrature must be processed diff --git a/src/algorithms/acquisition/gnuradio_blocks/galileo_pcps_8ms_acquisition_cc.cc b/src/algorithms/acquisition/gnuradio_blocks/galileo_pcps_8ms_acquisition_cc.cc index eb44a9121..ed65c50ca 100644 --- a/src/algorithms/acquisition/gnuradio_blocks/galileo_pcps_8ms_acquisition_cc.cc +++ b/src/algorithms/acquisition/gnuradio_blocks/galileo_pcps_8ms_acquisition_cc.cc @@ -34,7 +34,7 @@ #include #include #include -#include "gnss_signal_processing.h" +#include #include "control_message_factory.h" using google::LogMessage; @@ -157,7 +157,7 @@ void galileo_pcps_8ms_acquisition_cc::init() d_gnss_synchro->Acq_samplestamp_samples = 0; d_mag = 0.0; d_input_power = 0.0; - + const double GALILEO_TWO_PI = 6.283185307179600; // Count the number of bins d_num_doppler_bins = 0; for (int doppler = static_cast(-d_doppler_max); @@ -173,7 +173,8 @@ void galileo_pcps_8ms_acquisition_cc::init() { d_grid_doppler_wipeoffs[doppler_index] = static_cast(volk_malloc(d_fft_size * sizeof(gr_complex), volk_get_alignment())); int doppler = -static_cast(d_doppler_max) + d_doppler_step * doppler_index; - complex_exp_gen_conj(d_grid_doppler_wipeoffs[doppler_index], d_freq + doppler, d_fs_in, d_fft_size); + float phase_step_rad = static_cast(GALILEO_TWO_PI) * (d_freq + doppler) / static_cast(d_fs_in); + volk_gnsssdr_s32f_sincos_32fc(d_grid_doppler_wipeoffs[doppler_index], - phase_step_rad, d_fft_size); } } diff --git a/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition_cc.cc b/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition_cc.cc index e99760c25..6b9d57837 100644 --- a/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition_cc.cc +++ b/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition_cc.cc @@ -38,9 +38,9 @@ #include #include #include +#include #include "gnss_signal_processing.h" #include "control_message_factory.h" -#include // fixed point sine and cosine #include "GPS_L1_CA.h" //GPS_TWO_PI @@ -174,18 +174,8 @@ void 
pcps_acquisition_cc::set_local_code(std::complex * code) void pcps_acquisition_cc::update_local_carrier(gr_complex* carrier_vector, int correlator_length_samples, float freq) { - float sin_f, cos_f; - float phase_step_rad= GPS_TWO_PI * freq/ static_cast(d_fs_in); - - int phase_step_rad_i = gr::fxpt::float_to_fixed(phase_step_rad); - int phase_rad_i = 0; - - for(int i = 0; i < correlator_length_samples; i++) - { - gr::fxpt::sincos(phase_rad_i, &sin_f, &cos_f); - carrier_vector[i] = gr_complex(cos_f, -sin_f); - phase_rad_i += phase_step_rad_i; - } + float phase_step_rad = GPS_TWO_PI * freq / static_cast(d_fs_in); + volk_gnsssdr_s32f_sincos_32fc(carrier_vector, - phase_step_rad, correlator_length_samples); } void pcps_acquisition_cc::init() diff --git a/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition_fine_doppler_cc.cc b/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition_fine_doppler_cc.cc index 9be375cba..ecdf80206 100644 --- a/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition_fine_doppler_cc.cc +++ b/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition_fine_doppler_cc.cc @@ -36,7 +36,7 @@ #include #include #include -#include "nco_lib.h" +#include #include "concurrent_map.h" #include "gnss_signal_processing.h" #include "gps_sdr_signal_processing.h" @@ -184,14 +184,16 @@ void pcps_acquisition_fine_doppler_cc::forecast (int noutput_items, void pcps_acquisition_fine_doppler_cc::reset_grid() { d_well_count = 0; - for (int i=0; i(GPS_TWO_PI) * ( d_freq + doppler_hz ) / static_cast(d_fs_in); d_grid_doppler_wipeoffs[doppler_index] = new gr_complex[d_fft_size]; - fxp_nco(d_grid_doppler_wipeoffs[doppler_index], d_fft_size,0, phase_step_rad); + volk_gnsssdr_s32f_sincos_32fc(d_grid_doppler_wipeoffs[doppler_index], - phase_step_rad, d_fft_size); } } diff --git a/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition_sc.cc b/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition_sc.cc index 7c917c96c..8664c49a9 100644 --- 
a/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition_sc.cc +++ b/src/algorithms/acquisition/gnuradio_blocks/pcps_acquisition_sc.cc @@ -41,7 +41,6 @@ #include "gnss_signal_processing.h" #include "control_message_factory.h" #include -#include // fixed point sine and cosine #include "GPS_L1_CA.h" //GPS_TWO_PI using google::LogMessage; @@ -177,18 +176,8 @@ void pcps_acquisition_sc::set_local_code(std::complex * code) void pcps_acquisition_sc::update_local_carrier(gr_complex* carrier_vector, int correlator_length_samples, float freq) { - float sin_f, cos_f; - float phase_step_rad= GPS_TWO_PI * freq/ static_cast(d_fs_in); - - int phase_step_rad_i = gr::fxpt::float_to_fixed(phase_step_rad); - int phase_rad_i = 0; - - for(int i = 0; i < correlator_length_samples; i++) - { - gr::fxpt::sincos(phase_rad_i, &sin_f, &cos_f); - carrier_vector[i] = gr_complex(cos_f, -sin_f); - phase_rad_i += phase_step_rad_i; - } + float phase_step_rad = GPS_TWO_PI * freq / static_cast(d_fs_in); + volk_gnsssdr_s32f_sincos_32fc(carrier_vector, - phase_step_rad, correlator_length_samples); } void pcps_acquisition_sc::init() diff --git a/src/algorithms/acquisition/gnuradio_blocks/pcps_assisted_acquisition_cc.cc b/src/algorithms/acquisition/gnuradio_blocks/pcps_assisted_acquisition_cc.cc index b1ecea8af..c10310fcf 100644 --- a/src/algorithms/acquisition/gnuradio_blocks/pcps_assisted_acquisition_cc.cc +++ b/src/algorithms/acquisition/gnuradio_blocks/pcps_assisted_acquisition_cc.cc @@ -35,7 +35,7 @@ #include #include #include -#include "nco_lib.h" +#include #include "concurrent_map.h" #include "gnss_signal_processing.h" #include "control_message_factory.h" @@ -252,7 +252,7 @@ void pcps_assisted_acquisition_cc::redefine_grid() // compute the carrier doppler wipe-off signal and store it phase_step_rad = static_cast(GPS_TWO_PI) * doppler_hz / static_cast(d_fs_in); d_grid_doppler_wipeoffs[doppler_index] = new gr_complex[d_fft_size]; - fxp_nco(d_grid_doppler_wipeoffs[doppler_index], d_fft_size, 0, 
phase_step_rad); + volk_gnsssdr_s32f_sincos_32fc(d_grid_doppler_wipeoffs[doppler_index], - phase_step_rad, d_fft_size); } } diff --git a/src/algorithms/acquisition/gnuradio_blocks/pcps_cccwsr_acquisition_cc.cc b/src/algorithms/acquisition/gnuradio_blocks/pcps_cccwsr_acquisition_cc.cc index 20b491529..954063c5a 100644 --- a/src/algorithms/acquisition/gnuradio_blocks/pcps_cccwsr_acquisition_cc.cc +++ b/src/algorithms/acquisition/gnuradio_blocks/pcps_cccwsr_acquisition_cc.cc @@ -39,8 +39,9 @@ #include #include #include -#include "gnss_signal_processing.h" +#include #include "control_message_factory.h" +#include "GPS_L1_CA.h" //GPS_TWO_PI using google::LogMessage; @@ -188,8 +189,9 @@ void pcps_cccwsr_acquisition_cc::init() d_grid_doppler_wipeoffs[doppler_index] = static_cast(volk_malloc(d_fft_size * sizeof(gr_complex), volk_get_alignment())); int doppler = -static_cast(d_doppler_max) + d_doppler_step * doppler_index; - complex_exp_gen_conj(d_grid_doppler_wipeoffs[doppler_index], - d_freq + doppler, d_fs_in, d_fft_size); + float phase_step_rad = GPS_TWO_PI * (d_freq + doppler) / static_cast(d_fs_in); + + volk_gnsssdr_s32f_sincos_32fc(d_grid_doppler_wipeoffs[doppler_index], - phase_step_rad, d_fft_size); } } diff --git a/src/algorithms/acquisition/gnuradio_blocks/pcps_multithread_acquisition_cc.cc b/src/algorithms/acquisition/gnuradio_blocks/pcps_multithread_acquisition_cc.cc index 6aa66c880..d2efe4b92 100644 --- a/src/algorithms/acquisition/gnuradio_blocks/pcps_multithread_acquisition_cc.cc +++ b/src/algorithms/acquisition/gnuradio_blocks/pcps_multithread_acquisition_cc.cc @@ -39,8 +39,9 @@ #include #include #include -#include "gnss_signal_processing.h" +#include #include "control_message_factory.h" +#include "GPS_L1_CA.h" //GPS_TWO_PI using google::LogMessage; @@ -174,8 +175,8 @@ void pcps_multithread_acquisition_cc::init() d_grid_doppler_wipeoffs[doppler_index] = static_cast(volk_malloc(d_fft_size * sizeof(gr_complex), volk_get_alignment())); int doppler = 
-(int)d_doppler_max + d_doppler_step * doppler_index; - complex_exp_gen_conj(d_grid_doppler_wipeoffs[doppler_index], - d_freq + doppler, d_fs_in, d_fft_size); + float phase_step_rad = static_cast(GPS_TWO_PI) * (d_freq + doppler) / static_cast(d_fs_in); + volk_gnsssdr_s32f_sincos_32fc(d_grid_doppler_wipeoffs[doppler_index], - phase_step_rad, d_fft_size); } } diff --git a/src/algorithms/acquisition/gnuradio_blocks/pcps_opencl_acquisition_cc.cc b/src/algorithms/acquisition/gnuradio_blocks/pcps_opencl_acquisition_cc.cc index 81dea6d07..dcfeac968 100644 --- a/src/algorithms/acquisition/gnuradio_blocks/pcps_opencl_acquisition_cc.cc +++ b/src/algorithms/acquisition/gnuradio_blocks/pcps_opencl_acquisition_cc.cc @@ -56,11 +56,11 @@ #include #include #include -#include "gnss_signal_processing.h" +#include #include "control_message_factory.h" #include "fft_base_kernels.h" #include "fft_internal.h" - +#include "GPS_L1_CA.h" //GPS_TWO_PI using google::LogMessage; @@ -315,9 +315,9 @@ void pcps_opencl_acquisition_cc::init() { d_grid_doppler_wipeoffs[doppler_index] = static_cast(volk_malloc(d_fft_size * sizeof(gr_complex), volk_get_alignment())); - int doppler= -static_cast(d_doppler_max) + d_doppler_step * doppler_index; - complex_exp_gen_conj(d_grid_doppler_wipeoffs[doppler_index], - d_freq + doppler, d_fs_in, d_fft_size); + int doppler = -static_cast(d_doppler_max) + d_doppler_step * doppler_index; + float phase_step_rad = static_cast(GPS_TWO_PI) * (d_freq + doppler) / static_cast(d_fs_in); + volk_gnsssdr_s32f_sincos_32fc(d_grid_doppler_wipeoffs[doppler_index], - phase_step_rad, d_fft_size); if (d_opencl == 0) { diff --git a/src/algorithms/acquisition/gnuradio_blocks/pcps_quicksync_acquisition_cc.cc b/src/algorithms/acquisition/gnuradio_blocks/pcps_quicksync_acquisition_cc.cc index 44a9a09fa..4e6d18b64 100644 --- a/src/algorithms/acquisition/gnuradio_blocks/pcps_quicksync_acquisition_cc.cc +++ b/src/algorithms/acquisition/gnuradio_blocks/pcps_quicksync_acquisition_cc.cc @@ 
-34,9 +34,9 @@ #include #include #include +#include #include "control_message_factory.h" -#include "gnss_signal_processing.h" - +#include "GPS_L1_CA.h" using google::LogMessage; @@ -220,9 +220,9 @@ void pcps_quicksync_acquisition_cc::init() { d_grid_doppler_wipeoffs[doppler_index] = static_cast(volk_malloc(d_samples_per_code * d_folding_factor * sizeof(gr_complex), volk_get_alignment())); int doppler = -static_cast(d_doppler_max) + d_doppler_step * doppler_index; - complex_exp_gen_conj(d_grid_doppler_wipeoffs[doppler_index], - d_freq + doppler, d_fs_in, - d_samples_per_code * d_folding_factor); + float phase_step_rad = GPS_TWO_PI * (d_freq + doppler) / static_cast(d_fs_in); + + volk_gnsssdr_s32f_sincos_32fc(d_grid_doppler_wipeoffs[doppler_index], - phase_step_rad, d_samples_per_code * d_folding_factor); } // DLOG(INFO) << "end init"; } diff --git a/src/algorithms/acquisition/gnuradio_blocks/pcps_tong_acquisition_cc.cc b/src/algorithms/acquisition/gnuradio_blocks/pcps_tong_acquisition_cc.cc index 82c99f3ee..402a7c9df 100644 --- a/src/algorithms/acquisition/gnuradio_blocks/pcps_tong_acquisition_cc.cc +++ b/src/algorithms/acquisition/gnuradio_blocks/pcps_tong_acquisition_cc.cc @@ -53,8 +53,9 @@ #include #include #include +#include #include "control_message_factory.h" -#include "gnss_signal_processing.h" +#include "GPS_L1_CA.h" //GPS_TWO_PI using google::LogMessage; @@ -185,9 +186,9 @@ void pcps_tong_acquisition_cc::init() d_grid_doppler_wipeoffs[doppler_index] = static_cast(volk_malloc(d_fft_size * sizeof(gr_complex), volk_get_alignment())); int doppler = -static_cast(d_doppler_max) + d_doppler_step * doppler_index; + float phase_step_rad = GPS_TWO_PI * (d_freq + doppler) / static_cast(d_fs_in); - complex_exp_gen_conj(d_grid_doppler_wipeoffs[doppler_index], - d_freq + doppler, d_fs_in, d_fft_size); + volk_gnsssdr_s32f_sincos_32fc(d_grid_doppler_wipeoffs[doppler_index], - phase_step_rad, d_fft_size); d_grid_data[doppler_index] = static_cast(volk_malloc(d_fft_size * 
sizeof(float), volk_get_alignment())); diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h new file mode 100644 index 000000000..e6a92b97d --- /dev/null +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h @@ -0,0 +1,543 @@ +/*! + * \file volk_gnsssdr_s32f_sincos_32fc.h + * \brief VOLK_GNSSSDR kernel: Computes the sine and cosine of a vector of floats. + * \authors
    + *
  • Carles Fernandez-Prades, 2016. cfernandez(at)cttc.es + *
+ * + * VOLK_GNSSSDR kernel that computes the sine and cosine of a phase that advances by a fixed increment per sample. + * + * ------------------------------------------------------------------------- + * + * Copyright (C) 2010-2015 (see AUTHORS file for a list of contributors) + * + * GNSS-SDR is a software defined Global Navigation + * Satellite Systems receiver + * + * This file is part of GNSS-SDR. + * + * GNSS-SDR is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GNSS-SDR is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNSS-SDR. If not, see <http://www.gnu.org/licenses/>. + * + * ------------------------------------------------------------------------- + */ + +/*! + * \page volk_gnsssdr_s32f_sincos_32fc + * + * \b Overview + * + * VOLK_GNSSSDR kernel that computes the sine and cosine with a fixed + * phase increment \p phase_inc per sample, providing the output in a complex vector (cosine, sine) + * + * Dispatcher Prototype + * \code + * void volk_gnsssdr_s32f_sincos_32fc(lv_32fc_t* out, const float phase_inc, unsigned int num_points) + * \endcode + * + * \b Inputs + * \li phase_inc: Phase increment per sample, in radians. + * \li num_points: Number of complex samples to be generated in \p out.
+ * + * \b Outputs + * \li out: Vector of the form lv_32fc_t out[n] = lv_cmake(cos(n * phase_inc), sin(n * phase_inc)) + * + */ + + +#ifndef INCLUDED_volk_gnsssdr_s32f_sincos_32fc_H +#define INCLUDED_volk_gnsssdr_s32f_sincos_32fc_H + +#include +#include +#include + + +#ifdef LV_HAVE_SSE2 +#include +/* Adapted from http://gruntthepeon.free.fr/ssemath/sse_mathfun.h, original code from Julien Pommier */ +/* Based on algorithms from the cephes library http://www.netlib.org/cephes/ */ +static inline void volk_gnsssdr_s32f_sincos_32fc_a_sse2(lv_32fc_t* out, const float phase_inc, unsigned int num_points) +{ + lv_32fc_t* bPtr = out; + + const unsigned int sse_iters = num_points / 4; + unsigned int number = 0; + float _phase; + + __m128 sine, cosine, aux, x, four_phases_reg; + __m128 xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y; + __m128i emm0, emm2, emm4; + + /* declare some SSE constants */ + static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; + static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; + + static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; + static const int _pi32_1[4] __attribute__((aligned(16))) = { 1, 1, 1, 1 }; + static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 }; + static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2}; + static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4}; + + static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; + static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; + static const float _ps_minus_cephes_DP3[4]
__attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; + static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; + static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; + static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; + static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; + static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; + static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; + static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f }; + static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f }; + + float four_phases[4] __attribute__((aligned(16))) = { 0.0f, phase_inc, 2 * phase_inc, 3 * phase_inc }; + float four_phases_inc[4] __attribute__((aligned(16))) = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc }; + four_phases_reg = _mm_load_ps(four_phases); + const __m128 four_phases_inc_reg = _mm_load_ps(four_phases_inc); + + for(;number < sse_iters; number++) + { + x = four_phases_reg; + + sign_bit_sin = x; + /* take the absolute value */ + x = _mm_and_ps(x, *(__m128*)_ps_inv_sign_mask); + /* extract the sign bit (upper one) */ + sign_bit_sin = _mm_and_ps(sign_bit_sin, *(__m128*)_ps_sign_mask); + + /* scale by 4/Pi */ + y = _mm_mul_ps(x, *(__m128*)_ps_cephes_FOPI); + + /* store the integer part of y in emm2 */ + 
emm2 = _mm_cvttps_epi32(y); + + /* j=(j+1) & (~1) (see the cephes sources) */ + emm2 = _mm_add_epi32(emm2, *(__m128i *)_pi32_1); + emm2 = _mm_and_si128(emm2, *(__m128i *)_pi32_inv1); + y = _mm_cvtepi32_ps(emm2); + + emm4 = emm2; + + /* get the swap sign flag for the sine */ + emm0 = _mm_and_si128(emm2, *(__m128i *)_pi32_4); + emm0 = _mm_slli_epi32(emm0, 29); + __m128 swap_sign_bit_sin = _mm_castsi128_ps(emm0); + + /* get the polynom selection mask for the sine*/ + emm2 = _mm_and_si128(emm2, *(__m128i *)_pi32_2); + emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128()); + __m128 poly_mask = _mm_castsi128_ps(emm2); + + /* The magic pass: "Extended precision modular arithmetic” + x = ((x - y * DP1) - y * DP2) - y * DP3; */ + xmm1 = *(__m128*)_ps_minus_cephes_DP1; + xmm2 = *(__m128*)_ps_minus_cephes_DP2; + xmm3 = *(__m128*)_ps_minus_cephes_DP3; + xmm1 = _mm_mul_ps(y, xmm1); + xmm2 = _mm_mul_ps(y, xmm2); + xmm3 = _mm_mul_ps(y, xmm3); + x = _mm_add_ps(x, xmm1); + x = _mm_add_ps(x, xmm2); + x = _mm_add_ps(x, xmm3); + + emm4 = _mm_sub_epi32(emm4, *(__m128i *)_pi32_2); + emm4 = _mm_andnot_si128(emm4, *(__m128i *)_pi32_4); + emm4 = _mm_slli_epi32(emm4, 29); + __m128 sign_bit_cos = _mm_castsi128_ps(emm4); + + sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin); + + /* Evaluate the first polynom (0 <= x <= Pi/4) */ + __m128 z = _mm_mul_ps(x,x); + y = *(__m128*)_ps_coscof_p0; + + y = _mm_mul_ps(y, z); + y = _mm_add_ps(y, *(__m128*)_ps_coscof_p1); + y = _mm_mul_ps(y, z); + y = _mm_add_ps(y, *(__m128*)_ps_coscof_p2); + y = _mm_mul_ps(y, z); + y = _mm_mul_ps(y, z); + __m128 tmp = _mm_mul_ps(z, *(__m128*)_ps_0p5); + y = _mm_sub_ps(y, tmp); + y = _mm_add_ps(y, *(__m128*)_ps_1); + + /* Evaluate the second polynom (Pi/4 <= x <= 0) */ + __m128 y2 = *(__m128*)_ps_sincof_p0; + y2 = _mm_mul_ps(y2, z); + y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p1); + y2 = _mm_mul_ps(y2, z); + y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p2); + y2 = _mm_mul_ps(y2, z); + y2 = _mm_mul_ps(y2, x); + y2 = 
_mm_add_ps(y2, x); + + /* select the correct result from the two polynoms */ + xmm3 = poly_mask; + __m128 ysin2 = _mm_and_ps(xmm3, y2); + __m128 ysin1 = _mm_andnot_ps(xmm3, y); + y2 = _mm_sub_ps(y2,ysin2); + y = _mm_sub_ps(y, ysin1); + + xmm1 = _mm_add_ps(ysin1,ysin2); + xmm2 = _mm_add_ps(y,y2); + + /* update the sign */ + sine = _mm_xor_ps(xmm1, sign_bit_sin); + cosine = _mm_xor_ps(xmm2, sign_bit_cos); + + /* write the output */ + aux = _mm_unpacklo_ps(cosine, sine); + _mm_store_ps((float*)bPtr, aux); + bPtr += 2; + aux = _mm_unpackhi_ps(cosine, sine); + _mm_store_ps((float*)bPtr, aux); + bPtr += 2; + + four_phases_reg = _mm_add_ps(four_phases_reg, four_phases_inc_reg); + } + + _phase = phase_inc * (sse_iters * 4); + for(number = sse_iters * 4; number < num_points; number++) + { + *bPtr++ = lv_cmake((float)cos(_phase), (float)sin(_phase) ); + _phase += phase_inc; + } +} + +#endif /* LV_HAVE_SSE2 */ + + +#ifdef LV_HAVE_SSE2 +#include +/* Adapted from http://gruntthepeon.free.fr/ssemath/sse_mathfun.h, original code from Julien Pommier */ +/* Based on algorithms from the cephes library http://www.netlib.org/cephes/ */ +static inline void volk_gnsssdr_s32f_sincos_32fc_u_sse2(lv_32fc_t* out, const float phase_inc, unsigned int num_points) +{ + lv_32fc_t* bPtr = out; + + const unsigned int sse_iters = num_points / 4; + unsigned int number = 0; + float _phase; + + __m128 sine, cosine, aux, x, four_phases_reg; + __m128 xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y; + __m128i emm0, emm2, emm4; + + /* declare some SSE constants */ + static const int _ps_inv_sign_mask[4] __attribute__((aligned(16))) = { ~0x80000000, ~0x80000000, ~0x80000000, ~0x80000000 }; + static const int _ps_sign_mask[4] __attribute__((aligned(16))) = { (int)0x80000000, (int)0x80000000, (int)0x80000000, (int)0x80000000 }; + + static const float _ps_cephes_FOPI[4] __attribute__((aligned(16))) = { 1.27323954473516, 1.27323954473516, 1.27323954473516, 1.27323954473516 }; + static const int _pi32_1[4] 
__attribute__((aligned(16))) = { 1, 1, 1, 1 }; + static const int _pi32_inv1[4] __attribute__((aligned(16))) = { ~1, ~1, ~1, ~1 }; + static const int _pi32_2[4] __attribute__((aligned(16))) = { 2, 2, 2, 2}; + static const int _pi32_4[4] __attribute__((aligned(16))) = { 4, 4, 4, 4}; + + static const float _ps_minus_cephes_DP1[4] __attribute__((aligned(16))) = { -0.78515625, -0.78515625, -0.78515625, -0.78515625 }; + static const float _ps_minus_cephes_DP2[4] __attribute__((aligned(16))) = { -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4, -2.4187564849853515625e-4 }; + static const float _ps_minus_cephes_DP3[4] __attribute__((aligned(16))) = { -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8, -3.77489497744594108e-8 }; + static const float _ps_coscof_p0[4] __attribute__((aligned(16))) = { 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005, 2.443315711809948E-005 }; + static const float _ps_coscof_p1[4] __attribute__((aligned(16))) = { -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003, -1.388731625493765E-003 }; + static const float _ps_coscof_p2[4] __attribute__((aligned(16))) = { 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002, 4.166664568298827E-002 }; + static const float _ps_sincof_p0[4] __attribute__((aligned(16))) = { -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4, -1.9515295891E-4 }; + static const float _ps_sincof_p1[4] __attribute__((aligned(16))) = { 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3, 8.3321608736E-3 }; + static const float _ps_sincof_p2[4] __attribute__((aligned(16))) = { -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1, -1.6666654611E-1 }; + static const float _ps_0p5[4] __attribute__((aligned(16))) = { 0.5f, 0.5f, 0.5f, 0.5f }; + static const float _ps_1[4] __attribute__((aligned(16))) = { 1.0f, 1.0f, 1.0f, 1.0f }; + + float four_phases[4] __attribute__((aligned(16))) = { 0.0f, phase_inc, 2 * phase_inc, 3 
* phase_inc }; + float four_phases_inc[4] __attribute__((aligned(16))) = { 4 * phase_inc, 4 * phase_inc, 4 * phase_inc, 4 * phase_inc }; + four_phases_reg = _mm_load_ps(four_phases); + const __m128 four_phases_inc_reg = _mm_load_ps(four_phases_inc); + + for(;number < sse_iters; number++) + { + x = four_phases_reg; + + sign_bit_sin = x; + /* take the absolute value */ + x = _mm_and_ps(x, *(__m128*)_ps_inv_sign_mask); + /* extract the sign bit (upper one) */ + sign_bit_sin = _mm_and_ps(sign_bit_sin, *(__m128*)_ps_sign_mask); + + /* scale by 4/Pi */ + y = _mm_mul_ps(x, *(__m128*)_ps_cephes_FOPI); + + /* store the integer part of y in emm2 */ + emm2 = _mm_cvttps_epi32(y); + + /* j=(j+1) & (~1) (see the cephes sources) */ + emm2 = _mm_add_epi32(emm2, *(__m128i *)_pi32_1); + emm2 = _mm_and_si128(emm2, *(__m128i *)_pi32_inv1); + y = _mm_cvtepi32_ps(emm2); + + emm4 = emm2; + + /* get the swap sign flag for the sine */ + emm0 = _mm_and_si128(emm2, *(__m128i *)_pi32_4); + emm0 = _mm_slli_epi32(emm0, 29); + __m128 swap_sign_bit_sin = _mm_castsi128_ps(emm0); + + /* get the polynom selection mask for the sine*/ + emm2 = _mm_and_si128(emm2, *(__m128i *)_pi32_2); + emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128()); + __m128 poly_mask = _mm_castsi128_ps(emm2); + + /* The magic pass: "Extended precision modular arithmetic” + x = ((x - y * DP1) - y * DP2) - y * DP3; */ + xmm1 = *(__m128*)_ps_minus_cephes_DP1; + xmm2 = *(__m128*)_ps_minus_cephes_DP2; + xmm3 = *(__m128*)_ps_minus_cephes_DP3; + xmm1 = _mm_mul_ps(y, xmm1); + xmm2 = _mm_mul_ps(y, xmm2); + xmm3 = _mm_mul_ps(y, xmm3); + x = _mm_add_ps(x, xmm1); + x = _mm_add_ps(x, xmm2); + x = _mm_add_ps(x, xmm3); + + emm4 = _mm_sub_epi32(emm4, *(__m128i *)_pi32_2); + emm4 = _mm_andnot_si128(emm4, *(__m128i *)_pi32_4); + emm4 = _mm_slli_epi32(emm4, 29); + __m128 sign_bit_cos = _mm_castsi128_ps(emm4); + + sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin); + + /* Evaluate the first polynom (0 <= x <= Pi/4) */ + __m128 z = 
_mm_mul_ps(x,x); + y = *(__m128*)_ps_coscof_p0; + + y = _mm_mul_ps(y, z); + y = _mm_add_ps(y, *(__m128*)_ps_coscof_p1); + y = _mm_mul_ps(y, z); + y = _mm_add_ps(y, *(__m128*)_ps_coscof_p2); + y = _mm_mul_ps(y, z); + y = _mm_mul_ps(y, z); + __m128 tmp = _mm_mul_ps(z, *(__m128*)_ps_0p5); + y = _mm_sub_ps(y, tmp); + y = _mm_add_ps(y, *(__m128*)_ps_1); + + /* Evaluate the second polynom (Pi/4 <= x <= 0) */ + __m128 y2 = *(__m128*)_ps_sincof_p0; + y2 = _mm_mul_ps(y2, z); + y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p1); + y2 = _mm_mul_ps(y2, z); + y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p2); + y2 = _mm_mul_ps(y2, z); + y2 = _mm_mul_ps(y2, x); + y2 = _mm_add_ps(y2, x); + + /* select the correct result from the two polynoms */ + xmm3 = poly_mask; + __m128 ysin2 = _mm_and_ps(xmm3, y2); + __m128 ysin1 = _mm_andnot_ps(xmm3, y); + y2 = _mm_sub_ps(y2,ysin2); + y = _mm_sub_ps(y, ysin1); + + xmm1 = _mm_add_ps(ysin1,ysin2); + xmm2 = _mm_add_ps(y,y2); + + /* update the sign */ + sine = _mm_xor_ps(xmm1, sign_bit_sin); + cosine = _mm_xor_ps(xmm2, sign_bit_cos); + + /* write the output */ + aux = _mm_unpacklo_ps(cosine, sine); + _mm_storeu_ps((float*)bPtr, aux); + bPtr += 2; + aux = _mm_unpackhi_ps(cosine, sine); + _mm_storeu_ps((float*)bPtr, aux); + bPtr += 2; + + four_phases_reg = _mm_add_ps(four_phases_reg, four_phases_inc_reg); + } + + _phase = phase_inc * (sse_iters * 4); + for(number = sse_iters * 4; number < num_points; number++) + { + *bPtr++ = lv_cmake((float)cos(_phase), (float)sin(_phase) ); + _phase += phase_inc; + } +} + +#endif /* LV_HAVE_SSE2 */ + +#ifdef LV_HAVE_GENERIC + +static inline void volk_gnsssdr_s32f_sincos_32fc_generic(lv_32fc_t* out, const float phase_inc, unsigned int num_points) +{ + float _phase = 0.0; + for(unsigned int i = 0; i < num_points; i++) + { + *out++ = lv_cmake((float)cos(_phase), (float)sin(_phase) ); + _phase += phase_inc; + } +} + +#endif /* LV_HAVE_GENERIC */ + + +#ifdef LV_HAVE_GENERIC +#include +#include +static inline void 
volk_gnsssdr_s32f_sincos_32fc_generic_fxpt(lv_32fc_t* out, const float phase_inc, unsigned int num_points) +{ + float _in, s, c; + int32_t x, sin_index, cos_index, d; + const float PI = 3.14159265358979323846; + const float TWO_TO_THE_31_DIV_PI = 2147483648.0 / PI; + const float TWO_PI = PI * 2; + const int32_t bitlength = 32; + const int32_t Nbits = 10; + const int32_t diffbits = bitlength - Nbits; + uint32_t ux; + float _phase = 0.0; + for(unsigned int i = 0; i < num_points; i++) + { + _in = _phase; + d = (int32_t)floor(_in / TWO_PI + 0.5); + _in -= d * TWO_PI; + x = (int32_t) ((float)_in * TWO_TO_THE_31_DIV_PI); + + ux = x; + sin_index = ux >> diffbits; + s = sine_table_10bits[sin_index][0] * (ux >> 1) + sine_table_10bits[sin_index][1]; + + ux = x + 0x40000000; + cos_index = ux >> diffbits; + c = sine_table_10bits[cos_index][0] * (ux >> 1) + sine_table_10bits[cos_index][1]; + + *out++ = lv_cmake((float)c, (float)s ); + _phase += phase_inc; + } +} + +#endif /* LV_HAVE_GENERIC */ + + +#ifdef LV_HAVE_NEON +#include +/* Adapted from http://gruntthepeon.free.fr/ssemath/neon_mathfun.h, original code from Julien Pommier */ +/* Based on algorithms from the cephes library http://www.netlib.org/cephes/ */ +static inline void volk_gnsssdr_s32f_sincos_32fc_neon(lv_32fc_t* out, const float phase_inc, unsigned int num_points) +{ + lv_32fc_t* bPtr = out; + const unsigned int neon_iters = num_points / 4; + + __VOLK_ATTR_ALIGNED(16) float32_t four_phases[4] = { 0.0f , phase_inc, 2 * phase_inc, 3 * phase_inc }; + float four_inc = 4 * phase_inc; + __VOLK_ATTR_ALIGNED(16) float32_t four_phases_inc[4] = { four_inc, four_inc, four_inc, four_inc }; + + float32x4_t four_phases_reg = vld1q_f32(four_phases); + float32x4_t four_phases_inc_reg = vld1q_f32(four_phases_inc); + + const float32_t c_minus_cephes_DP1 = -0.78515625; + const float32_t c_minus_cephes_DP2 = -2.4187564849853515625e-4; + const float32_t c_minus_cephes_DP3 = -3.77489497744594108e-8; + const float32_t c_sincof_p0 = 
-1.9515295891E-4; + const float32_t c_sincof_p1 = 8.3321608736E-3; + const float32_t c_sincof_p2 = -1.6666654611E-1; + const float32_t c_coscof_p0 = 2.443315711809948E-005; + const float32_t c_coscof_p1 = -1.388731625493765E-003; + const float32_t c_coscof_p2 = 4.166664568298827E-002; + const float32_t c_cephes_FOPI = 1.27323954473516; + + unsigned int number = 0; + float _phase; + + float32x4_t x, xmm1, xmm2, xmm3, y, y1, y2, ys, yc, z; + float32x4x2_t result; + + uint32x4_t emm2, poly_mask, sign_mask_sin, sign_mask_cos; + + for(;number < neon_iters; number++) + { + x = four_phases_reg; + __builtin_prefetch(aPtr + 8); + + sign_mask_sin = vcltq_f32(x, vdupq_n_f32(0)); + x = vabsq_f32(x); + + /* scale by 4/Pi */ + y = vmulq_f32(x, vdupq_n_f32(c_cephes_FOPI)); + + /* store the integer part of y in mm0 */ + emm2 = vcvtq_u32_f32(y); + /* j=(j+1) & (~1) (see the cephes sources) */ + emm2 = vaddq_u32(emm2, vdupq_n_u32(1)); + emm2 = vandq_u32(emm2, vdupq_n_u32(~1)); + y = vcvtq_f32_u32(emm2); + + /* get the polynom selection mask + there is one polynom for 0 <= x <= Pi/4 + and another one for Pi/4 init_test_list(volk_gnsssdr_test_params_t // ... 
or more tolerance ***** ADDED BY GNSS-SDR volk_gnsssdr_test_params_t test_params_int16 = volk_gnsssdr_test_params_t(16, test_params.scalar(), test_params.vlen(), test_params.iter(), test_params.benchmark_mode(), test_params.kernel_regex()); + volk_gnsssdr_test_params_t test_params_inacc2 = volk_gnsssdr_test_params_t(2e-1, test_params.scalar(), + test_params.vlen(), test_params.iter(), test_params.benchmark_mode(), test_params.kernel_regex()); std::vector test_cases = boost::assign::list_of @@ -76,6 +78,7 @@ std::vector init_test_list(volk_gnsssdr_test_params_t (VOLK_INIT_TEST(volk_gnsssdr_8u_x2_multiply_8u, test_params_more_iters)) (VOLK_INIT_TEST(volk_gnsssdr_64f_accumulator_64f, test_params)) (VOLK_INIT_TEST(volk_gnsssdr_32f_sincos_32fc, test_params_inacc)) + (VOLK_INIT_TEST(volk_gnsssdr_s32f_sincos_32fc, test_params_inacc2)) (VOLK_INIT_TEST(volk_gnsssdr_32fc_convert_8ic, test_params)) (VOLK_INIT_TEST(volk_gnsssdr_32fc_convert_16ic, test_params_more_iters)) (VOLK_INIT_TEST(volk_gnsssdr_16ic_x2_dot_prod_16ic, test_params)) From fa292961c12b296c1c5dbdd2cbfbe2b3ff9e4120 Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Sun, 20 Mar 2016 01:50:04 +0100 Subject: [PATCH 3/6] Fix neon protokernel --- .../kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h index e6a92b97d..8e5e44387 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_s32f_sincos_32fc.h @@ -463,7 +463,6 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_neon(lv_32fc_t* out, const floa for(;number < neon_iters; number++) { x = four_phases_reg; - __builtin_prefetch(aPtr + 8); 
sign_mask_sin = vcltq_f32(x, vdupq_n_f32(0)); x = vabsq_f32(x); From 9cb43ef84acbdd9c12cc209e5bf7c116b1eeb803 Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Sun, 20 Mar 2016 02:46:15 +0100 Subject: [PATCH 4/6] Adding missing library link --- src/algorithms/signal_generator/gnuradio_blocks/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/algorithms/signal_generator/gnuradio_blocks/CMakeLists.txt b/src/algorithms/signal_generator/gnuradio_blocks/CMakeLists.txt index 375a0141d..1d2c74789 100644 --- a/src/algorithms/signal_generator/gnuradio_blocks/CMakeLists.txt +++ b/src/algorithms/signal_generator/gnuradio_blocks/CMakeLists.txt @@ -32,7 +32,7 @@ include_directories( file(GLOB SIGNAL_GENERATOR_BLOCK_HEADERS "*.h") add_library(signal_generator_blocks ${SIGNAL_GENERATOR_BLOCK_SOURCES} ${SIGNAL_GENERATOR_BLOCK_HEADERS}) source_group(Headers FILES ${SIGNAL_GENERATOR_BLOCK_HEADERS}) -target_link_libraries(signal_generator_blocks gnss_system_parameters +target_link_libraries(signal_generator_blocks gnss_system_parameters gnss_sp_libs ${GNURADIO_RUNTIME_LIBRARIES} ${GNURADIO_FFT_LIBRARIES} ${VOLK_LIBRARIES} ${ORC_LIBRARIES} From 883cf629d1d865f25e9e784d19a10607a9e7571f Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Sun, 20 Mar 2016 12:23:45 +0100 Subject: [PATCH 5/6] Adding new NEON protokernel Try another strategy based on multiply-and-accumulate for the dot product. 
In all SIMD protokernels, managing memory with volk_gnsssdr_malloc and volk_gnsssdr_free instead of calloc and free --- ...gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h | 246 +++++++++++++++--- 1 file changed, 215 insertions(+), 31 deletions(-) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h index 8f9c5ad37..cebacb8e3 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h @@ -71,8 +71,9 @@ #include #include +#include #include -#include +//#include #ifdef LV_HAVE_GENERIC @@ -184,13 +185,14 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4]; - //todo dyn mem reg + __m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); + __m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); - __m128i* realcacc; - __m128i* imagcacc; - - realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 - imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 + for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + realcacc[n_vec] = _mm_setzero_si128(); + imagcacc[n_vec] = _mm_setzero_si128(); + } __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl; @@ -308,8 +310,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3(lv_16sc_ } _out[n_vec] = dotProduct; } - free(realcacc); - free(imagcacc); + volk_gnsssdr_free(realcacc); + volk_gnsssdr_free(imagcacc); tmp1 = 
_mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg); tmp2 = _mm_hadd_ps(tmp1, tmp1); @@ -356,13 +358,14 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4]; - //todo dyn mem reg + __m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); + __m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); - __m128i* realcacc; - __m128i* imagcacc; - - realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 - imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 + for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + realcacc[n_vec] = _mm_setzero_si128(); + imagcacc[n_vec] = _mm_setzero_si128(); + } __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl; @@ -550,8 +553,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_a_sse3_reload(l _out[n_vec] = dotProduct; } - free(realcacc); - free(imagcacc); + volk_gnsssdr_free(realcacc); + volk_gnsssdr_free(imagcacc); tmp1 = _mm_mul_ps(two_phase_acc_reg, two_phase_acc_reg); tmp2 = _mm_hadd_ps(tmp1, tmp1); @@ -598,13 +601,14 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ lv_16sc_t* _out = result; __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4]; - //todo dyn mem reg + __m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); + __m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); - __m128i* realcacc; - __m128i* imagcacc; - - realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 - imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 + for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + 
realcacc[n_vec] = _mm_setzero_si128(); + imagcacc[n_vec] = _mm_setzero_si128(); + } __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag, imag1, imag2, b_sl, a_sl; @@ -722,8 +726,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_u_sse3(lv_16sc_ } _out[n_vec] = dotProduct; } - free(realcacc); - free(imagcacc); + volk_gnsssdr_free(realcacc); + volk_gnsssdr_free(imagcacc); _mm_storeu_ps((float*)two_phase_acc, two_phase_acc_reg); (*phase) = two_phase_acc[0]; @@ -792,8 +796,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* float32x4x2_t tmp32f, tmp32_real, tmp32_imag; float32x4_t sign, PlusHalf, Round; - int16x4x2_t* accumulator; - accumulator = (int16x4x2_t*)calloc(num_a_vectors, sizeof(int16x4x2_t)); + int16x4x2_t* accumulator = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); for(int n_vec = 0; n_vec < num_a_vectors; n_vec++) { @@ -904,7 +907,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon(lv_16sc_t* } _out[n_vec] = dotProduct; } - free(accumulator); + volk_gnsssdr_free(accumulator); vst1q_f32((float32_t*)__phase_real, _phase_real); vst1q_f32((float32_t*)__phase_imag, _phase_imag); @@ -976,8 +979,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s float32x4x2_t tmp32f, tmp32_real, tmp32_imag; float32x4_t sign, PlusHalf, Round; - int16x4x2_t* accumulator; - accumulator = (int16x4x2_t*)calloc(num_a_vectors, sizeof(int16x4x2_t)); + int16x4x2_t* accumulator = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); for(int n_vec = 0; n_vec < num_a_vectors; n_vec++) { @@ -1095,7 +1097,189 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_vma(lv_16s } _out[n_vec] = dotProduct; } - free(accumulator); + volk_gnsssdr_free(accumulator); + + vst1q_f32((float32_t*)__phase_real, _phase_real); + vst1q_f32((float32_t*)__phase_imag, _phase_imag); 
+ + (*phase) = lv_cmake((float32_t)__phase_real[0], (float32_t)__phase_imag[0]); + } + + for (unsigned int n = neon_iters * 4; n < num_points; n++) + { + tmp16_ = in_common[n]; //printf("neon phase %i: %f,%f\n", n,lv_creal(*phase),lv_cimag(*phase)); + tmp32_ = lv_cmake((float32_t)lv_creal(tmp16_), (float32_t)lv_cimag(tmp16_)) * (*phase); + tmp16_ = lv_cmake((int16_t)rintf(lv_creal(tmp32_)), (int16_t)rintf(lv_cimag(tmp32_))); + (*phase) *= phase_inc; + for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + tmp = tmp16_ * in_a[n_vec][n]; + _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), sat_adds16i(lv_cimag(_out[n_vec]), lv_cimag(tmp))); + } + } +} + +#endif /* LV_HAVE_NEON */ + + +#ifdef LV_HAVE_NEON +#include +#include + +static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_neon_optvma(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_32fc_t phase_inc, lv_32fc_t* phase, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) +{ + const unsigned int neon_iters = num_points / 4; + + const lv_16sc_t** _in_a = in_a; + const lv_16sc_t* _in_common = in_common; + lv_16sc_t* _out = result; + + lv_16sc_t tmp16_, tmp; + lv_32fc_t tmp32_; + + if (neon_iters > 0) + { + lv_16sc_t dotProduct = lv_cmake(0,0); + float arg_phase0 = cargf(*phase); + float arg_phase_inc = cargf(phase_inc); + float phase_est; + + lv_32fc_t ___phase4 = phase_inc * phase_inc * phase_inc * phase_inc; + __VOLK_ATTR_ALIGNED(16) float32_t __phase4_real[4] = { lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4), lv_creal(___phase4) }; + __VOLK_ATTR_ALIGNED(16) float32_t __phase4_imag[4] = { lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4), lv_cimag(___phase4) }; + + float32x4_t _phase4_real = vld1q_f32(__phase4_real); + float32x4_t _phase4_imag = vld1q_f32(__phase4_imag); + + lv_32fc_t phase2 = (lv_32fc_t)(*phase) * phase_inc; + lv_32fc_t phase3 = phase2 * phase_inc; + lv_32fc_t phase4 = phase3 * phase_inc; + + 
__VOLK_ATTR_ALIGNED(16) float32_t __phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) }; + __VOLK_ATTR_ALIGNED(16) float32_t __phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) }; + + float32x4_t _phase_real = vld1q_f32(__phase_real); + float32x4_t _phase_imag = vld1q_f32(__phase_imag); + + int16x4x2_t a_val, b_val; + __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4]; + float32x4_t half = vdupq_n_f32(0.5f); + int32x4x2_t tmp32i; + + float32x4x2_t tmp32f, tmp32_real, tmp32_imag; + float32x4_t sign, PlusHalf, Round; + + int16x4x2_t* accumulator1 = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); + int16x4x2_t* accumulator2 = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); + + for(int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + accumulator1[n_vec].val[0] = vdup_n_s16(0); + accumulator1[n_vec].val[1] = vdup_n_s16(0); + accumulator2[n_vec].val[0] = vdup_n_s16(0); + accumulator2[n_vec].val[1] = vdup_n_s16(0); + } + + for(unsigned int number = 0; number < neon_iters; number++) + { + /* load 4 complex numbers (int 16 bits each component) */ + b_val = vld2_s16((int16_t*)_in_common); + __builtin_prefetch(_in_common + 8); + _in_common += 4; + + /* promote them to int 32 bits */ + tmp32i.val[0] = vmovl_s16(b_val.val[0]); + tmp32i.val[1] = vmovl_s16(b_val.val[1]); + + /* promote them to float 32 bits */ + tmp32f.val[0] = vcvtq_f32_s32(tmp32i.val[0]); + tmp32f.val[1] = vcvtq_f32_s32(tmp32i.val[1]); + + /* complex multiplication of four complex samples (float 32 bits each component) */ + tmp32_real.val[0] = vmulq_f32(tmp32f.val[0], _phase_real); + tmp32_real.val[1] = vmulq_f32(tmp32f.val[1], _phase_imag); + tmp32_imag.val[0] = vmulq_f32(tmp32f.val[0], _phase_imag); + tmp32_imag.val[1] = vmulq_f32(tmp32f.val[1], _phase_real); + + tmp32f.val[0] = vsubq_f32(tmp32_real.val[0], 
tmp32_real.val[1]); + tmp32f.val[1] = vaddq_f32(tmp32_imag.val[0], tmp32_imag.val[1]); + + /* downcast results to int32 */ + /* in __aarch64__ we can do that with vcvtaq_s32_f32(ret1); vcvtaq_s32_f32(ret2); */ + sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[0]), 31))); + PlusHalf = vaddq_f32(tmp32f.val[0], half); + Round = vsubq_f32(PlusHalf, sign); + tmp32i.val[0] = vcvtq_s32_f32(Round); + + sign = vcvtq_f32_u32((vshrq_n_u32(vreinterpretq_u32_f32(tmp32f.val[1]), 31))); + PlusHalf = vaddq_f32(tmp32f.val[1], half); + Round = vsubq_f32(PlusHalf, sign); + tmp32i.val[1] = vcvtq_s32_f32(Round); + + /* downcast results to int16 */ + b_val.val[0] = vqmovn_s32(tmp32i.val[0]); + b_val.val[1] = vqmovn_s32(tmp32i.val[1]); + + /* compute next four phases */ + tmp32_real.val[0] = vmulq_f32(_phase_real, _phase4_real); + tmp32_real.val[1] = vmulq_f32(_phase_imag, _phase4_imag); + tmp32_imag.val[0] = vmulq_f32(_phase_real, _phase4_imag); + tmp32_imag.val[1] = vmulq_f32(_phase_imag, _phase4_real); + + _phase_real = vsubq_f32(tmp32_real.val[0], tmp32_real.val[1]); + _phase_imag = vaddq_f32(tmp32_imag.val[0], tmp32_imag.val[1]); + + // Regenerate phase + if ((number % 256) == 0) + { + //printf("computed phase: %f\n", cos(cargf(lv_cmake(_phase_real[0],_phase_imag[0])))); + phase_est = arg_phase0 + (number + 1) * 4 * arg_phase_inc; + //printf("Estimated phase: %f\n\n", cos(phase_est)); + + *phase = lv_cmake(cos(phase_est), sin(phase_est)); + phase2 = (lv_32fc_t)(*phase) * phase_inc; + phase3 = phase2 * phase_inc; + phase4 = phase3 * phase_inc; + + __VOLK_ATTR_ALIGNED(16) float32_t ____phase_real[4] = { lv_creal((*phase)), lv_creal(phase2), lv_creal(phase3), lv_creal(phase4) }; + __VOLK_ATTR_ALIGNED(16) float32_t ____phase_imag[4] = { lv_cimag((*phase)), lv_cimag(phase2), lv_cimag(phase3), lv_cimag(phase4) }; + + _phase_real = vld1q_f32(____phase_real); + _phase_imag = vld1q_f32(____phase_imag); + } + + vst1q_f32((float32_t*)__phase_real, _phase_real); + 
vst1q_f32((float32_t*)__phase_imag, _phase_imag); + + for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + a_val = vld2_s16((int16_t*)&(_in_a[n_vec][number*4])); + + // use 2 accumulators to remove inter-instruction data dependencies + accumulator1[n_vec].val[0] = vmla_s16(accumulator1[n_vec].val[0], a_val.val[0], b_val.val[0]); + accumulator1[n_vec].val[1] = vmla_s16(accumulator1[n_vec].val[1], a_val.val[0], b_val.val[1]); + accumulator2[n_vec].val[0] = vmls_s16(accumulator2[n_vec].val[0], a_val.val[1], b_val.val[1]); + accumulator2[n_vec].val[1] = vmla_s16(accumulator2[n_vec].val[1], a_val.val[1], b_val.val[0]); + } + } + for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + accumulator1[n_vec].val[0] = vqadd_s16(accumulator1[n_vec].val[0], accumulator2[n_vec].val[0]); + accumulator1[n_vec].val[1] = vqadd_s16(accumulator1[n_vec].val[1], accumulator2[n_vec].val[1]); + } + for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + vst2_s16((int16_t*)dotProductVector, accumulator1[n_vec]); // Store the results back into the dot product vector + dotProduct = lv_cmake(0,0); + for (int i = 0; i < 4; ++i) + { + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), + sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); + } + _out[n_vec] = dotProduct; + } + volk_gnsssdr_free(accumulator1); + volk_gnsssdr_free(accumulator2); vst1q_f32((float32_t*)__phase_real, _phase_real); vst1q_f32((float32_t*)__phase_imag, _phase_imag); From 485a405bab9f2ae06fb7edd19ceda084039f213f Mon Sep 17 00:00:00 2001 From: Carles Fernandez Date: Sun, 20 Mar 2016 13:11:53 +0100 Subject: [PATCH 6/6] Adding new neon kernel and solving x86 issues Managing memory with volk_gnsssdr instead of malloc and free. This seems to solve runtime problems (segmentation faults) in i386 (32 bit) architectures. 
--- .../volk_gnsssdr_16ic_x2_dot_prod_16ic.h | 57 +++++++- .../volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h | 125 +++++++++++++++--- ...olk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h | 28 +++- ...gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h | 3 +- 4 files changed, 187 insertions(+), 26 deletions(-) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h index 5f760c2c7..4ec49d645 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic.h @@ -336,8 +336,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon_vma(lv_16sc_t* out, c tmp.val[0] = vmls_s16(tmp.val[0], a_val.val[1], b_val.val[1]); tmp.val[1] = vmla_s16(tmp.val[1], a_val.val[0], b_val.val[1]); - accumulator.val[0] = vadd_s16(accumulator.val[0], tmp.val[0]); - accumulator.val[1] = vadd_s16(accumulator.val[1], tmp.val[1]); + accumulator.val[0] = vqadd_s16(accumulator.val[0], tmp.val[0]); + accumulator.val[1] = vqadd_s16(accumulator.val[1], tmp.val[1]); a_ptr += 4; b_ptr += 4; @@ -355,4 +355,57 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon_vma(lv_16sc_t* out, c #endif /* LV_HAVE_NEON */ + +#ifdef LV_HAVE_NEON +#include + +static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_neon_optvma(lv_16sc_t* out, const lv_16sc_t* in_a, const lv_16sc_t* in_b, unsigned int num_points) +{ + unsigned int quarter_points = num_points / 4; + unsigned int number; + + lv_16sc_t* a_ptr = (lv_16sc_t*) in_a; + lv_16sc_t* b_ptr = (lv_16sc_t*) in_b; + // for 2-lane vectors, 1st lane holds the real part, + // 2nd lane holds the imaginary part + int16x4x2_t a_val, b_val, accumulator1, accumulator2; + + __VOLK_ATTR_ALIGNED(16) lv_16sc_t accum_result[4]; + 
accumulator1.val[0] = vdup_n_s16(0); + accumulator1.val[1] = vdup_n_s16(0); + accumulator2.val[0] = vdup_n_s16(0); + accumulator2.val[1] = vdup_n_s16(0); + + for(number = 0; number < quarter_points; ++number) + { + a_val = vld2_s16((int16_t*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i + b_val = vld2_s16((int16_t*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i + __builtin_prefetch(a_ptr + 8); + __builtin_prefetch(b_ptr + 8); + + // use 2 accumulators to remove inter-instruction data dependencies + accumulator1.val[0] = vmla_s16(accumulator1.val[0], a_val.val[0], b_val.val[0]); + accumulator1.val[1] = vmla_s16(accumulator1.val[1], a_val.val[0], b_val.val[1]); + accumulator2.val[0] = vmls_s16(accumulator2.val[0], a_val.val[1], b_val.val[1]); + accumulator2.val[1] = vmla_s16(accumulator2.val[1], a_val.val[1], b_val.val[0]); + + a_ptr += 4; + b_ptr += 4; + } + + accumulator1.val[0] = vqadd_s16(accumulator1.val[0], accumulator2.val[0]); + accumulator1.val[1] = vqadd_s16(accumulator1.val[1], accumulator2.val[1]); + + vst2_s16((int16_t*)accum_result, accumulator1); + *out = accum_result[0] + accum_result[1] + accum_result[2] + accum_result[3]; + + // tail case + for(number = quarter_points * 4; number < num_points; ++number) + { + *out += (*a_ptr++) * (*b_ptr++); + } +} + +#endif /* LV_HAVE_NEON */ + #endif /*INCLUDED_volk_gnsssdr_16ic_x2_dot_prod_16ic_H*/ diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h index 6d27bf97a..341b9ebcb 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dot_prod_16ic_xn.h @@ -62,6 +62,8 @@ #include +#include +#include #include #ifdef LV_HAVE_GENERIC @@ -120,11 +122,14 @@ static 
inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* resul { __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4]; - __m128i* realcacc; - __m128i* imagcacc; + __m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); + __m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); - realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 - imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 + for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + realcacc[n_vec] = _mm_setzero_si128(); + imagcacc[n_vec] = _mm_setzero_si128(); + } __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag; @@ -176,8 +181,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_a_sse2(lv_16sc_t* resul } _out[n_vec] = dotProduct; } - free(realcacc); - free(imagcacc); + volk_gnsssdr_free(realcacc); + volk_gnsssdr_free(imagcacc); } for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) @@ -211,11 +216,14 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* resul { __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4]; - __m128i* realcacc; - __m128i* imagcacc; + __m128i* realcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); + __m128i* imagcacc = (__m128i*)volk_gnsssdr_malloc(num_a_vectors * sizeof(__m128i), volk_gnsssdr_get_alignment()); - realcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 - imagcacc = (__m128i*)calloc(num_a_vectors, sizeof(__m128i)); //calloc also sets memory to 0 + for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + realcacc[n_vec] = _mm_setzero_si128(); + imagcacc[n_vec] = _mm_setzero_si128(); + } __m128i a, b, c, c_sr, mask_imag, mask_real, real, imag; @@ -246,7 +254,6 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* 
resul realcacc[n_vec] = _mm_adds_epi16(realcacc[n_vec], real); imagcacc[n_vec] = _mm_adds_epi16(imagcacc[n_vec], imag); - } _in_common += 4; } @@ -267,8 +274,8 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_u_sse2(lv_16sc_t* resul } _out[n_vec] = dotProduct; } - free(realcacc); - free(imagcacc); + volk_gnsssdr_free(realcacc); + volk_gnsssdr_free(imagcacc); } for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) @@ -304,9 +311,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result, int16x4x2_t a_val, b_val, c_val; - //todo dyn mem reg - int16x4x2_t* accumulator; - accumulator = (int16x4x2_t*)calloc(num_a_vectors, sizeof(int16x4x2_t)); + int16x4x2_t* accumulator = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); int16x4x2_t tmp_real, tmp_imag; @@ -357,7 +362,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon(lv_16sc_t* result, } _out[n_vec] = dotProduct; } - free(accumulator); + volk_gnsssdr_free(accumulator); } for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) @@ -393,8 +398,7 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(lv_16sc_t* res int16x4x2_t a_val, b_val, tmp; - int16x4x2_t* accumulator; - accumulator = (int16x4x2_t*)malloc(num_a_vectors * sizeof(int16x4x2_t)); + int16x4x2_t* accumulator = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); for(int n_vec = 0; n_vec < num_a_vectors; n_vec++) { @@ -434,7 +438,88 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_vma(lv_16sc_t* res } _out[n_vec] = dotProduct; } - free(accumulator); + volk_gnsssdr_free(accumulator); + } + + for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + for(unsigned int n = neon_iters * 4; n < num_points; n++) + { + lv_16sc_t tmp = in_common[n] * in_a[n_vec][n]; + + _out[n_vec] = lv_cmake(sat_adds16i(lv_creal(_out[n_vec]), lv_creal(tmp)), + sat_adds16i(lv_cimag(_out[n_vec]), 
lv_cimag(tmp))); + } + } +} +#endif /* LV_HAVE_NEON */ + + +#ifdef LV_HAVE_NEON +#include + +static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_optvma(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) +{ + lv_16sc_t dotProduct = lv_cmake(0,0); + + const unsigned int neon_iters = num_points / 4; + + const lv_16sc_t** _in_a = in_a; + const lv_16sc_t* _in_common = in_common; + lv_16sc_t* _out = result; + + if (neon_iters > 0) + { + __VOLK_ATTR_ALIGNED(16) lv_16sc_t dotProductVector[4]; + + int16x4x2_t a_val, b_val; + + int16x4x2_t* accumulator1 = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); + int16x4x2_t* accumulator2 = (int16x4x2_t*)volk_gnsssdr_malloc(num_a_vectors * sizeof(int16x4x2_t), volk_gnsssdr_get_alignment()); + + for(int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + accumulator1[n_vec].val[0] = vdup_n_s16(0); + accumulator1[n_vec].val[1] = vdup_n_s16(0); + accumulator2[n_vec].val[0] = vdup_n_s16(0); + accumulator2[n_vec].val[1] = vdup_n_s16(0); + } + + for(unsigned int number = 0; number < neon_iters; number++) + { + b_val = vld2_s16((int16_t*)_in_common); //load (2 byte imag, 2 byte real) x 4 into 128 bits reg + __builtin_prefetch(_in_common + 8); + for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + a_val = vld2_s16((int16_t*)&(_in_a[n_vec][number*4])); + + accumulator1[n_vec].val[0] = vmla_s16(accumulator1[n_vec].val[0], a_val.val[0], b_val.val[0]); + accumulator1[n_vec].val[1] = vmla_s16(accumulator1[n_vec].val[1], a_val.val[0], b_val.val[1]); + accumulator2[n_vec].val[0] = vmls_s16(accumulator2[n_vec].val[0], a_val.val[1], b_val.val[1]); + accumulator2[n_vec].val[1] = vmla_s16(accumulator2[n_vec].val[1], a_val.val[1], b_val.val[0]); + } + _in_common += 4; + } + + for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + accumulator1[n_vec].val[0] = vqadd_s16(accumulator1[n_vec].val[0], 
accumulator2[n_vec].val[0]); + accumulator1[n_vec].val[1] = vqadd_s16(accumulator1[n_vec].val[1], accumulator2[n_vec].val[1]); + } + + for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) + { + vst2_s16((int16_t*)dotProductVector, accumulator1[n_vec]); // Store the results back into the dot product vector + dotProduct = lv_cmake(0,0); + for (int i = 0; i < 4; ++i) + { + dotProduct = lv_cmake(sat_adds16i(lv_creal(dotProduct), lv_creal(dotProductVector[i])), + sat_adds16i(lv_cimag(dotProduct), lv_cimag(dotProductVector[i]))); + } + _out[n_vec] = dotProduct; + } + volk_gnsssdr_free(accumulator1); + volk_gnsssdr_free(accumulator2); } for (int n_vec = 0; n_vec < num_a_vectors; n_vec++) diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h index 83c207524..8857f0c0d 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic.h @@ -110,7 +110,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_a_sse2(lv_16sc_t* r #endif /* SSE2 */ -#if LV_HAVE_SSE2 && LV_HAVE_64 +#if LV_HAVE_SSE2 static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_u_sse2(lv_16sc_t* result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points) { @@ -131,7 +131,7 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_u_sse2(lv_16sc_t* r volk_gnsssdr_free(in_a); } -#endif /* LV_HAVE_SSE2 && LV_HAVE_64 */ +#endif /* LV_HAVE_SSE2 */ #ifdef LV_HAVE_NEON @@ -180,6 +180,30 @@ static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon_vma(lv_16sc_t* } #endif // NEON + +#ifdef LV_HAVE_NEON + +static inline void volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_neon_optvma(lv_16sc_t* 
result, const lv_16sc_t* local_code, const lv_16sc_t* in, unsigned int num_points) +{ + int num_a_vectors = 3; + lv_16sc_t** in_a = (lv_16sc_t**)volk_gnsssdr_malloc(sizeof(lv_16sc_t*) * num_a_vectors, volk_gnsssdr_get_alignment()); + for(unsigned int n = 0; n < num_a_vectors; n++) + { + in_a[n] = (lv_16sc_t*)volk_gnsssdr_malloc(sizeof(lv_16sc_t)*num_points, volk_gnsssdr_get_alignment()); + memcpy((lv_16sc_t*)in_a[n], (lv_16sc_t*)in, sizeof(lv_16sc_t)*num_points); + } + + volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_optvma(result, local_code, (const lv_16sc_t**) in_a, num_a_vectors, num_points); + + for(unsigned int n = 0; n < num_a_vectors; n++) + { + volk_gnsssdr_free(in_a[n]); + } + volk_gnsssdr_free(in_a); +} + +#endif // NEON + #endif // INCLUDED_volk_gnsssdr_16ic_x2_dotprodxnpuppet_16ic_H diff --git a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h index cebacb8e3..362ca16db 100644 --- a/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h +++ b/src/algorithms/libs/volk_gnsssdr_module/volk_gnsssdr/kernels/volk_gnsssdr/volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn.h @@ -71,9 +71,8 @@ #include #include -#include #include -//#include +#include #ifdef LV_HAVE_GENERIC