mirror of
https://github.com/gnss-sdr/gnss-sdr
synced 2025-11-18 16:15:21 +00:00
Fix various linting errors, use clang-format-19
Fix various linting errors, both for cpplint and then clang-format. For the latter, used clang-format-19 to quickly fix it all. Signed-off-by: Marcus Alagar <mvala079@gmail.com>
This commit is contained in:
@@ -791,24 +791,22 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_rvv(lv_16sc_t*
|
||||
size_t ROTATOR_RELOAD = 256;
|
||||
|
||||
// Initialize reference pointers of compatible type that will not be stripmined
|
||||
float* phasePtr = (float*) phase;
|
||||
float* phasePtr = (float*)phase;
|
||||
|
||||
// Initialize pointers of compatible type to track progress as stripmine
|
||||
short* outPtr = (short*) result;
|
||||
const short* comPtr = (const short*) in_common;
|
||||
const short** inPtrBuf = (const short**) volk_gnsssdr_malloc(
|
||||
num_a_vectors * sizeof(*in_a), volk_gnsssdr_get_alignment()
|
||||
);
|
||||
short* outPtr = (short*)result;
|
||||
const short* comPtr = (const short*)in_common;
|
||||
const short** inPtrBuf = (const short**)volk_gnsssdr_malloc(num_a_vectors * sizeof(*in_a), volk_gnsssdr_get_alignment());
|
||||
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
// Initialize `out` to zero
|
||||
result[n_vec] = lv_cmake(0, 0);
|
||||
{
|
||||
// Initialize `out` to zero
|
||||
result[n_vec] = lv_cmake(0, 0);
|
||||
|
||||
// Create copies in case pointers within `in_a` are not meant to be modified
|
||||
// (in which case function signature should be `const short* const* in_a`)
|
||||
inPtrBuf[n_vec] = in_a[n_vec];
|
||||
}
|
||||
// Create copies in case pointers within `in_a` are not meant to be modified
|
||||
// (in which case function signature should be `const short* const* in_a`)
|
||||
inPtrBuf[n_vec] = in_a[n_vec];
|
||||
}
|
||||
|
||||
for (int _ = 0; _ < num_points / ROTATOR_RELOAD; _++)
|
||||
{
|
||||
@@ -882,8 +880,8 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_rvv(lv_16sc_t*
|
||||
vint16m2_t outImagVal = __riscv_vmul_vv_i16m2(inVal, comProdImagVal, vl);
|
||||
|
||||
// Load accumulator
|
||||
vint32m1_t accRealVal = __riscv_vmv_s_x_i32m1((int) outPtr[2 * n_vec], 1);
|
||||
vint32m1_t accImagVal = __riscv_vmv_s_x_i32m1((int) outPtr[2 * n_vec + 1], 1);
|
||||
vint32m1_t accRealVal = __riscv_vmv_s_x_i32m1((int)outPtr[2 * n_vec], 1);
|
||||
vint32m1_t accImagVal = __riscv_vmv_s_x_i32m1((int)outPtr[2 * n_vec + 1], 1);
|
||||
|
||||
// acc[0] = sum( acc[0], out[0..vl) )
|
||||
accRealVal = __riscv_vwredsum_vs_i16m2_i32m1(outRealVal, accRealVal, vl);
|
||||
@@ -898,8 +896,8 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_rvv(lv_16sc_t*
|
||||
accImagVal = __riscv_vmax_vx_i32m1(accImagVal, -32768, 1);
|
||||
|
||||
// Store acc[0]
|
||||
outPtr[2 * n_vec] = (short) __riscv_vmv_x_s_i32m1_i32(accRealVal);
|
||||
outPtr[2 * n_vec + 1] = (short) __riscv_vmv_x_s_i32m1_i32(accImagVal);
|
||||
outPtr[2 * n_vec] = (short)__riscv_vmv_x_s_i32m1_i32(accRealVal);
|
||||
outPtr[2 * n_vec + 1] = (short)__riscv_vmv_x_s_i32m1_i32(accImagVal);
|
||||
|
||||
// Increment this pointer
|
||||
inPtrBuf[n_vec] += vl;
|
||||
@@ -918,7 +916,7 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_rvv(lv_16sc_t*
|
||||
// elements left and increment the pointers
|
||||
// by the number of elements processed
|
||||
}
|
||||
// Regenerate phase
|
||||
// Regenerate phase
|
||||
#ifdef __cplusplus
|
||||
(*phase) /= std::abs((*phase));
|
||||
#else
|
||||
@@ -996,8 +994,8 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_rvv(lv_16sc_t*
|
||||
vint16m2_t outImagVal = __riscv_vmul_vv_i16m2(inVal, comProdImagVal, vl);
|
||||
|
||||
// Load accumulator
|
||||
vint32m1_t accRealVal = __riscv_vmv_s_x_i32m1((int) outPtr[2 * n_vec], 1);
|
||||
vint32m1_t accImagVal = __riscv_vmv_s_x_i32m1((int) outPtr[2 * n_vec + 1], 1);
|
||||
vint32m1_t accRealVal = __riscv_vmv_s_x_i32m1((int)outPtr[2 * n_vec], 1);
|
||||
vint32m1_t accImagVal = __riscv_vmv_s_x_i32m1((int)outPtr[2 * n_vec + 1], 1);
|
||||
|
||||
// acc[0] = sum( acc[0], out[0..vl) )
|
||||
accRealVal = __riscv_vwredsum_vs_i16m2_i32m1(outRealVal, accRealVal, vl);
|
||||
@@ -1012,8 +1010,8 @@ static inline void volk_gnsssdr_16ic_16i_rotator_dot_prod_16ic_xn_rvv(lv_16sc_t*
|
||||
accImagVal = __riscv_vmax_vx_i32m1(accImagVal, -32768, 1);
|
||||
|
||||
// Store acc[0]
|
||||
outPtr[2 * n_vec] = (short) __riscv_vmv_x_s_i32m1_i32(accRealVal);
|
||||
outPtr[2 * n_vec + 1] = (short) __riscv_vmv_x_s_i32m1_i32(accImagVal);
|
||||
outPtr[2 * n_vec] = (short)__riscv_vmv_x_s_i32m1_i32(accRealVal);
|
||||
outPtr[2 * n_vec + 1] = (short)__riscv_vmv_x_s_i32m1_i32(accImagVal);
|
||||
|
||||
// Increment this pointer
|
||||
inPtrBuf[n_vec] += vl;
|
||||
|
||||
@@ -195,8 +195,8 @@ static inline void volk_gnsssdr_16ic_conjugate_16ic_rvv(lv_16sc_t* cVector, cons
|
||||
size_t n = num_points;
|
||||
|
||||
// Initialize pointers to keep track as stripmine
|
||||
short* cPtr = (short*) cVector;
|
||||
const short* aPtr = (short*) aVector;
|
||||
short* cPtr = (short*)cVector;
|
||||
const short* aPtr = (short*)aVector;
|
||||
|
||||
for (size_t vl; n > 0; n -= vl, cPtr += vl * 2, aPtr += vl * 2)
|
||||
{
|
||||
|
||||
@@ -275,8 +275,8 @@ static inline void volk_gnsssdr_16ic_convert_32fc_rvv(lv_32fc_t* outputVector, c
|
||||
size_t n = num_points * 2;
|
||||
|
||||
// Initialize pointers to keep track as stripmine
|
||||
float* outPtr = (float*) outputVector;
|
||||
const short* inPtr = (const short*) inputVector;
|
||||
float* outPtr = (float*)outputVector;
|
||||
const short* inPtr = (const short*)inputVector;
|
||||
|
||||
for (size_t vl; n > 0; n -= vl, outPtr += vl, inPtr += vl)
|
||||
{
|
||||
|
||||
@@ -340,14 +340,14 @@ static inline void volk_gnsssdr_16ic_resampler_fast_16ic_rvv(lv_16sc_t* result,
|
||||
// of each complex number as a single 32-bit number to move around
|
||||
|
||||
// Initialize reference pointer, as stays same and not stripmined
|
||||
const int* inPtr = (const int*) local_code;
|
||||
const int* inPtr = (const int*)local_code;
|
||||
|
||||
size_t n = num_output_samples;
|
||||
|
||||
const float constIndexShift = rem_code_phase_chips;
|
||||
|
||||
// Initialize pointer to track progress as stripmine
|
||||
int* outPtr = (int*) result;
|
||||
int* outPtr = (int*)result;
|
||||
// Simulates how, compared to generic implementation, `i` continues
|
||||
// increasing across different vector computation batches
|
||||
unsigned int currI = 0;
|
||||
|
||||
@@ -969,12 +969,12 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_rvv(lv_16sc_t* outVec
|
||||
size_t ROTATOR_RELOAD = 512;
|
||||
|
||||
// Initialize reference pointers of compatible type that will not be stripmined
|
||||
float* phasePtr = (float*) phase;
|
||||
float* phaseIncPtr = (float*) phase_inc;
|
||||
float* phasePtr = (float*)phase;
|
||||
float* phaseIncPtr = (float*)phase_inc;
|
||||
|
||||
// Initialize pointers of compatible type to track progress as stripmine
|
||||
short* outPtr = (short*) outVector;
|
||||
const short* inPtr = (const short*) inVector;
|
||||
short* outPtr = (short*)outVector;
|
||||
const short* inPtr = (const short*)inVector;
|
||||
|
||||
for (int _ = 0; _ < num_points / ROTATOR_RELOAD; _++)
|
||||
{
|
||||
@@ -1058,7 +1058,7 @@ static inline void volk_gnsssdr_16ic_s32fc_x2_rotator_16ic_rvv(lv_16sc_t* outVec
|
||||
// numbers are each stored as two 16-bit numbers
|
||||
}
|
||||
|
||||
// Regenerate phase
|
||||
// Regenerate phase
|
||||
#ifdef __cplusplus
|
||||
(*phase) /= std::abs((*phase));
|
||||
#else
|
||||
|
||||
@@ -565,11 +565,11 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_rvv(lv_16sc_t* result, con
|
||||
|
||||
// Explicitly cast in order to directly fill
|
||||
// with calculated values
|
||||
short* resPtr = (short*) result;
|
||||
short* resPtr = (short*)result;
|
||||
|
||||
// Initialize pointers to track progress as stripmine
|
||||
const short* aPtr = (const short*) in_a;
|
||||
const short* bPtr = (const short*) in_b;
|
||||
const short* aPtr = (const short*)in_a;
|
||||
const short* bPtr = (const short*)in_b;
|
||||
|
||||
// Use 32-bit accumulator in order to saturate
|
||||
// to 16 bits
|
||||
@@ -623,9 +623,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_rvv(lv_16sc_t* result, con
|
||||
}
|
||||
|
||||
// Real part of resultant complex number
|
||||
resPtr[0] = (short) __riscv_vmv_x_s_i32m1_i32(accRealVal);
|
||||
resPtr[0] = (short)__riscv_vmv_x_s_i32m1_i32(accRealVal);
|
||||
// Imaginary part of resultant complex number
|
||||
resPtr[1] = (short) __riscv_vmv_x_s_i32m1_i32(accImagVal);
|
||||
resPtr[1] = (short)__riscv_vmv_x_s_i32m1_i32(accImagVal);
|
||||
}
|
||||
#endif /* LV_HAVE_RVV */
|
||||
|
||||
|
||||
@@ -726,18 +726,19 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_neon_optvma(lv_16sc_t*
|
||||
#ifdef LV_HAVE_RVV
|
||||
#include <riscv_vector.h>
|
||||
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_rvv(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points) {
|
||||
static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_rvv(lv_16sc_t* result, const lv_16sc_t* in_common, const lv_16sc_t** in_a, int num_a_vectors, unsigned int num_points)
|
||||
{
|
||||
int n_vec = num_a_vectors;
|
||||
|
||||
for (int i = 0; i < n_vec; i++)
|
||||
{
|
||||
size_t n = num_points;
|
||||
|
||||
short* resPtr = (short*) &result[i];
|
||||
short* resPtr = (short*)&result[i];
|
||||
|
||||
// Initialize pointers to track progress as stripmine
|
||||
const short* comPtr = (const short*) in_common;
|
||||
const short* aPtr = (const short*) in_a[i];
|
||||
const short* comPtr = (const short*)in_common;
|
||||
const short* aPtr = (const short*)in_a[i];
|
||||
|
||||
// Use 32-bit accumulator in order to saturate
|
||||
// to 16 bits
|
||||
@@ -791,9 +792,9 @@ static inline void volk_gnsssdr_16ic_x2_dot_prod_16ic_xn_rvv(lv_16sc_t* result,
|
||||
}
|
||||
|
||||
// Real part of resultant complex number
|
||||
resPtr[0] = (short) __riscv_vmv_x_s_i32m1_i32(accRealVal);
|
||||
resPtr[0] = (short)__riscv_vmv_x_s_i32m1_i32(accRealVal);
|
||||
// Imaginary part of resultant complex number
|
||||
resPtr[1] = (short) __riscv_vmv_x_s_i32m1_i32(accImagVal);
|
||||
resPtr[1] = (short)__riscv_vmv_x_s_i32m1_i32(accImagVal);
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_RVV */
|
||||
|
||||
@@ -335,9 +335,9 @@ static inline void volk_gnsssdr_16ic_x2_multiply_16ic_rvv(lv_16sc_t* result, con
|
||||
size_t n = num_points;
|
||||
|
||||
// Initialize pointers to keep track as stripmine
|
||||
short* resPtr = (short*) result;
|
||||
const short* aPtr = (const short*) in_a;
|
||||
const short* bPtr = (const short*) in_b;
|
||||
short* resPtr = (short*)result;
|
||||
const short* aPtr = (const short*)in_a;
|
||||
const short* bPtr = (const short*)in_b;
|
||||
|
||||
for (size_t vl; n > 0; n -= vl, resPtr += vl * 2, aPtr += vl * 2, bPtr += vl * 2)
|
||||
{
|
||||
|
||||
@@ -1869,24 +1869,23 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_rvv(lv_16sc_t*
|
||||
size_t ROTATOR_RELOAD = 256;
|
||||
|
||||
// Initialize reference pointers of compatible type that will not be stripmined
|
||||
float* phasePtr = (float*) phase;
|
||||
float* phasePtr = (float*)phase;
|
||||
|
||||
// Initialize pointers of compatible type to track progress as stripmine
|
||||
short* outPtr = (short*) result;
|
||||
const short* comPtr = (const short*) in_common;
|
||||
const short** inPtrBuf = (const short**) volk_gnsssdr_malloc(
|
||||
num_a_vectors * sizeof(*in_a), volk_gnsssdr_get_alignment()
|
||||
);
|
||||
short* outPtr = (short*)result;
|
||||
const short* comPtr = (const short*)in_common;
|
||||
const short** inPtrBuf = (const short**)volk_gnsssdr_malloc(
|
||||
num_a_vectors * sizeof(*in_a), volk_gnsssdr_get_alignment());
|
||||
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
// Initialize `out` to zero
|
||||
result[n_vec] = lv_cmake(0, 0);
|
||||
{
|
||||
// Initialize `out` to zero
|
||||
result[n_vec] = lv_cmake(0, 0);
|
||||
|
||||
// Treat complex number as struct containing
|
||||
// two 16-bit integers
|
||||
inPtrBuf[n_vec] = (const short*) in_a[n_vec];
|
||||
}
|
||||
// Treat complex number as struct containing
|
||||
// two 16-bit integers
|
||||
inPtrBuf[n_vec] = (const short*)in_a[n_vec];
|
||||
}
|
||||
|
||||
for (int _ = 0; _ < num_points / ROTATOR_RELOAD; _++)
|
||||
{
|
||||
@@ -1964,8 +1963,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_rvv(lv_16sc_t*
|
||||
outImagVal = __riscv_vmacc_vv_i16m2(outImagVal, inImagVal, comProdRealVal, vl);
|
||||
|
||||
// Load accumulator
|
||||
vint32m1_t accRealVal = __riscv_vmv_s_x_i32m1((int) outPtr[2 * n_vec], 1);
|
||||
vint32m1_t accImagVal = __riscv_vmv_s_x_i32m1((int) outPtr[2 * n_vec + 1], 1);
|
||||
vint32m1_t accRealVal = __riscv_vmv_s_x_i32m1((int)outPtr[2 * n_vec], 1);
|
||||
vint32m1_t accImagVal = __riscv_vmv_s_x_i32m1((int)outPtr[2 * n_vec + 1], 1);
|
||||
|
||||
// acc[0] = sum( acc[0], out[0..vl) )
|
||||
accRealVal = __riscv_vwredsum_vs_i16m2_i32m1(outRealVal, accRealVal, vl);
|
||||
@@ -1980,8 +1979,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_rvv(lv_16sc_t*
|
||||
accImagVal = __riscv_vmax_vx_i32m1(accImagVal, -32768, 1);
|
||||
|
||||
// Store acc[0]
|
||||
outPtr[2 * n_vec] = (short) __riscv_vmv_x_s_i32m1_i32(accRealVal);
|
||||
outPtr[2 * n_vec + 1] = (short) __riscv_vmv_x_s_i32m1_i32(accImagVal);
|
||||
outPtr[2 * n_vec] = (short)__riscv_vmv_x_s_i32m1_i32(accRealVal);
|
||||
outPtr[2 * n_vec + 1] = (short)__riscv_vmv_x_s_i32m1_i32(accImagVal);
|
||||
|
||||
// Increment this pointer, accounting how each complex
|
||||
// element is two 16-bit integer numbers
|
||||
@@ -2001,7 +2000,7 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_rvv(lv_16sc_t*
|
||||
// elements left and increment the pointers
|
||||
// by the number of elements processed
|
||||
}
|
||||
// Regenerate phase
|
||||
// Regenerate phase
|
||||
#ifdef __cplusplus
|
||||
(*phase) /= std::abs((*phase));
|
||||
#else
|
||||
@@ -2083,8 +2082,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_rvv(lv_16sc_t*
|
||||
outImagVal = __riscv_vmacc_vv_i16m2(outImagVal, inImagVal, comProdRealVal, vl);
|
||||
|
||||
// Load accumulator
|
||||
vint32m1_t accRealVal = __riscv_vmv_s_x_i32m1((int) outPtr[2 * n_vec], 1);
|
||||
vint32m1_t accImagVal = __riscv_vmv_s_x_i32m1((int) outPtr[2 * n_vec + 1], 1);
|
||||
vint32m1_t accRealVal = __riscv_vmv_s_x_i32m1((int)outPtr[2 * n_vec], 1);
|
||||
vint32m1_t accImagVal = __riscv_vmv_s_x_i32m1((int)outPtr[2 * n_vec + 1], 1);
|
||||
|
||||
// acc[0] = sum( acc[0], out[0..vl) )
|
||||
accRealVal = __riscv_vwredsum_vs_i16m2_i32m1(outRealVal, accRealVal, vl);
|
||||
@@ -2099,8 +2098,8 @@ static inline void volk_gnsssdr_16ic_x2_rotator_dot_prod_16ic_xn_rvv(lv_16sc_t*
|
||||
accImagVal = __riscv_vmax_vx_i32m1(accImagVal, -32768, 1);
|
||||
|
||||
// Store acc[0]
|
||||
outPtr[2 * n_vec] = (short) __riscv_vmv_x_s_i32m1_i32(accRealVal);
|
||||
outPtr[2 * n_vec + 1] = (short) __riscv_vmv_x_s_i32m1_i32(accImagVal);
|
||||
outPtr[2 * n_vec] = (short)__riscv_vmv_x_s_i32m1_i32(accRealVal);
|
||||
outPtr[2 * n_vec + 1] = (short)__riscv_vmv_x_s_i32m1_i32(accImagVal);
|
||||
|
||||
// Increment this pointer, accounting how each complex
|
||||
// element is two 16-bit integer numbers
|
||||
|
||||
@@ -605,7 +605,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_rvv(lv_16sc_t** result
|
||||
// of each complex number as a single 32-bit number to move around
|
||||
|
||||
// Initialize reference pointer, as stays same across loops
|
||||
const int* inPtr = (const int*) local_code;
|
||||
const int* inPtr = (const int*)local_code;
|
||||
|
||||
for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
@@ -614,7 +614,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_16ic_xn_rvv(lv_16sc_t** result
|
||||
const float constIndexShift = shifts_chips[current_correlator_tap] - rem_code_phase_chips;
|
||||
|
||||
// Initialize pointers to track progress as stripmine
|
||||
int* outPtr = (int*) result[current_correlator_tap];
|
||||
int* outPtr = (int*)result[current_correlator_tap];
|
||||
// Simulates how, compared to generic implementation, `i` continues
|
||||
// increasing across different vector computation batches
|
||||
unsigned int currI = 0;
|
||||
|
||||
@@ -382,10 +382,10 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_rvv(lv_16sc_t** r
|
||||
// of each complex number as a single 32-bit number to move around
|
||||
|
||||
// Initialize reference pointer, as stays same and not stripmined
|
||||
const int* inPtr = (const int*) local_code;
|
||||
const int* inPtr = (const int*)local_code;
|
||||
|
||||
// Initialize variable to clearer, applicable type
|
||||
int code_len = (int) code_length_chips;
|
||||
int code_len = (int)code_length_chips;
|
||||
|
||||
for (int current_vector = 0; current_vector < num_out_vectors; current_vector++)
|
||||
{
|
||||
@@ -394,7 +394,7 @@ static inline void volk_gnsssdr_16ic_xn_resampler_fast_16ic_xn_rvv(lv_16sc_t** r
|
||||
const float constIndexShift = rem_code_phase_chips[current_vector];
|
||||
|
||||
// Initialize pointer to track progress as stripmine
|
||||
int* outPtr = (int*) result[current_vector];
|
||||
int* outPtr = (int*)result[current_vector];
|
||||
// Simulates how, compared to generic implementation, `i` continues
|
||||
// increasing across different vector computation batches
|
||||
unsigned int currI = 0;
|
||||
|
||||
@@ -755,7 +755,7 @@ static inline void volk_gnsssdr_32f_sincos_32fc_rvv(lv_32fc_t* out, const float*
|
||||
size_t n = num_points;
|
||||
|
||||
// Initialize pointers to keep track as stripmine
|
||||
float* outPtr = (float*) out;
|
||||
float* outPtr = (float*)out;
|
||||
const float* inPtr = in;
|
||||
|
||||
for (size_t vl; n > 0; n -= vl, outPtr += vl * 2, inPtr += vl)
|
||||
@@ -768,7 +768,7 @@ static inline void volk_gnsssdr_32f_sincos_32fc_rvv(lv_32fc_t* out, const float*
|
||||
|
||||
// Save initial signs
|
||||
// signMask[i] = in[i] < 0
|
||||
vbool8_t signMask = __riscv_vmflt_vf_f32m4_b8(inVal, (float) 0, vl);
|
||||
vbool8_t signMask = __riscv_vmflt_vf_f32m4_b8(inVal, (float)0, vl);
|
||||
|
||||
// x[i] = |in[i]|
|
||||
vfloat32m4_t xVal = __riscv_vfabs_v_f32m4(inVal, vl);
|
||||
@@ -843,11 +843,9 @@ static inline void volk_gnsssdr_32f_sincos_32fc_rvv(lv_32fc_t* out, const float*
|
||||
// outImag[i] = sinSignMask ? -sin[i] : sin[i]
|
||||
// outReal[i] = cosSignMask ? cos[i] : -cos[i]
|
||||
vfloat32m4_t outImagVal = __riscv_vmerge_vvm_f32m4(
|
||||
sinVal, __riscv_vfneg_v_f32m4(sinVal, vl), sinSignMask, vl
|
||||
);
|
||||
sinVal, __riscv_vfneg_v_f32m4(sinVal, vl), sinSignMask, vl);
|
||||
vfloat32m4_t outRealVal = __riscv_vmerge_vvm_f32m4(
|
||||
__riscv_vfneg_v_f32m4(cosVal, vl), cosVal, cosSignMask, vl
|
||||
);
|
||||
__riscv_vfneg_v_f32m4(cosVal, vl), cosVal, cosSignMask, vl);
|
||||
|
||||
// Store out[0..vl)
|
||||
vfloat32m4x2_t outVal = __riscv_vcreate_v_f32m4x2(outRealVal, outImagVal);
|
||||
@@ -856,7 +854,7 @@ static inline void volk_gnsssdr_32f_sincos_32fc_rvv(lv_32fc_t* out, const float*
|
||||
// In looping, decrement the number of
|
||||
// elements left and increment the pointers
|
||||
// by the number of elements processed,
|
||||
// taking into account how the output `vl`
|
||||
// taking into account how the output `vl`
|
||||
// complex numbers are stored as 2 `float`s
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,7 +78,7 @@ static inline void volk_gnsssdr_32f_xn_high_dynamics_resampler_32f_xn_generic(fl
|
||||
if (local_code_chip_index < 0) local_code_chip_index += (int)code_length_chips * (abs(local_code_chip_index) / code_length_chips + 1);
|
||||
local_code_chip_index = local_code_chip_index % code_length_chips;
|
||||
result[0][n] = local_code[local_code_chip_index];
|
||||
}
|
||||
}
|
||||
|
||||
// adjacent correlators
|
||||
unsigned int shift_samples = 0;
|
||||
|
||||
@@ -492,24 +492,23 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_rvv(lv_32fc_t*
|
||||
size_t ROTATOR_RELOAD = 256;
|
||||
|
||||
// Initialize reference pointers of compatible type that will not be stripmined
|
||||
float* phasePtr = (float*) phase;
|
||||
float* phasePtr = (float*)phase;
|
||||
|
||||
// Initialize pointers of compatible type to track progress as stripmine
|
||||
float* outPtr = (float*) result;
|
||||
const float* comPtr = (const float*) in_common;
|
||||
const float** inPtrBuf = (const float**) volk_gnsssdr_malloc(
|
||||
num_a_vectors * sizeof(*in_a), volk_gnsssdr_get_alignment()
|
||||
);
|
||||
float* outPtr = (float*)result;
|
||||
const float* comPtr = (const float*)in_common;
|
||||
const float** inPtrBuf = (const float**)volk_gnsssdr_malloc(
|
||||
num_a_vectors * sizeof(*in_a), volk_gnsssdr_get_alignment());
|
||||
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
// Initialize `out` to zero
|
||||
result[n_vec] = lv_cmake(0.0f, 0.0f);
|
||||
{
|
||||
// Initialize `out` to zero
|
||||
result[n_vec] = lv_cmake(0.0f, 0.0f);
|
||||
|
||||
// Treat complex number as struct containing
|
||||
// two 16-bit integers
|
||||
inPtrBuf[n_vec] = in_a[n_vec];
|
||||
}
|
||||
// Treat complex number as struct containing
|
||||
// two 16-bit integers
|
||||
inPtrBuf[n_vec] = in_a[n_vec];
|
||||
}
|
||||
|
||||
for (int _ = 0; _ < num_points / ROTATOR_RELOAD; _++)
|
||||
{
|
||||
@@ -602,8 +601,8 @@ static inline void volk_gnsssdr_32fc_32f_rotator_dot_prod_32fc_xn_rvv(lv_32fc_t*
|
||||
// In looping, decrement the number of
|
||||
// elements left and increment the pointers
|
||||
// by the number of elements processed
|
||||
}
|
||||
// Regenerate phase
|
||||
}
|
||||
// Regenerate phase
|
||||
#ifdef __cplusplus
|
||||
(*phase) /= std::abs((*phase));
|
||||
#else
|
||||
|
||||
@@ -471,8 +471,8 @@ static inline void volk_gnsssdr_32fc_convert_16ic_rvv(lv_16sc_t* outputVector, c
|
||||
size_t n = num_points * 2;
|
||||
|
||||
// Initialize pointers to keep track as stripmine
|
||||
short* outPtr = (short*) outputVector;
|
||||
const float* inPtr = (const float*) inputVector;
|
||||
short* outPtr = (short*)outputVector;
|
||||
const float* inPtr = (const float*)inputVector;
|
||||
|
||||
for (size_t vl; n > 0; n -= vl, outPtr += vl, inPtr += vl)
|
||||
{
|
||||
@@ -485,8 +485,8 @@ static inline void volk_gnsssdr_32fc_convert_16ic_rvv(lv_16sc_t* outputVector, c
|
||||
vfloat32m8_t inVal = __riscv_vle32_v_f32m8(inPtr, vl);
|
||||
|
||||
// Saturate in[i] to 16 bits
|
||||
inVal = __riscv_vfmin_vf_f32m8(inVal, (float) 32767, vl);
|
||||
inVal = __riscv_vfmax_vf_f32m8(inVal, (float) -32768, vl);
|
||||
inVal = __riscv_vfmin_vf_f32m8(inVal, (float)32767, vl);
|
||||
inVal = __riscv_vfmax_vf_f32m8(inVal, (float)-32768, vl);
|
||||
|
||||
// out[i] = (short) in[i]
|
||||
vint16m4_t outVal = __riscv_vfncvt_x_f_w_i16m4(inVal, vl);
|
||||
|
||||
@@ -464,8 +464,8 @@ static inline void volk_gnsssdr_32fc_convert_8ic_rvv(lv_8sc_t* outputVector, con
|
||||
// Initialize pointers to keep track as stripmine
|
||||
// Assuming `signed char` is intended, as `char`'s
|
||||
// signedness is implementation-based
|
||||
signed char* outPtr = (signed char*) outputVector;
|
||||
const float* inPtr = (const float*) inputVector;
|
||||
signed char* outPtr = (signed char*)outputVector;
|
||||
const float* inPtr = (const float*)inputVector;
|
||||
|
||||
for (size_t vl; n > 0; n -= vl, outPtr += vl, inPtr += vl)
|
||||
{
|
||||
@@ -480,11 +480,11 @@ static inline void volk_gnsssdr_32fc_convert_8ic_rvv(lv_8sc_t* outputVector, con
|
||||
// For some reason, generic implementation
|
||||
// multiplies float by `INT8_MAX` before converting
|
||||
// tmp[i] *= INT8_MAX
|
||||
vfloat32m8_t tmp32Val = __riscv_vfmul_vf_f32m8(inVal, (float) 127, vl);
|
||||
vfloat32m8_t tmp32Val = __riscv_vfmul_vf_f32m8(inVal, (float)127, vl);
|
||||
|
||||
// Saturate tmp[i] to 8 bits
|
||||
tmp32Val = __riscv_vfmin_vf_f32m8(tmp32Val, (float) 127, vl);
|
||||
tmp32Val = __riscv_vfmax_vf_f32m8(tmp32Val, (float) -128, vl);
|
||||
tmp32Val = __riscv_vfmin_vf_f32m8(tmp32Val, (float)127, vl);
|
||||
tmp32Val = __riscv_vfmax_vf_f32m8(tmp32Val, (float)-128, vl);
|
||||
|
||||
// out[i] = (signed char) tmp[i]
|
||||
vint16m4_t tmp16Val = __riscv_vfncvt_x_f_w_i16m4(tmp32Val, vl);
|
||||
|
||||
@@ -797,24 +797,23 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_rvv(lv_32fc_t*
|
||||
size_t ROTATOR_RELOAD = 256;
|
||||
|
||||
// Initialize reference pointers of compatible type that will not be stripmined
|
||||
float* phasePtr = (float*) phase;
|
||||
float* phasePtr = (float*)phase;
|
||||
|
||||
// Initialize pointers of compatible type to track progress as stripmine
|
||||
float* outPtr = (float*) result;
|
||||
const float* comPtr = (const float*) in_common;
|
||||
const float** inPtrBuf = (const float**) volk_gnsssdr_malloc(
|
||||
num_a_vectors * sizeof(*in_a), volk_gnsssdr_get_alignment()
|
||||
);
|
||||
float* outPtr = (float*)result;
|
||||
const float* comPtr = (const float*)in_common;
|
||||
const float** inPtrBuf = (const float**)volk_gnsssdr_malloc(
|
||||
num_a_vectors * sizeof(*in_a), volk_gnsssdr_get_alignment());
|
||||
|
||||
for (int n_vec = 0; n_vec < num_a_vectors; n_vec++)
|
||||
{
|
||||
// Initialize `out` to zero
|
||||
result[n_vec] = lv_cmake(0.0f, 0.0f);
|
||||
{
|
||||
// Initialize `out` to zero
|
||||
result[n_vec] = lv_cmake(0.0f, 0.0f);
|
||||
|
||||
// Treat complex number as struct containing
|
||||
// two `float`s
|
||||
inPtrBuf[n_vec] = (float*) in_a[n_vec];
|
||||
}
|
||||
// Treat complex number as struct containing
|
||||
// two `float`s
|
||||
inPtrBuf[n_vec] = (float*)in_a[n_vec];
|
||||
}
|
||||
|
||||
for (int _ = 0; _ < num_points / ROTATOR_RELOAD; _++)
|
||||
{
|
||||
@@ -913,7 +912,7 @@ static inline void volk_gnsssdr_32fc_x2_rotator_dot_prod_32fc_xn_rvv(lv_32fc_t*
|
||||
// elements left and increment the pointers
|
||||
// by the number of elements processed
|
||||
}
|
||||
// Regenerate phase
|
||||
// Regenerate phase
|
||||
#ifdef __cplusplus
|
||||
(*phase) /= std::abs((*phase));
|
||||
#else
|
||||
|
||||
@@ -764,7 +764,7 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_rvv(lv_32fc_t** result
|
||||
// of each complex number as a single 64-bit number to move around
|
||||
|
||||
// Initialize reference pointer, as stays same across loops
|
||||
const long* inPtr = (const long*) local_code;
|
||||
const long* inPtr = (const long*)local_code;
|
||||
|
||||
for (int current_correlator_tap = 0; current_correlator_tap < num_out_vectors; current_correlator_tap++)
|
||||
{
|
||||
@@ -773,7 +773,7 @@ static inline void volk_gnsssdr_32fc_xn_resampler_32fc_xn_rvv(lv_32fc_t** result
|
||||
const float constIndexShift = shifts_chips[current_correlator_tap] - rem_code_phase_chips;
|
||||
|
||||
// Initialize pointers to track progress as stripmine
|
||||
long* outPtr = (long*) result[current_correlator_tap];
|
||||
long* outPtr = (long*)result[current_correlator_tap];
|
||||
// Simulates how, compared to generic implementation, `i` continues
|
||||
// increasing across different vector computation batches
|
||||
unsigned int currI = 0;
|
||||
|
||||
@@ -233,7 +233,7 @@ static inline void volk_gnsssdr_64f_accumulator_64f_rvv(double* result, const do
|
||||
const double* inPtr = inputBuffer;
|
||||
|
||||
// acc[0] = 0
|
||||
vfloat64m1_t accVal = __riscv_vfmv_v_f_f64m1((double) 0, 1);
|
||||
vfloat64m1_t accVal = __riscv_vfmv_v_f_f64m1((double)0, 1);
|
||||
|
||||
for (size_t vl; n > 0; n -= vl, inPtr += vl)
|
||||
{
|
||||
|
||||
@@ -228,7 +228,7 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_rvv(char* result, const char*
|
||||
|
||||
// Initialize pointer of correct type
|
||||
// to keep track while stripmining
|
||||
const signed char* inPtr = (const signed char*) inputBuffer;
|
||||
const signed char* inPtr = (const signed char*)inputBuffer;
|
||||
|
||||
// acc[0] = 0
|
||||
vint8m1_t accVal = __riscv_vmv_v_x_i8m1(0, 1);
|
||||
@@ -251,7 +251,7 @@ static inline void volk_gnsssdr_8i_accumulator_s8i_rvv(char* result, const char*
|
||||
}
|
||||
|
||||
// Explicitly cast to type accepted by macro
|
||||
signed char* resPtr = (signed char*) result;
|
||||
signed char* resPtr = (signed char*)result;
|
||||
|
||||
// *result = acc[0]
|
||||
// NOTE: With this implementation,
|
||||
|
||||
@@ -215,9 +215,9 @@ static inline void volk_gnsssdr_8i_x2_add_8i_rvv(char* cVector, const char* aVec
|
||||
// Initialize pointers to track progress as stripmine
|
||||
// Macro expects `int8_t`, and `char`'s signedness
|
||||
// depends on implementation
|
||||
signed char* cPtr = (signed char*) cVector; // For consistency
|
||||
const signed char* aPtr = (const signed char*) aVector;
|
||||
const signed char* bPtr = (const signed char*) bVector;
|
||||
signed char* cPtr = (signed char*)cVector; // For consistency
|
||||
const signed char* aPtr = (const signed char*)aVector;
|
||||
const signed char* bPtr = (const signed char*)bVector;
|
||||
|
||||
for (size_t vl; n > 0; n -= vl, cPtr += vl, aPtr += vl, bPtr += vl)
|
||||
{
|
||||
|
||||
@@ -363,8 +363,8 @@ static inline void volk_gnsssdr_8ic_conjugate_8ic_rvv(lv_8sc_t* cVector, const l
|
||||
// Initialize pointers to track progress as stripmine
|
||||
// Assuming that intended to use `signed char`,
|
||||
// as `char`'s signedness is implementation-specific
|
||||
signed char* cPtr = (signed char*) cVector;
|
||||
const signed char* aPtr = (const signed char*) aVector;
|
||||
signed char* cPtr = (signed char*)cVector;
|
||||
const signed char* aPtr = (const signed char*)aVector;
|
||||
|
||||
for (size_t vl; n > 0; n -= vl, cPtr += vl * 2, aPtr += vl * 2)
|
||||
{
|
||||
|
||||
@@ -185,8 +185,8 @@ static inline void volk_gnsssdr_8ic_magnitude_squared_8i_rvv(char* magnitudeVect
|
||||
// Initialize pointers to track progress as stripmine
|
||||
// Assuming that intended to use `signed char`,
|
||||
// as `char`'s signedness is implementation-specific
|
||||
signed char* outPtr = (signed char*) magnitudeVector;
|
||||
const signed char* inPtr = (const signed char*) complexVector;
|
||||
signed char* outPtr = (signed char*)magnitudeVector;
|
||||
const signed char* inPtr = (const signed char*)complexVector;
|
||||
|
||||
for (size_t vl; n > 0; n -= vl, outPtr += vl, inPtr += vl * 2)
|
||||
{
|
||||
|
||||
@@ -493,13 +493,13 @@ static inline void volk_gnsssdr_8ic_x2_dot_prod_8ic_rvv(lv_8sc_t* result, const
|
||||
|
||||
// Explicitly cast in order to directly fill
|
||||
// with calculated values
|
||||
signed char* resPtr = (signed char*) result;
|
||||
signed char* resPtr = (signed char*)result;
|
||||
|
||||
// Initialize pointers to track progress as stripmine
|
||||
// Assuming that intended to use `signed char`,
|
||||
// as `char`'s signedness is implementation-specific
|
||||
const signed char* aPtr = (const signed char*) in_a;
|
||||
const signed char* bPtr = (const signed char*) in_b;
|
||||
const signed char* aPtr = (const signed char*)in_a;
|
||||
const signed char* bPtr = (const signed char*)in_b;
|
||||
|
||||
// accReal[0] = 0
|
||||
vint8m1_t accRealVal = __riscv_vmv_s_x_i8m1(0, 1);
|
||||
|
||||
@@ -290,52 +290,54 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector,
|
||||
#ifdef LV_HAVE_RVV
|
||||
#include <riscv_vector.h>
|
||||
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_rvv(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) {
|
||||
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_rvv(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points)
|
||||
{
|
||||
size_t n = num_points;
|
||||
|
||||
// Initialize pointers to track progress as stripmine
|
||||
// Assuming that intended to use `signed char`
|
||||
// as `char`'s signedness is implementation-specific
|
||||
signed char* cPtr = (signed char*) cVector;
|
||||
const signed char* aPtr = (const signed char*) aVector;
|
||||
const signed char* bPtr = (const signed char*) bVector;
|
||||
signed char* cPtr = (signed char*)cVector;
|
||||
const signed char* aPtr = (const signed char*)aVector;
|
||||
const signed char* bPtr = (const signed char*)bVector;
|
||||
|
||||
for (size_t vl; n > 0; n -= vl, cPtr += vl * 2, aPtr += vl * 2, bPtr += vl * 2) {
|
||||
// Record how many elements will actually be processed
|
||||
vl = __riscv_vsetvl_e8m4(n);
|
||||
for (size_t vl; n > 0; n -= vl, cPtr += vl * 2, aPtr += vl * 2, bPtr += vl * 2)
|
||||
{
|
||||
// Record how many elements will actually be processed
|
||||
vl = __riscv_vsetvl_e8m4(n);
|
||||
|
||||
// Load aReal[0..vl), aImag[0..vl)
|
||||
vint8m4x2_t aVal = __riscv_vlseg2e8_v_i8m4x2(aPtr, vl);
|
||||
vint8m4_t aRealVal = __riscv_vget_v_i8m4x2_i8m4(aVal, 0);
|
||||
vint8m4_t aImagVal = __riscv_vget_v_i8m4x2_i8m4(aVal, 1);
|
||||
// Load aReal[0..vl), aImag[0..vl)
|
||||
vint8m4x2_t aVal = __riscv_vlseg2e8_v_i8m4x2(aPtr, vl);
|
||||
vint8m4_t aRealVal = __riscv_vget_v_i8m4x2_i8m4(aVal, 0);
|
||||
vint8m4_t aImagVal = __riscv_vget_v_i8m4x2_i8m4(aVal, 1);
|
||||
|
||||
// Load bReal[0..vl), bImag[0..vl)
|
||||
vint8m4x2_t bVal = __riscv_vlseg2e8_v_i8m4x2(bPtr, vl);
|
||||
vint8m4_t bRealVal = __riscv_vget_v_i8m4x2_i8m4(bVal, 0);
|
||||
vint8m4_t bImagVal = __riscv_vget_v_i8m4x2_i8m4(bVal, 1);
|
||||
// Load bReal[0..vl), bImag[0..vl)
|
||||
vint8m4x2_t bVal = __riscv_vlseg2e8_v_i8m4x2(bPtr, vl);
|
||||
vint8m4_t bRealVal = __riscv_vget_v_i8m4x2_i8m4(bVal, 0);
|
||||
vint8m4_t bImagVal = __riscv_vget_v_i8m4x2_i8m4(bVal, 1);
|
||||
|
||||
// cReal[i] = aReal[i] * bReal[i]
|
||||
vint8m4_t cRealVal = __riscv_vmul_vv_i8m4(aRealVal, bRealVal, vl);
|
||||
// cReal[i] = aReal[i] * bReal[i]
|
||||
vint8m4_t cRealVal = __riscv_vmul_vv_i8m4(aRealVal, bRealVal, vl);
|
||||
|
||||
// cReal[i] = -(aImag[i] * bImag[i]) + cReal[i]
|
||||
cRealVal = __riscv_vnmsac_vv_i8m4(cRealVal, aImagVal, bImagVal, vl);
|
||||
// cReal[i] = -(aImag[i] * bImag[i]) + cReal[i]
|
||||
cRealVal = __riscv_vnmsac_vv_i8m4(cRealVal, aImagVal, bImagVal, vl);
|
||||
|
||||
// cImag[i] = aReal[i] * bImag[i]
|
||||
vint8m4_t cImagVal = __riscv_vmul_vv_i8m4(aRealVal, bImagVal, vl);
|
||||
// cImag[i] = aReal[i] * bImag[i]
|
||||
vint8m4_t cImagVal = __riscv_vmul_vv_i8m4(aRealVal, bImagVal, vl);
|
||||
|
||||
// cImag[i] = (aImag[i] * bReal[i]) + cImag[i]
|
||||
cImagVal = __riscv_vmacc_vv_i8m4(cImagVal, aImagVal, bRealVal, vl);
|
||||
// cImag[i] = (aImag[i] * bReal[i]) + cImag[i]
|
||||
cImagVal = __riscv_vmacc_vv_i8m4(cImagVal, aImagVal, bRealVal, vl);
|
||||
|
||||
// Store cReal[0..vl), cImag[0..vl)
|
||||
vint8m4x2_t cVal = __riscv_vcreate_v_i8m4x2(cRealVal, cImagVal);
|
||||
__riscv_vsseg2e8_v_i8m4x2(cPtr, cVal, vl);
|
||||
// Store cReal[0..vl), cImag[0..vl)
|
||||
vint8m4x2_t cVal = __riscv_vcreate_v_i8m4x2(cRealVal, cImagVal);
|
||||
__riscv_vsseg2e8_v_i8m4x2(cPtr, cVal, vl);
|
||||
|
||||
// In looping, decrement the number of
|
||||
// elements left and increment the pointers
|
||||
// by the number of elements processed,
|
||||
// taking into account how the `vl` complex
|
||||
// numbers are each stored as two 1-byte `char`s
|
||||
}
|
||||
// In looping, decrement the number of
|
||||
// elements left and increment the pointers
|
||||
// by the number of elements processed,
|
||||
// taking into account how the `vl` complex
|
||||
// numbers are each stored as two 1-byte `char`s
|
||||
}
|
||||
}
|
||||
#endif /* LV_HAVE_RVV */
|
||||
|
||||
|
||||
@@ -263,7 +263,6 @@ static inline void volk_gnsssdr_8u_x2_multiply_8u_a_avx2(unsigned char* cChar, c
|
||||
#endif /* LV_HAVE_SSE3 */
|
||||
|
||||
|
||||
|
||||
#ifdef LV_HAVE_RVV
|
||||
#include <riscv_vector.h>
|
||||
|
||||
|
||||
@@ -937,7 +937,7 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_neon(lv_32fc_t *out, const floa
|
||||
#include <riscv_vector.h>
|
||||
|
||||
// Reverse-engineered from NEON implementation
|
||||
static inline void volk_gnsssdr_s32f_sincos_32fc_rvv(lv_32fc_t* out, const float phase_inc, float* phase, unsigned int num_points)
|
||||
static inline void volk_gnsssdr_s32f_sincos_32fc_rvv(lv_32fc_t *out, const float phase_inc, float *phase, unsigned int num_points)
|
||||
{
|
||||
// Copied from other implementations, specifically NEON
|
||||
const float c_minus_cephes_DP1 = -0.78515625;
|
||||
@@ -954,10 +954,10 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_rvv(lv_32fc_t* out, const float
|
||||
size_t n = num_points;
|
||||
|
||||
// Initialize other pointers for consistency
|
||||
float* phasePtr = phase;
|
||||
float *phasePtr = phase;
|
||||
|
||||
// Initialize pointers to keep track as stripmine
|
||||
float* outPtr = (float*) out;
|
||||
float *outPtr = (float *)out;
|
||||
|
||||
for (size_t vl; n > 0; n -= vl, outPtr += vl * 2)
|
||||
{
|
||||
@@ -979,7 +979,7 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_rvv(lv_32fc_t* out, const float
|
||||
|
||||
// Save initial signs
|
||||
// signMask[i] = phase[i] < 0
|
||||
vbool8_t signMask = __riscv_vmflt_vf_f32m4_b8(phaseVal, (float) 0, vl);
|
||||
vbool8_t signMask = __riscv_vmflt_vf_f32m4_b8(phaseVal, (float)0, vl);
|
||||
|
||||
// x[i] = |phase[i]|
|
||||
vfloat32m4_t xVal = __riscv_vfabs_v_f32m4(phaseVal, vl);
|
||||
@@ -1054,11 +1054,9 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_rvv(lv_32fc_t* out, const float
|
||||
// outImag[i] = sinSignMask ? -sin[i] : sin[i]
|
||||
// outReal[i] = cosSignMask ? cos[i] : -cos[i]
|
||||
vfloat32m4_t outImagVal = __riscv_vmerge_vvm_f32m4(
|
||||
sinVal, __riscv_vfneg_v_f32m4(sinVal, vl), sinSignMask, vl
|
||||
);
|
||||
sinVal, __riscv_vfneg_v_f32m4(sinVal, vl), sinSignMask, vl);
|
||||
vfloat32m4_t outRealVal = __riscv_vmerge_vvm_f32m4(
|
||||
__riscv_vfneg_v_f32m4(cosVal, vl), cosVal, cosSignMask, vl
|
||||
);
|
||||
__riscv_vfneg_v_f32m4(cosVal, vl), cosVal, cosSignMask, vl);
|
||||
|
||||
// Store out[0..vl)
|
||||
vfloat32m4x2_t outVal = __riscv_vcreate_v_f32m4x2(outRealVal, outImagVal);
|
||||
@@ -1074,7 +1072,7 @@ static inline void volk_gnsssdr_s32f_sincos_32fc_rvv(lv_32fc_t* out, const float
|
||||
// In looping, decrement the number of
|
||||
// elements left and increment the pointers
|
||||
// by the number of elements processed,
|
||||
// taking into account how the output `vl`
|
||||
// taking into account how the output `vl`
|
||||
// complex numbers are stored as 2 `float`s
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user