1
0
mirror of https://github.com/gnss-sdr/gnss-sdr synced 2025-11-04 17:23:20 +00:00

Implement 8ic multiply with RVV

Implement mulitplying two vectors of complex numbers together,
point by point, into a new vector, using RVV C intrinsics.
This is in `volk_gnsssdr_8ic_x2_multiply_8ic_rvv`.

Signed-off-by: Marcus Alagar <mvala079@gmail.com>
This commit is contained in:
Marcus Alagar
2025-08-08 01:44:24 -05:00
parent 425dd70450
commit 02dfb70ecd

View File

@@ -287,6 +287,62 @@ static inline void volk_gnsssdr_8ic_x2_multiply_8ic_a_sse4_1(lv_8sc_t* cVector,
#endif /* LV_HAVE_SSE4_1 */
#ifdef LV_HAVE_RVV
#include <riscv_vector.h>
static inline void volk_gnsssdr_8ic_x2_multiply_8ic_rvv(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points) {
size_t n = num_points;
// Initialize pointers to track progress as stripmine
// Assuming that intended to use `signed char`
// as `char`'s signedness is implementation-specific
signed char* cPtr = (signed char*) cVector;
const signed char* aPtr = (const signed char*) aVector;
const signed char* bPtr = (const signed char*) bVector;
for (size_t vl; n > 0; n -= vl, cPtr += vl * 2, aPtr += vl * 2, bPtr += vl * 2) {
// Record how many elements will actually be processed
vl = __riscv_vsetvl_e8m4(n);
// Load aReal[0..vl), aImag[0..vl)
vint8m4x2_t aVal = __riscv_vlseg2e8_v_i8m4x2(aPtr, vl);
vint8m4_t aRealVal = __riscv_vget_v_i8m4x2_i8m4(aVal, 0);
vint8m4_t aImagVal = __riscv_vget_v_i8m4x2_i8m4(aVal, 1);
// Load bReal[0..vl), bImag[0..vl)
vint8m4x2_t bVal = __riscv_vlseg2e8_v_i8m4x2(bPtr, vl);
vint8m4_t bRealVal = __riscv_vget_v_i8m4x2_i8m4(bVal, 0);
vint8m4_t bImagVal = __riscv_vget_v_i8m4x2_i8m4(bVal, 1);
// cReal[i] = aReal[i] * bReal[i]
vint8m4_t cRealVal = __riscv_vmul_vv_i8m4(aRealVal, bRealVal, vl);
// cReal[i] = -(aImag[i] * bImag[i]) + cReal[i]
cRealVal = __riscv_vnmsac_vv_i8m4(cRealVal, aImagVal, bImagVal, vl);
// cImag[i] = aReal[i] * bImag[i]
vint8m4_t cImagVal = __riscv_vmul_vv_i8m4(aRealVal, bImagVal, vl);
// cImag[i] = (aImag[i] * bReal[i]) + cImag[i]
cImagVal = __riscv_vmacc_vv_i8m4(cImagVal, aImagVal, bRealVal, vl);
// Store cReal[0..vl), cImag[0..vl)
vint8m4x2_t cVal = __riscv_vset_v_i8m4_i8m4x2(
__riscv_vundefined_i8m4x2(), 0, cRealVal
);
cVal = __riscv_vset_v_i8m4_i8m4x2(cVal, 1, cImagVal);
__riscv_vsseg2e8_v_i8m4x2(cPtr, cVal, vl);
// In looping, decrement the number of
// elements left and increment the pointers
// by the number of elements processed,
// taking into account how the `vl` complex
// numbers are each stored as two 1-byte `char`s
}
}
#endif /* LV_HAVE_RVV */
#ifdef LV_HAVE_ORC
extern void volk_gnsssdr_8ic_x2_multiply_8ic_a_orc_impl(lv_8sc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int num_points);