mirror of https://github.com/gnss-sdr/gnss-sdr
68 lines
2.6 KiB
Plaintext
68 lines
2.6 KiB
Plaintext
########################################################################
|
|
# How to create custom kernel dispatchers
|
|
########################################################################
|
|
A kernel dispatcher is a kernel implementation that calls other kernel implementations.
|
|
By default, a dispatcher is generated by the build system for every kernel such that:
|
|
* the best aligned implementation is called when all pointer arguments are aligned,
|
|
* and otherwise the best unaligned implementation is called.
|
|
|
|
The author of a VOLK kernel may create a custom dispatcher,
|
|
to be called in place of the automatically generated one.
|
|
A custom dispatcher may be useful to handle head and tail cases,
|
|
or to implement different alignment and bounds checking logic.
|
|
|
|
########################################################################
|
|
# Code for an example dispatcher w/ tail case
|
|
########################################################################
|
|
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
|
|
|
#ifdef LV_HAVE_DISPATCHER
|
|
|
|
/*
 * Example custom dispatcher for the 32f_x2_add_32f kernel with a tail case.
 *
 * The input is split into a leading part whose length is a multiple of 4
 * points and a trailing remainder of 0..3 points. The leading part goes to
 * the _a or _u implementation depending on the alignment test below; the
 * remainder always goes to the _g implementation.
 *
 * cVector     output buffer
 * aVector     first input buffer
 * bVector     second input buffer
 * num_points  number of points to process
 */
static inline void volk_gnsssdr_32f_x2_add_32f_dispatcher(float* cVector, const float* aVector, const float* bVector, unsigned int num_points)
{
    const unsigned int tail_len = num_points % 4;
    const unsigned int bulk_len = num_points - tail_len;

    /* All three pointers are folded into one value with VOLK_OR_PTR and
     * checked with a single alignment test. */
    if (!volk_gnsssdr_is_aligned(VOLK_OR_PTR(cVector, VOLK_OR_PTR(aVector, bVector)))) {
        volk_gnsssdr_32f_x2_add_32f_u(cVector, aVector, bVector, bulk_len);
    } else {
        volk_gnsssdr_32f_x2_add_32f_a(cVector, aVector, bVector, bulk_len);
    }

    /* Tail case: the leftover points start right after the bulk part. */
    volk_gnsssdr_32f_x2_add_32f_g(cVector + bulk_len, aVector + bulk_len, bVector + bulk_len, tail_len);
}
|
|
|
|
#endif //LV_HAVE_DISPATCHER
|
|
|
|
########################################################################
|
|
# Code for an example dispatcher w/ tail case and accumulator
|
|
########################################################################
|
|
#include <volk_gnsssdr/volk_gnsssdr_common.h>
|
|
|
|
#ifdef LV_HAVE_DISPATCHER
|
|
|
|
/*
 * Example custom dispatcher for the 32f_x2_dot_prod_32f kernel with a tail
 * case and an accumulator.
 *
 * The input is split into a leading part whose length is a multiple of 16
 * points and a trailing remainder of 0..15 points. The leading part goes to
 * the _a or _u implementation, which is handed `result` directly. The
 * remainder goes to the _g implementation, which writes into a local
 * variable; that partial value is then added to *result.
 *
 * result      where the dot product is stored
 * input       first input buffer
 * taps        second input buffer
 * num_points  number of points to process
 */
static inline void volk_gnsssdr_32f_x2_dot_prod_32f_dispatcher(float * result, const float * input, const float * taps, unsigned int num_points)
{
    const unsigned int tail_len = num_points % 16;
    const unsigned int bulk_len = num_points - tail_len;

    /* Only the two input buffers take part in the alignment test. */
    if (!volk_gnsssdr_is_aligned(VOLK_OR_PTR(input, taps))) {
        volk_gnsssdr_32f_x2_dot_prod_32f_u(result, input, taps, bulk_len);
    } else {
        volk_gnsssdr_32f_x2_dot_prod_32f_a(result, input, taps, bulk_len);
    }

    /* Tail case: compute the leftover points separately, then accumulate. */
    float tail_sum = 0.0f;
    volk_gnsssdr_32f_x2_dot_prod_32f_g(&tail_sum, input + bulk_len, taps + bulk_len, tail_len);

    *result += tail_sum;
}
|
|
|
|
#endif //LV_HAVE_DISPATCHER
|