1
0
mirror of https://github.com/janet-lang/janet synced 2024-11-27 18:49:54 +00:00

Merge pull request #1468 from SyrupThinker/st/aarch64_ffi

Add support for the AAPCS64 calling convention
This commit is contained in:
Calvin Rose 2024-06-26 06:55:04 -07:00 committed by GitHub
commit 3078686f8f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 343 additions and 4 deletions

View File

@ -35,6 +35,11 @@ typedef struct {
int c;
} intintint;
typedef struct {
uint64_t a;
uint64_t b;
} uint64pair;
typedef struct {
int64_t a;
int64_t b;
@ -203,3 +208,20 @@ EXPORTER
int sixints_fn_3(SixInts s, int x) {
return x + s.u + s.v + s.w + s.x + s.y + s.z;
}
EXPORTER
intint stack_spill_fn(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
uint8_t e, uint8_t f, uint8_t g, uint8_t h,
float i, float j, float k, float l,
float m, float n, float o, float p,
float s1, int8_t s2, uint8_t s3, double s4, uint8_t s5, intint s6) {
return (intint) {
(a | b | c | d | e | f | g | h) + (i + j + k + l + m + n + o + p),
s1 * s6.a + s2 * s6.b + s3 * s4 * s5
};
}
EXPORTER
double stack_spill_fn_2(uint64pair a, uint64pair b, uint64pair c, int8_t d, uint64pair e, int8_t f) {
return (double)(a.a * c.a + a.b * c.b + b.a * e.a) * f - (double)(b.b * e.b) + d;
}

View File

@ -8,11 +8,13 @@
(if is-windows
(os/execute ["cl.exe" "/nologo" "/LD" ffi/source-loc "/link" "/DLL" (string "/OUT:" ffi/loc)] :px)
(os/execute ["cc" ffi/source-loc "-shared" "-o" ffi/loc] :px))
(os/execute ["cc" ffi/source-loc "-g" "-shared" "-o" ffi/loc] :px))
(ffi/context ffi/loc)
(def intint (ffi/struct :int :int))
(def intintint (ffi/struct :int :int :int))
(def uint64pair (ffi/struct :u64 :u64))
(def big (ffi/struct :s64 :s64 :s64))
(def split (ffi/struct :int :int :float :float))
(def split-flip (ffi/struct :float :float :int :int))
@ -55,6 +57,13 @@
(ffi/defbind sixints-fn six-ints [])
(ffi/defbind sixints-fn-2 :int [x :int s six-ints])
(ffi/defbind sixints-fn-3 :int [s six-ints x :int])
(ffi/defbind stack-spill-fn intint
[a :u8 b :u8 c :u8 d :u8
e :u8 f :u8 g :u8 h :u8
i :float j :float k :float l :float
m :float n :float o :float p :float
s1 :float s2 :s8 s3 :u8 s4 :double s5 :u8 s6 intint])
(ffi/defbind stack-spill-fn-2 :double [a uint64pair b uint64pair c uint64pair d :s8 e uint64pair f :s8])
(ffi/defbind-alias int-fn int-fn-aliased :int [a :int b :int])
#
@ -132,5 +141,10 @@
(assert (= 21 (math/round (double-many 1 2 3 4 5 6.01))))
(assert (= 19 (double-lots 1 2 3 4 5 6 7 8 9 10)))
(assert (= 204 (float-fn 8 4 17)))
(assert (= [0 38534415] (stack-spill-fn
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
1.5 -32 196 65536.5 3 [-15 32])))
(assert (= -2806 (stack-spill-fn-2 [2 3] [5 7] [9 11] -19 [13 17] -23)))
(print "Done.")

View File

@ -56,6 +56,9 @@
#if (defined(__x86_64__) || defined(_M_X64)) && !defined(JANET_WINDOWS)
#define JANET_FFI_SYSV64_ENABLED
#endif
#if (defined(__aarch64__) || defined(_M_ARM64)) && !defined(JANET_WINDOWS)
#define JANET_FFI_AAPCS64_ENABLED
#endif
typedef struct JanetFFIType JanetFFIType;
typedef struct JanetFFIStruct JanetFFIStruct;
@ -140,7 +143,13 @@ typedef enum {
JANET_WIN64_REGISTER,
JANET_WIN64_STACK,
JANET_WIN64_REGISTER_REF,
JANET_WIN64_STACK_REF
JANET_WIN64_STACK_REF,
JANET_AAPCS64_GENERAL,
JANET_AAPCS64_SSE,
JANET_AAPCS64_GENERAL_REF,
JANET_AAPCS64_STACK,
JANET_AAPCS64_STACK_REF,
JANET_AAPCS64_NONE
} JanetFFIWordSpec;
/* Describe how each Janet argument is interpreted in terms of machine words
@ -155,13 +164,16 @@ typedef struct {
typedef enum {
JANET_FFI_CC_NONE,
JANET_FFI_CC_SYSV_64,
JANET_FFI_CC_WIN_64
JANET_FFI_CC_WIN_64,
JANET_FFI_CC_AAPCS64
} JanetFFICallingConvention;
#ifdef JANET_FFI_WIN64_ENABLED
#define JANET_FFI_CC_DEFAULT JANET_FFI_CC_WIN_64
#elif defined(JANET_FFI_SYSV64_ENABLED)
#define JANET_FFI_CC_DEFAULT JANET_FFI_CC_SYSV_64
#elif defined(JANET_FFI_AAPCS64_ENABLED)
#define JANET_FFI_CC_DEFAULT JANET_FFI_CC_AAPCS64
#else
#define JANET_FFI_CC_DEFAULT JANET_FFI_CC_NONE
#endif
@ -301,6 +313,9 @@ static JanetFFICallingConvention decode_ffi_cc(const uint8_t *name) {
#endif
#ifdef JANET_FFI_SYSV64_ENABLED
if (!janet_cstrcmp(name, "sysv64")) return JANET_FFI_CC_SYSV_64;
#endif
#ifdef JANET_FFI_AAPCS64_ENABLED
if (!janet_cstrcmp(name, "aapcs64")) return JANET_FFI_CC_AAPCS64;
#endif
if (!janet_cstrcmp(name, "default")) return JANET_FFI_CC_DEFAULT;
janet_panicf("unknown calling convention %s", name);
@ -763,6 +778,101 @@ static JanetFFIWordSpec sysv64_classify(JanetFFIType type) {
}
#endif
#ifdef JANET_FFI_AAPCS64_ENABLED
/* Procedure Call Standard for the Arm® 64-bit Architecture (AArch64) 2023Q3 October 6, 2023
* See section 6.8.2 Parameter passing rules.
* https://github.com/ARM-software/abi-aa/releases/download/2023Q3/aapcs64.pdf
*
* Additional documentation needed for Apple platforms.
* https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms */
#define JANET_FFI_AAPCS64_FORCE_STACK_ALIGN(ptr, alignment) (ptr = ((ptr) + ((alignment) - 1)) & ~((alignment) - 1))
#if !defined(JANET_APPLE)
#define JANET_FFI_AAPCS64_STACK_ALIGN(ptr, alignment) ((void) alignment, JANET_FFI_AAPCS64_FORCE_STACK_ALIGN(ptr, 8))
#else
#define JANET_FFI_AAPCS64_STACK_ALIGN(ptr, alignment) JANET_FFI_AAPCS64_FORCE_STACK_ALIGN(ptr, alignment)
#endif
typedef struct {
uint64_t a;
uint64_t b;
} Aapcs64Variant1ReturnGeneral;
typedef struct {
double a;
double b;
double c;
double d;
} Aapcs64Variant2ReturnSse;
/* Workaround for passing a return value pointer through x8.
* Limits struct returns to 128 bytes. */
typedef struct {
uint64_t a;
uint64_t b;
uint64_t c;
uint64_t d;
uint64_t e;
uint64_t f;
uint64_t g;
uint64_t h;
uint64_t i;
uint64_t j;
uint64_t k;
uint64_t l;
uint64_t m;
uint64_t n;
uint64_t o;
uint64_t p;
} Aapcs64Variant3ReturnPointer;
static JanetFFIWordSpec aapcs64_classify(JanetFFIType type) {
switch (type.prim) {
case JANET_FFI_TYPE_PTR:
case JANET_FFI_TYPE_STRING:
case JANET_FFI_TYPE_BOOL:
case JANET_FFI_TYPE_INT8:
case JANET_FFI_TYPE_INT16:
case JANET_FFI_TYPE_INT32:
case JANET_FFI_TYPE_INT64:
case JANET_FFI_TYPE_UINT8:
case JANET_FFI_TYPE_UINT16:
case JANET_FFI_TYPE_UINT32:
case JANET_FFI_TYPE_UINT64:
return JANET_AAPCS64_GENERAL;
case JANET_FFI_TYPE_DOUBLE:
case JANET_FFI_TYPE_FLOAT:
return JANET_AAPCS64_SSE;
case JANET_FFI_TYPE_STRUCT: {
JanetFFIStruct *st = type.st;
if (st->field_count <= 4 && aapcs64_classify(st->fields[0].type) == JANET_AAPCS64_SSE) {
bool is_hfa = true;
for (uint32_t i = 1; i < st->field_count; i++) {
if (st->fields[0].type.prim != st->fields[i].type.prim) {
is_hfa = false;
break;
}
}
if (is_hfa) {
return JANET_AAPCS64_SSE;
}
}
if (type_size(type) > 16) {
return JANET_AAPCS64_GENERAL_REF;
}
return JANET_AAPCS64_GENERAL;
}
case JANET_FFI_TYPE_VOID:
return JANET_AAPCS64_NONE;
default:
janet_panic("nyi");
return JANET_AAPCS64_NONE;
}
}
#endif
JANET_CORE_FN(cfun_ffi_signature,
"(ffi/signature calling-convention ret-type & arg-types)",
"Create a function signature object that can be used to make calls "
@ -960,6 +1070,96 @@ JANET_CORE_FN(cfun_ffi_signature,
}
break;
#endif
#ifdef JANET_FFI_AAPCS64_ENABLED
case JANET_FFI_CC_AAPCS64: {
uint32_t next_general_reg = 0;
uint32_t next_fp_reg = 0;
uint32_t stack_offset = 0;
uint32_t ref_stack_offset = 0;
JanetFFIWordSpec ret_spec = aapcs64_classify(ret_type);
ret.spec = ret_spec;
if (ret_spec == JANET_AAPCS64_SSE) {
variant = 1;
} else if (ret_spec == JANET_AAPCS64_GENERAL_REF) {
if (type_size(ret_type) > sizeof(Aapcs64Variant3ReturnPointer)) {
janet_panic("return value bigger than supported");
}
variant = 2;
} else {
variant = 0;
}
for (uint32_t i = 0; i < arg_count; i++) {
mappings[i].type = decode_ffi_type(argv[i + 2]);
mappings[i].spec = aapcs64_classify(mappings[i].type);
size_t arg_size = type_size(mappings[i].type);
switch (mappings[i].spec) {
case JANET_AAPCS64_GENERAL: {
bool arg_is_struct = mappings[i].type.prim == JANET_FFI_TYPE_STRUCT;
uint32_t needed_registers = (arg_size + 7) / 8;
if (next_general_reg + needed_registers <= 8) {
mappings[i].offset = next_general_reg;
next_general_reg += needed_registers;
} else {
size_t arg_align = arg_is_struct ? 8 : type_align(mappings[i].type);
mappings[i].spec = JANET_AAPCS64_STACK;
mappings[i].offset = JANET_FFI_AAPCS64_STACK_ALIGN(stack_offset, arg_align);
#if !defined(JANET_APPLE)
stack_offset += arg_size > 8 ? arg_size : 8;
#else
stack_offset += arg_size;
#endif
next_general_reg = 8;
}
break;
}
case JANET_AAPCS64_GENERAL_REF:
if (next_general_reg < 8) {
mappings[i].offset = next_general_reg++;
} else {
mappings[i].spec = JANET_AAPCS64_STACK_REF;
mappings[i].offset = JANET_FFI_AAPCS64_STACK_ALIGN(stack_offset, 8);
stack_offset += 8;
}
mappings[i].offset2 = JANET_FFI_AAPCS64_FORCE_STACK_ALIGN(ref_stack_offset, 8);
ref_stack_offset += arg_size;
break;
case JANET_AAPCS64_SSE: {
uint32_t needed_registers = (arg_size + 7) / 8;
if (next_fp_reg + needed_registers <= 8) {
mappings[i].offset = next_fp_reg;
next_fp_reg += needed_registers;
} else {
mappings[i].spec = JANET_AAPCS64_STACK;
mappings[i].offset = JANET_FFI_AAPCS64_STACK_ALIGN(stack_offset, 8);
#if !defined(JANET_APPLE)
stack_offset += 8;
#else
stack_offset += arg_size;
#endif
}
break;
}
default:
janet_panic("nyi");
}
}
stack_offset = (stack_offset + 15) & ~0xFUL;
ref_stack_offset = (ref_stack_offset + 15) & ~0xFUL;
stack_count = stack_offset + ref_stack_offset;
for (uint32_t i = 0; i < arg_count; i++) {
if (mappings[i].spec == JANET_AAPCS64_GENERAL_REF || mappings[i].spec == JANET_AAPCS64_STACK_REF) {
mappings[i].offset2 = stack_offset + mappings[i].offset2;
}
}
}
break;
#endif
}
/* Create signature abstract value */
@ -1294,6 +1494,99 @@ static Janet janet_ffi_win64(JanetFFISignature *signature, void *function_pointe
#endif
#ifdef JANET_FFI_AAPCS64_ENABLED
static void janet_ffi_aapcs64_standard_callback(void *ctx, void *userdata) {
janet_ffi_trampoline(ctx, userdata);
}
typedef Aapcs64Variant1ReturnGeneral janet_aapcs64_variant_1(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7,
double v0, double v1, double v2, double v3, double v4, double v5, double v6, double v7);
typedef Aapcs64Variant2ReturnSse janet_aapcs64_variant_2(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7,
double v0, double v1, double v2, double v3, double v4, double v5, double v6, double v7);
typedef Aapcs64Variant3ReturnPointer janet_aapcs64_variant_3(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7,
double v0, double v1, double v2, double v3, double v4, double v5, double v6, double v7);
static Janet janet_ffi_aapcs64(JanetFFISignature *signature, void *function_pointer, const Janet *argv) {
union {
Aapcs64Variant1ReturnGeneral general_return;
Aapcs64Variant2ReturnSse sse_return;
Aapcs64Variant3ReturnPointer pointer_return;
} retu;
uint64_t regs[8];
double fp_regs[8];
void *ret_mem = &retu.general_return;
/* Apple's stack values do not need to be 8-byte aligned,
* thus all stack offsets refer to actual byte positions. */
uint8_t *stack = alloca(signature->stack_count);
#if defined(JANET_APPLE)
/* Values must be zero-extended by the caller instead of the callee. */
memset(stack, 0, signature->stack_count);
#endif
for (uint32_t i = 0; i < signature->arg_count; i++) {
int32_t n = i + 2;
JanetFFIMapping arg = signature->args[i];
void *to = NULL;
switch (arg.spec) {
case JANET_AAPCS64_GENERAL:
to = regs + arg.offset;
break;
case JANET_AAPCS64_GENERAL_REF:
to = stack + arg.offset2;
regs[arg.offset] = (uint64_t) to;
break;
case JANET_AAPCS64_SSE:
to = fp_regs + arg.offset;
break;
case JANET_AAPCS64_STACK:
to = stack + arg.offset;
break;
case JANET_AAPCS64_STACK_REF:
to = stack + arg.offset2;
uint64_t *ptr = (uint64_t *) stack + arg.offset;
*ptr = (uint64_t) to;
break;
default:
janet_panic("nyi");
}
if (to) {
janet_ffi_write_one(to, argv, n, arg.type, JANET_FFI_MAX_RECUR);
}
}
switch (signature->variant) {
case 0:
retu.general_return = ((janet_aapcs64_variant_1 *)(function_pointer))(
regs[0], regs[1], regs[2], regs[3],
regs[4], regs[5], regs[6], regs[7],
fp_regs[0], fp_regs[1], fp_regs[2], fp_regs[3],
fp_regs[4], fp_regs[5], fp_regs[6], fp_regs[7]);
break;
case 1:
retu.sse_return = ((janet_aapcs64_variant_2 *)(function_pointer))(
regs[0], regs[1], regs[2], regs[3],
regs[4], regs[5], regs[6], regs[7],
fp_regs[0], fp_regs[1], fp_regs[2], fp_regs[3],
fp_regs[4], fp_regs[5], fp_regs[6], fp_regs[7]);
break;
case 2: {
retu.pointer_return = ((janet_aapcs64_variant_3 *)(function_pointer))(
regs[0], regs[1], regs[2], regs[3],
regs[4], regs[5], regs[6], regs[7],
fp_regs[0], fp_regs[1], fp_regs[2], fp_regs[3],
fp_regs[4], fp_regs[5], fp_regs[6], fp_regs[7]);
}
}
return janet_ffi_read_one(ret_mem, signature->ret.type, JANET_FFI_MAX_RECUR);
}
#endif
/* Allocate executable memory chunks in sizes of a page. Ideally we would keep
* an allocator around so that multiple JIT allocations would point to the same
* region but it isn't really worth it. */
@ -1373,6 +1666,10 @@ JANET_CORE_FN(cfun_ffi_call,
#ifdef JANET_FFI_SYSV64_ENABLED
case JANET_FFI_CC_SYSV_64:
return janet_ffi_sysv64(signature, function_pointer, argv);
#endif
#ifdef JANET_FFI_AAPCS64_ENABLED
case JANET_FFI_CC_AAPCS64:
return janet_ffi_aapcs64(signature, function_pointer, argv);
#endif
}
}
@ -1442,6 +1739,10 @@ JANET_CORE_FN(cfun_ffi_get_callback_trampoline,
#ifdef JANET_FFI_SYSV64_ENABLED
case JANET_FFI_CC_SYSV_64:
return janet_wrap_pointer(janet_ffi_sysv64_standard_callback);
#endif
#ifdef JANET_FFI_AAPCS64_ENABLED
case JANET_FFI_CC_AAPCS64:
return janet_wrap_pointer(janet_ffi_aapcs64_standard_callback);
#endif
}
}
@ -1561,6 +1862,9 @@ JANET_CORE_FN(cfun_ffi_supported_calling_conventions,
#endif
#ifdef JANET_FFI_SYSV64_ENABLED
janet_array_push(array, janet_ckeywordv("sysv64"));
#endif
#ifdef JANET_FFI_AAPCS64_ENABLED
janet_array_push(array, janet_ckeywordv("aapcs64"));
#endif
janet_array_push(array, janet_ckeywordv("none"));
return janet_wrap_array(array);

View File

@ -21,7 +21,6 @@
(import ./helper :prefix "" :exit true)
(start-suite)
# We should get ARM support...
(def has-ffi (dyn 'ffi/native))
(def has-full-ffi
(and has-ffi