diff --git a/examples/ffi/so.c b/examples/ffi/so.c index b2d097bf..04cdee80 100644 --- a/examples/ffi/so.c +++ b/examples/ffi/so.c @@ -35,6 +35,11 @@ typedef struct { int c; } intintint; +typedef struct { + uint64_t a; + uint64_t b; +} uint64pair; + typedef struct { int64_t a; int64_t b; @@ -203,3 +208,20 @@ EXPORTER int sixints_fn_3(SixInts s, int x) { return x + s.u + s.v + s.w + s.x + s.y + s.z; } + +EXPORTER +intint stack_spill_fn(uint8_t a, uint8_t b, uint8_t c, uint8_t d, + uint8_t e, uint8_t f, uint8_t g, uint8_t h, + float i, float j, float k, float l, + float m, float n, float o, float p, + float s1, int8_t s2, uint8_t s3, double s4, uint8_t s5, intint s6) { + return (intint) { + (a | b | c | d | e | f | g | h) + (i + j + k + l + m + n + o + p), + s1 * s6.a + s2 * s6.b + s3 * s4 * s5 + }; +} + +EXPORTER +double stack_spill_fn_2(uint64pair a, uint64pair b, uint64pair c, int8_t d, uint64pair e, int8_t f) { + return (double)(a.a * c.a + a.b * c.b + b.a * e.a) * f - (double)(b.b * e.b) + d; +} diff --git a/examples/ffi/test.janet b/examples/ffi/test.janet index 3d97596c..b63fb1c3 100644 --- a/examples/ffi/test.janet +++ b/examples/ffi/test.janet @@ -8,11 +8,13 @@ (if is-windows (os/execute ["cl.exe" "/nologo" "/LD" ffi/source-loc "/link" "/DLL" (string "/OUT:" ffi/loc)] :px) - (os/execute ["cc" ffi/source-loc "-shared" "-o" ffi/loc] :px)) + (os/execute ["cc" ffi/source-loc "-g" "-shared" "-o" ffi/loc] :px)) (ffi/context ffi/loc) +(def intint (ffi/struct :int :int)) (def intintint (ffi/struct :int :int :int)) +(def uint64pair (ffi/struct :u64 :u64)) (def big (ffi/struct :s64 :s64 :s64)) (def split (ffi/struct :int :int :float :float)) (def split-flip (ffi/struct :float :float :int :int)) @@ -55,6 +57,13 @@ (ffi/defbind sixints-fn six-ints []) (ffi/defbind sixints-fn-2 :int [x :int s six-ints]) (ffi/defbind sixints-fn-3 :int [s six-ints x :int]) +(ffi/defbind stack-spill-fn intint + [a :u8 b :u8 c :u8 d :u8 + e :u8 f :u8 g :u8 h :u8 + i :float j :float k :float l :float + m :float n :float o :float p :float + s1 :float s2 :s8 s3 :u8 s4 :double s5 :u8 s6 intint]) +(ffi/defbind stack-spill-fn-2 :double [a uint64pair b uint64pair c uint64pair d :s8 e uint64pair f :s8]) (ffi/defbind-alias int-fn int-fn-aliased :int [a :int b :int]) # @@ -132,5 +141,10 @@ (assert (= 21 (math/round (double-many 1 2 3 4 5 6.01)))) (assert (= 19 (double-lots 1 2 3 4 5 6 7 8 9 10))) (assert (= 204 (float-fn 8 4 17))) +(assert (= [0 38534415] (stack-spill-fn + 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 + 1.5 -32 196 65536.5 3 [-15 32]))) +(assert (= -2806 (stack-spill-fn-2 [2 3] [5 7] [9 11] -19 [13 17] -23))) (print "Done.") diff --git a/src/core/ffi.c b/src/core/ffi.c index 3a3b08f1..fb322c9d 100644 --- a/src/core/ffi.c +++ b/src/core/ffi.c @@ -56,6 +56,9 @@ #if (defined(__x86_64__) || defined(_M_X64)) && !defined(JANET_WINDOWS) #define JANET_FFI_SYSV64_ENABLED #endif +#if (defined(__aarch64__) || defined(_M_ARM64)) && !defined(JANET_WINDOWS) +#define JANET_FFI_AAPCS64_ENABLED +#endif typedef struct JanetFFIType JanetFFIType; typedef struct JanetFFIStruct JanetFFIStruct; @@ -140,7 +143,13 @@ typedef enum { JANET_WIN64_REGISTER, JANET_WIN64_STACK, JANET_WIN64_REGISTER_REF, - JANET_WIN64_STACK_REF + JANET_WIN64_STACK_REF, + JANET_AAPCS64_GENERAL, + JANET_AAPCS64_SSE, + JANET_AAPCS64_GENERAL_REF, + JANET_AAPCS64_STACK, + JANET_AAPCS64_STACK_REF, + JANET_AAPCS64_NONE } JanetFFIWordSpec; /* Describe how each Janet argument is interpreted in terms of machine words @@ -155,13 +164,16 @@ typedef struct { typedef enum { JANET_FFI_CC_NONE, JANET_FFI_CC_SYSV_64, - JANET_FFI_CC_WIN_64 + JANET_FFI_CC_WIN_64, + JANET_FFI_CC_AAPCS64 } JanetFFICallingConvention; #ifdef JANET_FFI_WIN64_ENABLED #define JANET_FFI_CC_DEFAULT JANET_FFI_CC_WIN_64 #elif defined(JANET_FFI_SYSV64_ENABLED) #define JANET_FFI_CC_DEFAULT JANET_FFI_CC_SYSV_64 +#elif defined(JANET_FFI_AAPCS64_ENABLED) +#define JANET_FFI_CC_DEFAULT JANET_FFI_CC_AAPCS64 #else #define JANET_FFI_CC_DEFAULT JANET_FFI_CC_NONE #endif @@ -301,6 +313,9 @@ static JanetFFICallingConvention decode_ffi_cc(const uint8_t *name) { #endif #ifdef JANET_FFI_SYSV64_ENABLED if (!janet_cstrcmp(name, "sysv64")) return JANET_FFI_CC_SYSV_64; +#endif +#ifdef JANET_FFI_AAPCS64_ENABLED + if (!janet_cstrcmp(name, "aapcs64")) return JANET_FFI_CC_AAPCS64; #endif if (!janet_cstrcmp(name, "default")) return JANET_FFI_CC_DEFAULT; janet_panicf("unknown calling convention %s", name); @@ -763,6 +778,101 @@ static JanetFFIWordSpec sysv64_classify(JanetFFIType type) { } #endif +#ifdef JANET_FFI_AAPCS64_ENABLED +/* Procedure Call Standard for the Arm® 64-bit Architecture (AArch64) 2023Q3 – October 6, 2023 + * See section 6.8.2 Parameter passing rules. + * https://github.com/ARM-software/abi-aa/releases/download/2023Q3/aapcs64.pdf + * + * Additional documentation needed for Apple platforms. + * https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms */ + +#define JANET_FFI_AAPCS64_FORCE_STACK_ALIGN(ptr, alignment) (ptr = ((ptr) + ((alignment) - 1)) & ~((alignment) - 1)) +#if !defined(JANET_APPLE) +#define JANET_FFI_AAPCS64_STACK_ALIGN(ptr, alignment) ((void) alignment, JANET_FFI_AAPCS64_FORCE_STACK_ALIGN(ptr, 8)) +#else +#define JANET_FFI_AAPCS64_STACK_ALIGN(ptr, alignment) JANET_FFI_AAPCS64_FORCE_STACK_ALIGN(ptr, alignment) +#endif + +typedef struct { + uint64_t a; + uint64_t b; +} Aapcs64Variant1ReturnGeneral; + +typedef struct { + double a; + double b; + double c; + double d; +} Aapcs64Variant2ReturnSse; + +/* Workaround for passing a return value pointer through x8. + * Limits struct returns to 128 bytes. */ +typedef struct { + uint64_t a; + uint64_t b; + uint64_t c; + uint64_t d; + uint64_t e; + uint64_t f; + uint64_t g; + uint64_t h; + uint64_t i; + uint64_t j; + uint64_t k; + uint64_t l; + uint64_t m; + uint64_t n; + uint64_t o; + uint64_t p; +} Aapcs64Variant3ReturnPointer; + +static JanetFFIWordSpec aapcs64_classify(JanetFFIType type) { + switch (type.prim) { + case JANET_FFI_TYPE_PTR: + case JANET_FFI_TYPE_STRING: + case JANET_FFI_TYPE_BOOL: + case JANET_FFI_TYPE_INT8: + case JANET_FFI_TYPE_INT16: + case JANET_FFI_TYPE_INT32: + case JANET_FFI_TYPE_INT64: + case JANET_FFI_TYPE_UINT8: + case JANET_FFI_TYPE_UINT16: + case JANET_FFI_TYPE_UINT32: + case JANET_FFI_TYPE_UINT64: + return JANET_AAPCS64_GENERAL; + case JANET_FFI_TYPE_DOUBLE: + case JANET_FFI_TYPE_FLOAT: + return JANET_AAPCS64_SSE; + case JANET_FFI_TYPE_STRUCT: { + JanetFFIStruct *st = type.st; + if (st->field_count <= 4 && aapcs64_classify(st->fields[0].type) == JANET_AAPCS64_SSE) { + bool is_hfa = true; + for (uint32_t i = 1; i < st->field_count; i++) { + if (st->fields[0].type.prim != st->fields[i].type.prim) { + is_hfa = false; + break; + } + } + if (is_hfa) { + return JANET_AAPCS64_SSE; + } + } + + if (type_size(type) > 16) { + return JANET_AAPCS64_GENERAL_REF; + } + + return JANET_AAPCS64_GENERAL; + } + case JANET_FFI_TYPE_VOID: + return JANET_AAPCS64_NONE; + default: + janet_panic("nyi"); + return JANET_AAPCS64_NONE; + } +} +#endif + JANET_CORE_FN(cfun_ffi_signature, "(ffi/signature calling-convention ret-type & arg-types)", "Create a function signature object that can be used to make calls " @@ -960,6 +1070,96 @@ JANET_CORE_FN(cfun_ffi_signature, } break; #endif + +#ifdef JANET_FFI_AAPCS64_ENABLED + case JANET_FFI_CC_AAPCS64: { + uint32_t next_general_reg = 0; + uint32_t next_fp_reg = 0; + uint32_t stack_offset = 0; + uint32_t ref_stack_offset = 0; + + JanetFFIWordSpec ret_spec = aapcs64_classify(ret_type); + ret.spec = ret_spec; + if (ret_spec == JANET_AAPCS64_SSE) { + variant = 1; + } else if (ret_spec == JANET_AAPCS64_GENERAL_REF) { + if (type_size(ret_type) > sizeof(Aapcs64Variant3ReturnPointer)) { + janet_panic("return value bigger than supported"); + } + variant = 2; + } else { + variant = 0; + } + + for (uint32_t i = 0; i < arg_count; i++) { + mappings[i].type = decode_ffi_type(argv[i + 2]); + mappings[i].spec = aapcs64_classify(mappings[i].type); + size_t arg_size = type_size(mappings[i].type); + + switch (mappings[i].spec) { + case JANET_AAPCS64_GENERAL: { + bool arg_is_struct = mappings[i].type.prim == JANET_FFI_TYPE_STRUCT; + uint32_t needed_registers = (arg_size + 7) / 8; + if (next_general_reg + needed_registers <= 8) { + mappings[i].offset = next_general_reg; + next_general_reg += needed_registers; + } else { + size_t arg_align = arg_is_struct ? 8 : type_align(mappings[i].type); + mappings[i].spec = JANET_AAPCS64_STACK; + mappings[i].offset = JANET_FFI_AAPCS64_STACK_ALIGN(stack_offset, arg_align); +#if !defined(JANET_APPLE) + stack_offset += arg_size > 8 ? arg_size : 8; +#else + stack_offset += arg_size; +#endif + next_general_reg = 8; + } + break; + } + case JANET_AAPCS64_GENERAL_REF: + if (next_general_reg < 8) { + mappings[i].offset = next_general_reg++; + } else { + mappings[i].spec = JANET_AAPCS64_STACK_REF; + mappings[i].offset = JANET_FFI_AAPCS64_STACK_ALIGN(stack_offset, 8); + stack_offset += 8; + } + mappings[i].offset2 = JANET_FFI_AAPCS64_FORCE_STACK_ALIGN(ref_stack_offset, 8); + ref_stack_offset += arg_size; + break; + case JANET_AAPCS64_SSE: { + uint32_t needed_registers = (arg_size + 7) / 8; + if (next_fp_reg + needed_registers <= 8) { + mappings[i].offset = next_fp_reg; + next_fp_reg += needed_registers; + } else { + mappings[i].spec = JANET_AAPCS64_STACK; + mappings[i].offset = JANET_FFI_AAPCS64_STACK_ALIGN(stack_offset, 8); +#if !defined(JANET_APPLE) + stack_offset += 8; +#else + stack_offset += arg_size; +#endif + } + break; + } + default: + janet_panic("nyi"); + } + } + + stack_offset = (stack_offset + 15) & ~0xFUL; + ref_stack_offset = (ref_stack_offset + 15) & ~0xFUL; + stack_count = stack_offset + ref_stack_offset; + + for (uint32_t i = 0; i < arg_count; i++) { + if (mappings[i].spec == JANET_AAPCS64_GENERAL_REF || mappings[i].spec == JANET_AAPCS64_STACK_REF) { + mappings[i].offset2 = stack_offset + mappings[i].offset2; + } + } + } + break; +#endif } /* Create signature abstract value */ @@ -1294,6 +1494,99 @@ static Janet janet_ffi_win64(JanetFFISignature *signature, void *function_pointe #endif +#ifdef JANET_FFI_AAPCS64_ENABLED + +static void janet_ffi_aapcs64_standard_callback(void *ctx, void *userdata) { + janet_ffi_trampoline(ctx, userdata); +} + +typedef Aapcs64Variant1ReturnGeneral janet_aapcs64_variant_1(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7, + double v0, double v1, double v2, double v3, double v4, double v5, double v6, double v7); +typedef Aapcs64Variant2ReturnSse janet_aapcs64_variant_2(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7, + double v0, double v1, double v2, double v3, double v4, double v5, double v6, double v7); +typedef Aapcs64Variant3ReturnPointer janet_aapcs64_variant_3(uint64_t x0, uint64_t x1, uint64_t x2, uint64_t x3, uint64_t x4, uint64_t x5, uint64_t x6, uint64_t x7, + double v0, double v1, double v2, double v3, double v4, double v5, double v6, double v7); + + +static Janet janet_ffi_aapcs64(JanetFFISignature *signature, void *function_pointer, const Janet *argv) { + union { + Aapcs64Variant1ReturnGeneral general_return; + Aapcs64Variant2ReturnSse sse_return; + Aapcs64Variant3ReturnPointer pointer_return; + } retu; + uint64_t regs[8]; + double fp_regs[8]; + void *ret_mem = &retu.general_return; + + /* Apple's stack values do not need to be 8-byte aligned, + * thus all stack offsets refer to actual byte positions. */ + uint8_t *stack = alloca(signature->stack_count); +#if defined(JANET_APPLE) + /* Values must be zero-extended by the caller instead of the callee. */ + memset(stack, 0, signature->stack_count); +#endif + for (uint32_t i = 0; i < signature->arg_count; i++) { + int32_t n = i + 2; + JanetFFIMapping arg = signature->args[i]; + void *to = NULL; + + switch (arg.spec) { + case JANET_AAPCS64_GENERAL: + to = regs + arg.offset; + break; + case JANET_AAPCS64_GENERAL_REF: + to = stack + arg.offset2; + regs[arg.offset] = (uint64_t) to; + break; + case JANET_AAPCS64_SSE: + to = fp_regs + arg.offset; + break; + case JANET_AAPCS64_STACK: + to = stack + arg.offset; + break; + case JANET_AAPCS64_STACK_REF: + to = stack + arg.offset2; + uint64_t *ptr = (uint64_t *) stack + arg.offset; + *ptr = (uint64_t) to; + break; + default: + janet_panic("nyi"); + } + + if (to) { + janet_ffi_write_one(to, argv, n, arg.type, JANET_FFI_MAX_RECUR); + } + } + + switch (signature->variant) { + case 0: + retu.general_return = ((janet_aapcs64_variant_1 *)(function_pointer))( + regs[0], regs[1], regs[2], regs[3], + regs[4], regs[5], regs[6], regs[7], + fp_regs[0], fp_regs[1], fp_regs[2], fp_regs[3], + fp_regs[4], fp_regs[5], fp_regs[6], fp_regs[7]); + break; + case 1: + retu.sse_return = ((janet_aapcs64_variant_2 *)(function_pointer))( + regs[0], regs[1], regs[2], regs[3], + regs[4], regs[5], regs[6], regs[7], + fp_regs[0], fp_regs[1], fp_regs[2], fp_regs[3], + fp_regs[4], fp_regs[5], fp_regs[6], fp_regs[7]); + break; + case 2: { + retu.pointer_return = ((janet_aapcs64_variant_3 *)(function_pointer))( + regs[0], regs[1], regs[2], regs[3], + regs[4], regs[5], regs[6], regs[7], + fp_regs[0], fp_regs[1], fp_regs[2], fp_regs[3], + fp_regs[4], fp_regs[5], fp_regs[6], fp_regs[7]); + } + } + + return janet_ffi_read_one(ret_mem, signature->ret.type, JANET_FFI_MAX_RECUR); +} + +#endif + /* Allocate executable memory chunks in sizes of a page. Ideally we would keep * an allocator around so that multiple JIT allocations would point to the same * region but it isn't really worth it. */ @@ -1373,6 +1666,10 @@ JANET_CORE_FN(cfun_ffi_call, #ifdef JANET_FFI_SYSV64_ENABLED case JANET_FFI_CC_SYSV_64: return janet_ffi_sysv64(signature, function_pointer, argv); +#endif +#ifdef JANET_FFI_AAPCS64_ENABLED + case JANET_FFI_CC_AAPCS64: + return janet_ffi_aapcs64(signature, function_pointer, argv); #endif } } @@ -1442,6 +1739,10 @@ JANET_CORE_FN(cfun_ffi_get_callback_trampoline, #ifdef JANET_FFI_SYSV64_ENABLED case JANET_FFI_CC_SYSV_64: return janet_wrap_pointer(janet_ffi_sysv64_standard_callback); +#endif +#ifdef JANET_FFI_AAPCS64_ENABLED + case JANET_FFI_CC_AAPCS64: + return janet_wrap_pointer(janet_ffi_aapcs64_standard_callback); #endif } } @@ -1561,6 +1862,9 @@ JANET_CORE_FN(cfun_ffi_supported_calling_conventions, #endif #ifdef JANET_FFI_SYSV64_ENABLED janet_array_push(array, janet_ckeywordv("sysv64")); +#endif +#ifdef JANET_FFI_AAPCS64_ENABLED + janet_array_push(array, janet_ckeywordv("aapcs64")); #endif janet_array_push(array, janet_ckeywordv("none")); return janet_wrap_array(array); diff --git a/test/suite-ffi.janet b/test/suite-ffi.janet index 6ad4f70c..0a810198 100644 --- a/test/suite-ffi.janet +++ b/test/suite-ffi.janet @@ -21,7 +21,6 @@ (import ./helper :prefix "" :exit true) (start-suite) -# We should get ARM support... (def has-ffi (dyn 'ffi/native)) (def has-full-ffi (and has-ffi