From 4782a76bcab1d6ad6af4a1bc821d6b3604d5300b Mon Sep 17 00:00:00 2001 From: Calvin Rose Date: Mon, 29 May 2023 16:10:48 -0500 Subject: [PATCH] Add initial bytecode optimizations for #1163 This removes unnecessary movn, movf, lds, and a few other instructions. Any instruction that has no side effects and writes to a slot that isn't used can be removed. A number of other optimizations can follow from this: - Implement the def-aliasing-var optimization better - This function can be iterated as a fix point until no more instructions are removed. - If we implement slot renaming, then we no longer need to free slots and can simplify the initial code generation a lot. --- Makefile | 2 +- src/core/bytecode.c | 274 +++++++++++++++++++++++++++++++++++++++++++ src/core/compile.c | 4 + src/core/compile.h | 4 + src/core/regalloc.c | 10 ++ src/core/regalloc.h | 1 + src/core/specials.c | 12 +- test/suite0015.janet | 14 ++- 8 files changed, 313 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index d6d7330d..cd5cfb3f 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ LDFLAGS?=-rdynamic RUN:=$(RUN) COMMON_CFLAGS:=-std=c99 -Wall -Wextra -Isrc/include -Isrc/conf -fvisibility=hidden -fPIC -BOOT_CFLAGS:=-DJANET_BOOTSTRAP -DJANET_BUILD=$(JANET_BUILD) -O0 $(COMMON_CFLAGS) +BOOT_CFLAGS:=-DJANET_BOOTSTRAP -DJANET_BUILD=$(JANET_BUILD) -O0 $(COMMON_CFLAGS) -g BUILD_CFLAGS:=$(CFLAGS) $(COMMON_CFLAGS) # For installation diff --git a/src/core/bytecode.c b/src/core/bytecode.c index 31faa5bf..185bccab 100644 --- a/src/core/bytecode.c +++ b/src/core/bytecode.c @@ -25,6 +25,7 @@ #include #include "gc.h" #include "util.h" +#include "regalloc.h" #endif /* Look up table for instructions */ @@ -106,6 +107,279 @@ enum JanetInstructionType janet_instructions[JOP_INSTRUCTION_COUNT] = { JINT_SSS /* JOP_CANCEL, */ }; +/* Remove all noops while preserving jumps and debugging information. + * Useful as part of a filtering compiler pass. 
+ *
+ * Pass 1 builds pc_map, which maps every old instruction index (and the index one past
+ * the end) to its index once noops are dropped. Pass 2 compacts def->bytecode and
+ * def->sourcemap in place and re-encodes the relative offset of every jump. Finally the
+ * symbolmap birth/death pcs are remapped and def->bytecode_length is updated.
+ * Jump targets are used as indices without bounds checks, so the input must be valid
+ * bytecode. */
+void janet_bytecode_remove_noops(JanetFuncDef *def) {
+
+    /* Get an instruction rewrite map so we can rewrite jumps. The extra trailing entry
+     * maps the pc one past the last instruction. */
+    uint32_t *pc_map = janet_smalloc(sizeof(uint32_t) * (1 + def->bytecode_length));
+    uint32_t new_bytecode_length = 0;
+    for (int32_t i = 0; i < def->bytecode_length; i++) {
+        uint32_t instr = def->bytecode[i];
+        uint32_t opcode = instr & 0x7F;
+        pc_map[i] = new_bytecode_length;
+        if (opcode != JOP_NOOP) {
+            new_bytecode_length++;
+        }
+    }
+    pc_map[def->bytecode_length] = new_bytecode_length;
+
+    /* Linear scan rewrite bytecode and sourcemap. Also fix jumps.
+     * i is the read index (old pc), j is the write index (new pc). */
+    int32_t j = 0;
+    for (int32_t i = 0; i < def->bytecode_length; i++) {
+        uint32_t instr = def->bytecode[i];
+        uint32_t opcode = instr & 0x7F;
+        int32_t old_jump_target = 0;
+        int32_t new_jump_target = 0;
+        switch (opcode) {
+            case JOP_NOOP:
+                continue;
+            case JOP_JUMP:
+                /* relative pc is in DS field of instruction */
+                old_jump_target = i + (((int32_t)instr) >> 8);
+                new_jump_target = pc_map[old_jump_target];
+                /* The offset changes by (new_target - j) - (old_target - i). That delta
+                 * is usually negative, so shift it as an unsigned value: left-shifting a
+                 * negative signed integer is undefined behavior, while the unsigned
+                 * wrap-around produces exactly the two's complement field update. */
+                instr += ((uint32_t)(new_jump_target - old_jump_target + (i - j))) << 8;
+                break;
+            case JOP_JUMP_IF:
+            case JOP_JUMP_IF_NIL:
+            case JOP_JUMP_IF_NOT:
+            case JOP_JUMP_IF_NOT_NIL:
+                /* relative pc is in ES field of instruction */
+                old_jump_target = i + (((int32_t)instr) >> 16);
+                new_jump_target = pc_map[old_jump_target];
+                /* Same unsigned shift as above, for the 16 bit offset field. */
+                instr += ((uint32_t)(new_jump_target - old_jump_target + (i - j))) << 16;
+                break;
+            default:
+                break;
+        }
+        def->bytecode[j] = instr;
+        if (def->sourcemap != NULL) {
+            def->sourcemap[j] = def->sourcemap[i];
+        }
+        j++;
+    }
+
+    /* Rewrite symbolmap */
+    for (int32_t i = 0; i < def->symbolmap_length; i++) {
+        JanetSymbolMap *sm = def->symbolmap + i;
+        /* Don't rewrite upvalue mappings (marked by birth_pc == UINT32_MAX) */
+        if (sm->birth_pc < UINT32_MAX) {
+            sm->birth_pc = pc_map[sm->birth_pc];
+            sm->death_pc = pc_map[sm->death_pc];
+        }
+    }
+
+    def->bytecode_length = new_bytecode_length;
+    janet_sfree(pc_map);
+}
+
+/* Remove redundant loads, moves and other instructions if possible and convert them to
+ * noops.
+ * Input is assumed valid bytecode.
+ *
+ * The pass has two phases:
+ *  1. Record, in a bitset, every slot that some instruction reads or that is flagged in
+ *     def->closure_bitset.
+ *  2. Overwrite with JOP_NOOP each side-effect-free load/move whose destination slot was
+ *     never recorded.
+ * The bytecode length is not changed here; janet_bytecode_remove_noops can be used
+ * afterwards to compact the result. */
+void janet_bytecode_movopt(JanetFuncDef *def) {
+    JanetcRegisterAllocator ra;
+    janetc_regalloc_init(&ra);
+
+    /* Look for slots that have writes but no reads (and aren't in the closure bitset).
+     * Slots flagged in the closure bitset are always treated as read. */
+    if (def->closure_bitset != NULL) {
+        for (int32_t i = 0; i < def->slotcount; i++) {
+            int32_t index = i >> 5;
+            uint32_t mask = 1U << (((uint32_t) i) & 31);
+            if (def->closure_bitset[index] & mask) {
+                janetc_regalloc_touch(&ra, i);
+            }
+        }
+    }
+
+/* Operand fields of the 32 bit instruction word held in `instr`; the low 7 bits are the opcode. */
+#define AA ((instr >> 8) & 0xFF)
+#define BB ((instr >> 16) & 0xFF)
+#define CC (instr >> 24)
+#define DD (instr >> 8)
+#define EE (instr >> 16)
+
+    /* Check reads and writes */
+    for (int32_t i = 0; i < def->bytecode_length; i++) {
+        uint32_t instr = def->bytecode[i];
+        switch (instr & 0x7F) {
+
+            /* Group instructions by how they read from slots */
+
+            /* No reads or writes */
+            default:
+                janet_assert(0, "unhandled instruction");
+            case JOP_JUMP:
+            case JOP_NOOP:
+            case JOP_RETURN_NIL:
+            /* Write A */
+            case JOP_LOAD_INTEGER:
+            case JOP_LOAD_CONSTANT:
+            case JOP_LOAD_UPVALUE:
+            case JOP_CLOSURE:
+            /* Write D */
+            case JOP_LOAD_NIL:
+            case JOP_LOAD_TRUE:
+            case JOP_LOAD_FALSE:
+            case JOP_LOAD_SELF:
+            case JOP_MAKE_ARRAY:
+            case JOP_MAKE_BUFFER:
+            case JOP_MAKE_STRING:
+            case JOP_MAKE_STRUCT:
+            case JOP_MAKE_TABLE:
+            case JOP_MAKE_TUPLE:
+            case JOP_MAKE_BRACKET_TUPLE:
+                break;
+
+            /* Read A */
+            case JOP_ERROR:
+            case JOP_TYPECHECK:
+            case JOP_JUMP_IF:
+            case JOP_JUMP_IF_NOT:
+            case JOP_JUMP_IF_NIL:
+            case JOP_JUMP_IF_NOT_NIL:
+            case JOP_SET_UPVALUE:
+            /* Write E, Read A */
+            case JOP_MOVE_FAR:
+                janetc_regalloc_touch(&ra, AA);
+                break;
+
+            /* Read B */
+            case JOP_SIGNAL:
+            /* Write A, Read B */
+            case JOP_ADD_IMMEDIATE:
+            case JOP_MULTIPLY_IMMEDIATE:
+            case JOP_DIVIDE_IMMEDIATE:
+            case JOP_SHIFT_LEFT_IMMEDIATE:
+            case JOP_SHIFT_RIGHT_IMMEDIATE:
+            case JOP_SHIFT_RIGHT_UNSIGNED_IMMEDIATE:
+            case JOP_GREATER_THAN_IMMEDIATE:
+            case JOP_LESS_THAN_IMMEDIATE:
+            case JOP_EQUALS_IMMEDIATE:
+            case JOP_NOT_EQUALS_IMMEDIATE:
+            case JOP_GET_INDEX:
+                janetc_regalloc_touch(&ra, BB);
+                break;
+
+            /* Read D */
+            case JOP_RETURN:
+            case JOP_PUSH:
+            case JOP_PUSH_ARRAY:
+            case JOP_TAILCALL:
+                janetc_regalloc_touch(&ra, DD);
+                break;
+
+            /* Write A, Read E */
+            case JOP_MOVE_NEAR:
+            case JOP_LENGTH:
+            case JOP_BNOT:
+            case JOP_CALL:
+                janetc_regalloc_touch(&ra, EE);
+                break;
+
+            /* Read A, B */
+            case JOP_PUT_INDEX:
+                janetc_regalloc_touch(&ra, AA);
+                janetc_regalloc_touch(&ra, BB);
+                break;
+
+            /* Read A, E */
+            case JOP_PUSH_2:
+                janetc_regalloc_touch(&ra, AA);
+                janetc_regalloc_touch(&ra, EE);
+                break;
+
+            /* Read B, C */
+            case JOP_PROPAGATE:
+            /* Write A, Read B and C */
+            case JOP_BAND:
+            case JOP_BOR:
+            case JOP_BXOR:
+            case JOP_ADD:
+            case JOP_SUBTRACT:
+            case JOP_MULTIPLY:
+            case JOP_DIVIDE:
+            case JOP_MODULO:
+            case JOP_REMAINDER:
+            case JOP_SHIFT_LEFT:
+            case JOP_SHIFT_RIGHT:
+            case JOP_SHIFT_RIGHT_UNSIGNED:
+            case JOP_GREATER_THAN:
+            case JOP_LESS_THAN:
+            case JOP_EQUALS:
+            case JOP_COMPARE:
+            case JOP_IN:
+            case JOP_GET:
+            case JOP_GREATER_THAN_EQUAL:
+            case JOP_LESS_THAN_EQUAL:
+            case JOP_NOT_EQUALS:
+            case JOP_CANCEL:
+            case JOP_RESUME:
+            case JOP_NEXT:
+                janetc_regalloc_touch(&ra, BB);
+                janetc_regalloc_touch(&ra, CC);
+                break;
+
+            /* Read A, B, C */
+            case JOP_PUT:
+            case JOP_PUSH_3:
+                janetc_regalloc_touch(&ra, AA);
+                janetc_regalloc_touch(&ra, BB);
+                janetc_regalloc_touch(&ra, CC);
+                break;
+        }
+    }
+
+    /* Iterate and set noops on instructions that make writes that no one ever reads.
+     * Only set noops for instructions with no side effects - moves, loads, etc. that can't
+     * raise errors (outside of systemic errors like oom or stack overflow).
+     *
+     * The JOP_MAKE_* instructions are deliberately NOT candidates even when their
+     * destination is unread: they build their result from the values previously pushed
+     * with JOP_PUSH*, and consuming those pushed values is a side effect. The pushes are
+     * never removed by this pass, so erasing the make instruction would leave them
+     * behind to be picked up by the next call or make instruction. */
+    for (int32_t i = 0; i < def->bytecode_length; i++) {
+        uint32_t instr = def->bytecode[i];
+        switch (instr & 0x7F) {
+            default:
+                break;
+            /* Write D */
+            case JOP_LOAD_NIL:
+            case JOP_LOAD_TRUE:
+            case JOP_LOAD_FALSE:
+            case JOP_LOAD_SELF: {
+                if (!janetc_regalloc_check(&ra, DD)) {
+                    def->bytecode[i] = JOP_NOOP;
+                }
+            }
+            break;
+            /* Write E, Read A */
+            case JOP_MOVE_FAR: {
+                if (!janetc_regalloc_check(&ra, EE)) {
+                    def->bytecode[i] = JOP_NOOP;
+                }
+            }
+            break;
+            /* Write A, Read E */
+            case JOP_MOVE_NEAR:
+            /* Write A, Read B */
+            /* NOTE(review): confirm JOP_GET_INDEX can never raise for its operand;
+             * otherwise removing it would hide an error. */
+            case JOP_GET_INDEX:
+            /* Write A */
+            case JOP_LOAD_INTEGER:
+            case JOP_LOAD_CONSTANT:
+            case JOP_LOAD_UPVALUE:
+            case JOP_CLOSURE: {
+                if (!janetc_regalloc_check(&ra, AA)) {
+                    def->bytecode[i] = JOP_NOOP;
+                }
+            }
+            break;
+        }
+    }
+
+    janetc_regalloc_deinit(&ra);
+#undef AA
+#undef BB
+#undef CC
+#undef DD
+#undef EE
+}
+
/* Verify some bytecode */ int janet_verify(JanetFuncDef *def) { int vargs = !!(def->flags & JANET_FUNCDEF_FLAG_VARARG); diff --git a/src/core/compile.c b/src/core/compile.c index 656cdd3d..29a57273 100644 --- a/src/core/compile.c +++ b/src/core/compile.c @@ -989,6 +989,10 @@ JanetFuncDef *janetc_pop_funcdef(JanetCompiler *c) { /* Pop the scope */ janetc_popscope(c); + /* Do basic optimization */ + janet_bytecode_movopt(def); + janet_bytecode_remove_noops(def); + return def; } diff --git a/src/core/compile.h b/src/core/compile.h index 39dfa8a8..5863c0b8 100644 --- a/src/core/compile.h +++ b/src/core/compile.h @@ -267,4 +267,8 @@ JanetSlot janetc_cslot(Janet x); /* Search for a symbol */ JanetSlot janetc_resolve(JanetCompiler *c, const uint8_t *sym); +/* Bytecode optimization */ +void janet_bytecode_movopt(JanetFuncDef *def); +void janet_bytecode_remove_noops(JanetFuncDef *def); + #endif diff --git a/src/core/regalloc.c b/src/core/regalloc.c index 1b8b7071..5df2a242 100644 --- a/src/core/regalloc.c +++ b/src/core/regalloc.c @@ -27,6 +27,8 
@@ #include "util.h" #endif +/* The JanetRegisterAllocator is really just a bitset. */ + void janetc_regalloc_init(JanetcRegisterAllocator *ra) { ra->chunks = NULL; ra->count = 0; @@ -139,6 +141,14 @@ void janetc_regalloc_free(JanetcRegisterAllocator *ra, int32_t reg) { ra->chunks[chunk] &= ~ithbit(bit); }
+/* Report (as 1 or 0) whether the bit for register `reg` is set. 32 registers per chunk. */
+int janetc_regalloc_check(JanetcRegisterAllocator *ra, int32_t reg) {
+    const int32_t chunk_index = reg >> 5;
+    /* pushchunk is called until ra->count exceeds chunk_index, so the read is in range. */
+    while (ra->count <= chunk_index) pushchunk(ra);
+    return (ra->chunks[chunk_index] & ithbit(reg & 0x1F)) ? 1 : 0;
+}
+
/* Get a register that will fit in 8 bits (< 256). Do not call this * twice with the same value of nth without calling janetc_regalloc_free * on the returned register before. */ diff --git a/src/core/regalloc.h b/src/core/regalloc.h index b7521a52..c02e4757 100644 --- a/src/core/regalloc.h +++ b/src/core/regalloc.h @@ -56,5 +56,6 @@ int32_t janetc_regalloc_temp(JanetcRegisterAllocator *ra, JanetcRegisterTemp nth void janetc_regalloc_freetemp(JanetcRegisterAllocator *ra, int32_t reg, JanetcRegisterTemp nth); void janetc_regalloc_clone(JanetcRegisterAllocator *dest, JanetcRegisterAllocator *src); void janetc_regalloc_touch(JanetcRegisterAllocator *ra, int32_t reg); +int janetc_regalloc_check(JanetcRegisterAllocator *ra, int32_t reg); #endif diff --git a/src/core/specials.c b/src/core/specials.c index c186245f..30977416 100644 --- a/src/core/specials.c +++ b/src/core/specials.c @@ -354,7 +354,17 @@ static int namelocal(JanetCompiler *c, const uint8_t *head, int32_t flags, Janet int isUnnamedRegister = !(ret.flags & JANET_SLOT_NAMED) && ret.index > 0 && ret.envindex >= 0; - if (!isUnnamedRegister) { + /* optimization for `(def x my-def)` - don't emit a movn/movf instruction, we can just alias my-def */ + /* TODO - implement optimization for `(def x my-var)` correctly as well w/ de-aliasing */ + int canAlias = !(flags & JANET_SLOT_MUTABLE) && + !(ret.flags & JANET_SLOT_MUTABLE) && + (ret.flags & JANET_SLOT_NAMED) && 
+ (ret.index >= 0) && + (ret.envindex == -1); + if (canAlias) { + ret.flags &= ~JANET_SLOT_MUTABLE; + isUnnamedRegister = 1; /* don't free slot after use - is an alias for another slot */ + } else if (!isUnnamedRegister) { /* Slot is not able to be named */ JanetSlot localslot = janetc_farslot(c); janetc_copy(c, localslot, ret); diff --git a/test/suite0015.janet b/test/suite0015.janet index b747389a..bb00a9b6 100644 --- a/test/suite0015.janet +++ b/test/suite0015.janet @@ -4,7 +4,7 @@ (start-suite 15) (assert (deep= (in (disasm (defn a [] (def x 10) x)) :symbolmap) - @[[0 3 0 'a] [1 3 1 'x]]) + @[[0 2 0 'a] [0 2 1 'x]]) "symbolslots when *debug* is true") (defn a [arg] @@ -33,11 +33,11 @@ (def y 20) (def z 30) (+ x y z)))) :symbolmap) - @[[0 7 0 'arg] - [0 7 1 'a] - [1 7 2 'x] - [2 7 3 'y] - [3 7 4 'z]]) + @[[0 6 0 'arg] + [0 6 1 'a] + [0 6 2 'x] + [1 6 3 'y] + [2 6 4 'z]]) "arg & inner symbolslots") # buffer/push-at @@ -45,4 +45,6 @@ (assert (deep= @"abc456789" (buffer/push-at @"abc123" 3 "456789")) "buffer/push-at 2") (assert (deep= @"abc423" (buffer/push-at @"abc123" 3 "4")) "buffer/push-at 3") +(assert (= 10 (do (var x 10) (def y x) (++ x) y)) "no invalid aliasing") + (end-suite)