1
0
mirror of https://github.com/janet-lang/janet synced 2024-12-25 07:50:27 +00:00

Merge branch 'bytecode_opt'

This commit is contained in:
Calvin Rose 2023-05-31 07:57:13 -05:00
commit 4f8f7f66ee
8 changed files with 322 additions and 8 deletions

View File

@ -51,7 +51,7 @@ LDFLAGS?=-rdynamic
RUN:=$(RUN)
COMMON_CFLAGS:=-std=c99 -Wall -Wextra -Isrc/include -Isrc/conf -fvisibility=hidden -fPIC
BOOT_CFLAGS:=-DJANET_BOOTSTRAP -DJANET_BUILD=$(JANET_BUILD) -O0 $(COMMON_CFLAGS)
BOOT_CFLAGS:=-DJANET_BOOTSTRAP -DJANET_BUILD=$(JANET_BUILD) -O0 $(COMMON_CFLAGS) -g
BUILD_CFLAGS:=$(CFLAGS) $(COMMON_CFLAGS)
# For installation

View File

@ -25,6 +25,7 @@
#include <janet.h>
#include "gc.h"
#include "util.h"
#include "regalloc.h"
#endif
/* Look up table for instructions */
@ -106,6 +107,288 @@ enum JanetInstructionType janet_instructions[JOP_INSTRUCTION_COUNT] = {
JINT_SSS /* JOP_CANCEL, */
};
/* Remove all noops while preserving jumps and debugging information.
 * Useful as part of a filtering compiler pass.
 *
 * The bytecode (and the sourcemap, when present) is compacted in place and
 * def->bytecode_length is lowered to the new instruction count. Relative
 * jump offsets and the birth/death pcs of non-upvalue symbolmap entries are
 * rewritten to refer to the new instruction positions. Input is assumed to
 * be valid bytecode (every jump target lies inside the function). */
void janet_bytecode_remove_noops(JanetFuncDef *def) {

    /* Get an instruction rewrite map so we can rewrite jumps. pc_map[i] is
     * the new index of old instruction i; the extra final entry maps the
     * one-past-the-end pc, which symbolmap death pcs may refer to. */
    uint32_t *pc_map = janet_smalloc(sizeof(uint32_t) * (1 + (size_t) def->bytecode_length));
    uint32_t new_bytecode_length = 0;
    for (int32_t i = 0; i < def->bytecode_length; i++) {
        uint32_t instr = def->bytecode[i];
        uint32_t opcode = instr & 0x7F;
        pc_map[i] = new_bytecode_length;
        if (opcode != JOP_NOOP) {
            new_bytecode_length++;
        }
    }
    pc_map[def->bytecode_length] = new_bytecode_length;

    /* Linear scan rewrite bytecode and sourcemap. Also fix jumps.
     * i indexes the old stream, j the compacted stream (j <= i always). */
    int32_t j = 0;
    for (int32_t i = 0; i < def->bytecode_length; i++) {
        uint32_t instr = def->bytecode[i];
        uint32_t opcode = instr & 0x7F;
        int32_t old_jump_target = 0;
        int32_t new_jump_target = 0;
        int32_t delta = 0;
        switch (opcode) {
            case JOP_NOOP:
                continue;
            case JOP_JUMP:
                /* relative pc is in DS field of instruction */
                old_jump_target = i + (((int32_t)instr) >> 8);
                new_jump_target = pc_map[old_jump_target];
                /* Change in relative offset: (new_target - j) - (old_target - i).
                 * The delta is often negative, so shift it as unsigned: left
                 * shifting a negative signed value is undefined behavior, while
                 * the modular unsigned add yields the intended field update. */
                delta = new_jump_target - old_jump_target + (i - j);
                instr += ((uint32_t) delta) << 8;
                break;
            case JOP_JUMP_IF:
            case JOP_JUMP_IF_NIL:
            case JOP_JUMP_IF_NOT:
            case JOP_JUMP_IF_NOT_NIL:
                /* relative pc is in ES field of instruction */
                old_jump_target = i + (((int32_t)instr) >> 16);
                new_jump_target = pc_map[old_jump_target];
                delta = new_jump_target - old_jump_target + (i - j);
                instr += ((uint32_t) delta) << 16;
                break;
            default:
                break;
        }
        def->bytecode[j] = instr;
        if (def->sourcemap != NULL) {
            def->sourcemap[j] = def->sourcemap[i];
        }
        j++;
    }

    /* Rewrite symbolmap */
    for (int32_t i = 0; i < def->symbolmap_length; i++) {
        JanetSymbolMap *sm = def->symbolmap + i;
        /* Don't rewrite upvalue mappings */
        if (sm->birth_pc < UINT32_MAX) {
            sm->birth_pc = pc_map[sm->birth_pc];
            sm->death_pc = pc_map[sm->death_pc];
        }
    }

    def->bytecode_length = new_bytecode_length;
    janet_sfree(pc_map);
}
/* Remove redundant loads, moves and other instructions if possible and convert them to
 * noops. Input is assumed valid bytecode.
 *
 * Each pass marks every slot that is read by some instruction (plus every slot in the
 * closure bitset), then replaces side-effect-free writes to unmarked slots with JOP_NOOP.
 * Passes repeat until one removes nothing, since removing a move can leave its source
 * slot unread. The noops themselves are left in place; bytecode_length is unchanged. */
void janet_bytecode_movopt(JanetFuncDef *def) {
    JanetcRegisterAllocator ra;
    int recur = 1;

    /* Iterate this until no more instructions can be removed. */
    while (recur) {
        /* The allocator is used purely as a "slot is read" bitset for this pass. */
        janetc_regalloc_init(&ra);

        /* Look for slots that have writes but no reads (and aren't in the closure bitset). */
        if (def->closure_bitset != NULL) {
            for (int32_t i = 0; i < def->slotcount; i++) {
                int32_t index = i >> 5;
                uint32_t mask = 1U << (((uint32_t) i) & 31);
                if (def->closure_bitset[index] & mask) {
                    janetc_regalloc_touch(&ra, i);
                }
            }
        }

        /* Operand field extractors for the current `instr`. */
#define AA ((instr >> 8) & 0xFF)
#define BB ((instr >> 16) & 0xFF)
#define CC (instr >> 24)
#define DD (instr >> 8)
#define EE (instr >> 16)

        /* Check reads and writes */
        for (int32_t i = 0; i < def->bytecode_length; i++) {
            uint32_t instr = def->bytecode[i];
            switch (instr & 0x7F) {

                /* Group instructions by how they read from slots */

                /* No reads or writes */
                default:
                    janet_assert(0, "unhandled instruction");
                /* fallthrough */
                case JOP_JUMP:
                case JOP_NOOP:
                case JOP_RETURN_NIL:
                /* Write A */
                case JOP_LOAD_INTEGER:
                case JOP_LOAD_CONSTANT:
                case JOP_LOAD_UPVALUE:
                case JOP_CLOSURE:
                /* Write D */
                case JOP_LOAD_NIL:
                case JOP_LOAD_TRUE:
                case JOP_LOAD_FALSE:
                case JOP_LOAD_SELF:
                case JOP_MAKE_ARRAY:
                case JOP_MAKE_BUFFER:
                case JOP_MAKE_STRING:
                case JOP_MAKE_STRUCT:
                case JOP_MAKE_TABLE:
                case JOP_MAKE_TUPLE:
                case JOP_MAKE_BRACKET_TUPLE:
                    break;

                /* Read A */
                case JOP_ERROR:
                case JOP_TYPECHECK:
                case JOP_JUMP_IF:
                case JOP_JUMP_IF_NOT:
                case JOP_JUMP_IF_NIL:
                case JOP_JUMP_IF_NOT_NIL:
                case JOP_SET_UPVALUE:
                /* Write E, Read A */
                case JOP_MOVE_FAR:
                    janetc_regalloc_touch(&ra, AA);
                    break;

                /* Read B */
                case JOP_SIGNAL:
                /* Write A, Read B */
                case JOP_ADD_IMMEDIATE:
                case JOP_MULTIPLY_IMMEDIATE:
                case JOP_DIVIDE_IMMEDIATE:
                case JOP_SHIFT_LEFT_IMMEDIATE:
                case JOP_SHIFT_RIGHT_IMMEDIATE:
                case JOP_SHIFT_RIGHT_UNSIGNED_IMMEDIATE:
                case JOP_GREATER_THAN_IMMEDIATE:
                case JOP_LESS_THAN_IMMEDIATE:
                case JOP_EQUALS_IMMEDIATE:
                case JOP_NOT_EQUALS_IMMEDIATE:
                case JOP_GET_INDEX:
                    janetc_regalloc_touch(&ra, BB);
                    break;

                /* Read D */
                case JOP_RETURN:
                case JOP_PUSH:
                case JOP_PUSH_ARRAY:
                case JOP_TAILCALL:
                    janetc_regalloc_touch(&ra, DD);
                    break;

                /* Write A, Read E */
                case JOP_MOVE_NEAR:
                case JOP_LENGTH:
                case JOP_BNOT:
                case JOP_CALL:
                    janetc_regalloc_touch(&ra, EE);
                    break;

                /* Read A, B */
                case JOP_PUT_INDEX:
                    janetc_regalloc_touch(&ra, AA);
                    janetc_regalloc_touch(&ra, BB);
                    break;

                /* Read A, E */
                case JOP_PUSH_2:
                    janetc_regalloc_touch(&ra, AA);
                    janetc_regalloc_touch(&ra, EE);
                    break;

                /* Read B, C */
                case JOP_PROPAGATE:
                /* Write A, Read B and C */
                case JOP_BAND:
                case JOP_BOR:
                case JOP_BXOR:
                case JOP_ADD:
                case JOP_SUBTRACT:
                case JOP_MULTIPLY:
                case JOP_DIVIDE:
                case JOP_MODULO:
                case JOP_REMAINDER:
                case JOP_SHIFT_LEFT:
                case JOP_SHIFT_RIGHT:
                case JOP_SHIFT_RIGHT_UNSIGNED:
                case JOP_GREATER_THAN:
                case JOP_LESS_THAN:
                case JOP_EQUALS:
                case JOP_COMPARE:
                case JOP_IN:
                case JOP_GET:
                case JOP_GREATER_THAN_EQUAL:
                case JOP_LESS_THAN_EQUAL:
                case JOP_NOT_EQUALS:
                case JOP_CANCEL:
                case JOP_RESUME:
                case JOP_NEXT:
                    janetc_regalloc_touch(&ra, BB);
                    janetc_regalloc_touch(&ra, CC);
                    break;

                /* Read A, B, C */
                case JOP_PUT:
                case JOP_PUSH_3:
                    janetc_regalloc_touch(&ra, AA);
                    janetc_regalloc_touch(&ra, BB);
                    janetc_regalloc_touch(&ra, CC);
                    break;
            }
        }

        /* Iterate and set noops on instructions that make writes that no one ever reads.
         * Only set noops for instructions with no side effects - moves, loads, etc. that can't
         * raise errors (outside of systemic errors like oom or stack overflow).
         *
         * The JOP_MAKE_* constructors are deliberately NOT removable even when their
         * destination is unread: besides writing D they consume the arguments queued by the
         * preceding push instructions. Dropping one would leave those arguments queued and
         * hand them to the next call or constructor. */
        recur = 0;
        for (int32_t i = 0; i < def->bytecode_length; i++) {
            uint32_t instr = def->bytecode[i];
            switch (instr & 0x7F) {
                default:
                    break;
                /* Write D */
                case JOP_LOAD_NIL:
                case JOP_LOAD_TRUE:
                case JOP_LOAD_FALSE:
                case JOP_LOAD_SELF: {
                    if (!janetc_regalloc_check(&ra, DD)) {
                        def->bytecode[i] = JOP_NOOP;
                        recur = 1;
                    }
                }
                break;
                /* Write E, Read A */
                case JOP_MOVE_FAR: {
                    if (!janetc_regalloc_check(&ra, EE)) {
                        def->bytecode[i] = JOP_NOOP;
                        recur = 1;
                    }
                }
                break;
                /* Write A, Read E */
                case JOP_MOVE_NEAR:
                /* Write A, Read B */
                case JOP_GET_INDEX:
                /* Write A */
                case JOP_LOAD_INTEGER:
                case JOP_LOAD_CONSTANT:
                case JOP_LOAD_UPVALUE:
                case JOP_CLOSURE: {
                    if (!janetc_regalloc_check(&ra, AA)) {
                        def->bytecode[i] = JOP_NOOP;
                        recur = 1;
                    }
                }
                break;
            }
        }

        janetc_regalloc_deinit(&ra);
#undef AA
#undef BB
#undef CC
#undef DD
#undef EE
    }
}
/* Verify some bytecode */
int janet_verify(JanetFuncDef *def) {
int vargs = !!(def->flags & JANET_FUNCDEF_FLAG_VARARG);

View File

@ -989,6 +989,10 @@ JanetFuncDef *janetc_pop_funcdef(JanetCompiler *c) {
/* Pop the scope */
janetc_popscope(c);
/* Do basic optimization */
janet_bytecode_movopt(def);
janet_bytecode_remove_noops(def);
return def;
}

View File

@ -267,4 +267,8 @@ JanetSlot janetc_cslot(Janet x);
/* Search for a symbol */
JanetSlot janetc_resolve(JanetCompiler *c, const uint8_t *sym);
/* Bytecode optimization */
/* Replace side-effect-free loads and moves whose destination slot is never read with noops. */
void janet_bytecode_movopt(JanetFuncDef *def);
/* Strip noop instructions in place, fixing up jump offsets, the sourcemap and the symbolmap. */
void janet_bytecode_remove_noops(JanetFuncDef *def);
#endif

View File

@ -27,6 +27,8 @@
#include "util.h"
#endif
/* The JanetRegisterAllocator is really just a bitset. */
void janetc_regalloc_init(JanetcRegisterAllocator *ra) {
ra->chunks = NULL;
ra->count = 0;
@ -139,6 +141,14 @@ void janetc_regalloc_free(JanetcRegisterAllocator *ra, int32_t reg) {
ra->chunks[chunk] &= ~ithbit(bit);
}
/* Query whether register `reg` is marked in the allocator's bitset.
 * Returns 1 when the bit is set and 0 otherwise. The chunk array is
 * grown first if `reg` lies beyond the chunks allocated so far. */
int janetc_regalloc_check(JanetcRegisterAllocator *ra, int32_t reg) {
    /* 32 registers per chunk: high bits pick the chunk, low 5 bits the bit. */
    const int32_t chunk_index = reg >> 5;
    const int32_t bit_index = reg & 0x1F;
    for (; chunk_index >= ra->count;) {
        pushchunk(ra);
    }
    if (ra->chunks[chunk_index] & ithbit(bit_index)) {
        return 1;
    }
    return 0;
}
/* Get a register that will fit in 8 bits (< 256). Do not call this
* twice with the same value of nth without calling janetc_regalloc_free
* on the returned register before. */

View File

@ -56,5 +56,6 @@ int32_t janetc_regalloc_temp(JanetcRegisterAllocator *ra, JanetcRegisterTemp nth
void janetc_regalloc_freetemp(JanetcRegisterAllocator *ra, int32_t reg, JanetcRegisterTemp nth);
void janetc_regalloc_clone(JanetcRegisterAllocator *dest, JanetcRegisterAllocator *src);
void janetc_regalloc_touch(JanetcRegisterAllocator *ra, int32_t reg);
/* Returns nonzero if register `reg` is marked in the allocator's bitset, 0 otherwise. */
int janetc_regalloc_check(JanetcRegisterAllocator *ra, int32_t reg);
#endif

View File

@ -354,7 +354,17 @@ static int namelocal(JanetCompiler *c, const uint8_t *head, int32_t flags, Janet
int isUnnamedRegister = !(ret.flags & JANET_SLOT_NAMED) &&
ret.index > 0 &&
ret.envindex >= 0;
if (!isUnnamedRegister) {
/* optimization for `(def x my-def)` - don't emit a movn/movf instruction, we can just alias my-def */
/* TODO - implement optimization for `(def x my-var)` correctly as well w/ de-aliasing */
int canAlias = !(flags & JANET_SLOT_MUTABLE) &&
!(ret.flags & JANET_SLOT_MUTABLE) &&
(ret.flags & JANET_SLOT_NAMED) &&
(ret.index >= 0) &&
(ret.envindex == -1);
if (canAlias) {
ret.flags &= ~JANET_SLOT_MUTABLE;
isUnnamedRegister = 1; /* don't free slot after use - is an alias for another slot */
} else if (!isUnnamedRegister) {
/* Slot is not able to be named */
JanetSlot localslot = janetc_farslot(c);
janetc_copy(c, localslot, ret);

View File

@ -4,7 +4,7 @@
(start-suite 15)
(assert (deep= (in (disasm (defn a [] (def x 10) x)) :symbolmap)
@[[0 3 0 'a] [1 3 1 'x]])
@[[0 2 0 'a] [0 2 1 'x]])
"symbolslots when *debug* is true")
(defn a [arg]
@ -33,11 +33,11 @@
(def y 20)
(def z 30)
(+ x y z)))) :symbolmap)
@[[0 7 0 'arg]
[0 7 1 'a]
[1 7 2 'x]
[2 7 3 'y]
[3 7 4 'z]])
@[[0 6 0 'arg]
[0 6 1 'a]
[0 6 2 'x]
[1 6 3 'y]
[2 6 4 'z]])
"arg & inner symbolslots")
# buffer/push-at
@ -45,4 +45,6 @@
(assert (deep= @"abc456789" (buffer/push-at @"abc123" 3 "456789")) "buffer/push-at 2")
(assert (deep= @"abc423" (buffer/push-at @"abc123" 3 "4")) "buffer/push-at 3")
(assert (= 10 (do (var x 10) (def y x) (++ x) y)) "no invalid aliasing")
(end-suite)