From d68eae9592459d888c0f5ae8996dd723ab0c3044 Mon Sep 17 00:00:00 2001 From: bakpakin Date: Sat, 20 Jan 2018 17:19:47 -0500 Subject: [PATCH] Add bytecode verification --- CMakeLists.txt | 10 +- src/assembler/asm.c | 44 ++----- src/compiler/compile.c | 3 +- src/core/bytecode.c | 246 +++++++++++++++++++++++++++++++++++ src/core/stl.c | 3 +- src/include/dst/dst.h | 1 + src/include/dst/dstopcodes.h | 38 +++++- src/parser/parse.c | 6 +- test/suite0.dst | 9 ++ 9 files changed, 312 insertions(+), 48 deletions(-) create mode 100644 src/core/bytecode.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 52f643da..6cd5184d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,7 @@ set(CORE_SOURCES src/core/abstract.c src/core/array.c src/core/buffer.c +src/core/bytecode.c src/core/fiber.c src/core/gc.c src/core/io.c @@ -95,14 +96,13 @@ ${PARSER_SOURCES} # Build the executable add_executable(${TARGET_NAME} ${SOURCES}) target_link_libraries(${TARGET_NAME} m dl) -# TODO dont do this on windows -target_link_libraries(${TARGET_NAME} dl) # Build some modules -add_library(dstvm MODULE ${CORE_SOURCES} ${ASSEMBLER_SOURCES}) -target_link_libraries(dstvm m) +add_library(dstvm SHARED ${CORE_SOURCES} ${ASSEMBLER_SOURCES}) +target_link_libraries(dstvm m dl) -#add_library(dstasm MODULE ${ASSEMBLER_SOURCES}) +add_library(dstasm SHARED ${ASSEMBLER_SOURCES}) +target_link_libraries(dstasm dstvm) #add_library(dstparser MODULE ${PARSER_SOURCES}) #target_link_libraries(dstparser m) diff --git a/src/assembler/asm.c b/src/assembler/asm.c index d0886499..e8d2b43b 100644 --- a/src/assembler/asm.c +++ b/src/assembler/asm.c @@ -27,20 +27,6 @@ #include #include -/* Bytecode op argument types */ - -typedef enum DstOpArgType DstOpArgType; -enum DstOpArgType { - DST_OAT_SLOT, - DST_OAT_ENVIRONMENT, - DST_OAT_CONSTANT, - DST_OAT_INTEGER, - DST_OAT_TYPE, - DST_OAT_SIMPLETYPE, - DST_OAT_LABEL, - DST_OAT_FUNCDEF -}; - /* Convert a slot to to an integer for bytecode */ /* Types of instructions (some of 
them) */ @@ -56,25 +42,6 @@ enum DstOpArgType { * _sc - op.dest.CC.CC (load-constant, closure) */ -/* Various types of instructions */ -typedef enum DstInstructionType DstInstructionType; -enum DstInstructionType { - DIT_0, /* No args */ - DIT_S, /* One slot */ - DIT_L, /* One label */ - DIT_SS, /* Two slots */ - DIT_SL, - DIT_ST, - DIT_SI, - DIT_SD, /* Closures (D for funcDef) */ - DIT_SU, /* Unsigned */ - DIT_SSS, - DIT_SSI, - DIT_SSU, - DIT_SES, - DIT_SC -}; - /* Definition for an instruction in the assembler */ typedef struct DstInstructionDef DstInstructionDef; struct DstInstructionDef { @@ -704,10 +671,17 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, Dst source, int flags) { } } - /* Finish everything and return funcdef */ - dst_asm_deinit(&a); + /* Set environments */ def->environments = realloc(def->environments, def->environments_length * sizeof(int32_t)); + + /* Verify the func def */ + if (dst_verify(def)) { + dst_asm_error(&a, "invalid assembly"); + } + + /* Finish everything and return funcdef */ + dst_asm_deinit(&a); result.funcdef = def; result.status = DST_ASSEMBLE_OK; return result; diff --git a/src/compiler/compile.c b/src/compiler/compile.c index 94d23cde..29b2ce6b 100644 --- a/src/compiler/compile.c +++ b/src/compiler/compile.c @@ -889,7 +889,8 @@ DstCompileResult dst_compile(Dst source, DstTable *env, int flags) { dstc_value(fopts, source); if (c.result.status == DST_COMPILE_OK) { - c.result.funcdef = dstc_pop_funcdef(&c); + DstFuncDef *def = dstc_pop_funcdef(&c); + c.result.funcdef = def; } dstc_deinit(&c); diff --git a/src/core/bytecode.c b/src/core/bytecode.c new file mode 100644 index 00000000..1e9f2aac --- /dev/null +++ b/src/core/bytecode.c @@ -0,0 +1,246 @@ +/* +* Copyright (c) 2017 Calvin Rose +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to +* deal in the Software without restriction, including without limitation 
the +* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +* sell copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +*/ + +#include +#include + +DstInstructionType dst_instructions[DOP_INSTRUCTION_COUNT] = { /* Operand layout of each opcode, indexed by DstOpCode value */ + DIT_0, /* DOP_NOOP, */ + DIT_S, /* DOP_ERROR, */ + DIT_ST, /* DOP_TYPECHECK, */ + DIT_S, /* DOP_RETURN, */ + DIT_0, /* DOP_RETURN_NIL, */ + DIT_SSS, /* DOP_ADD_INTEGER, */ + DIT_SSI, /* DOP_ADD_IMMEDIATE, */ + DIT_SSS, /* DOP_ADD_REAL, */ + DIT_SSS, /* DOP_ADD, */ + DIT_SSS, /* DOP_SUBTRACT_INTEGER, */ + DIT_SSS, /* DOP_SUBTRACT_REAL, */ + DIT_SSS, /* DOP_SUBTRACT, */ + DIT_SSS, /* DOP_MULTIPLY_INTEGER, */ + DIT_SSI, /* DOP_MULTIPLY_IMMEDIATE, */ + DIT_SSS, /* DOP_MULTIPLY_REAL, */ + DIT_SSS, /* DOP_MULTIPLY, */ + DIT_SSS, /* DOP_DIVIDE_INTEGER, */ + DIT_SSI, /* DOP_DIVIDE_IMMEDIATE, */ + DIT_SSS, /* DOP_DIVIDE_REAL, */ + DIT_SSS, /* DOP_DIVIDE, */ + DIT_SSS, /* DOP_BAND, */ + DIT_SSS, /* DOP_BOR, */ + DIT_SSS, /* DOP_BXOR, */ + DIT_SS, /* DOP_BNOT, */ + DIT_SSS, /* DOP_SHIFT_LEFT, */ + DIT_SSI, /* DOP_SHIFT_LEFT_IMMEDIATE, */ + DIT_SSS, /* DOP_SHIFT_RIGHT, */ + DIT_SSI, /* DOP_SHIFT_RIGHT_IMMEDIATE, */ + DIT_SSS, /* DOP_SHIFT_RIGHT_UNSIGNED, */ + DIT_SSU, /* 
DOP_SHIFT_RIGHT_UNSIGNED_IMMEDIATE, */ + DIT_SS, /* DOP_MOVE_FAR, */ + DIT_SS, /* 
DOP_MOVE_NEAR, */ + DIT_L, /* DOP_JUMP, */ + DIT_SL, /* DOP_JUMP_IF, */ + DIT_SL, /* DOP_JUMP_IF_NOT, */ + DIT_SSS, /* DOP_GREATER_THAN, */ + DIT_SSS, /* DOP_LESS_THAN, */ + DIT_SSS, /* DOP_EQUALS, */ + DIT_SSS, /* DOP_COMPARE, */ + DIT_S, /* DOP_LOAD_NIL, */ + DIT_S, /* DOP_LOAD_TRUE, */ + DIT_S, /* DOP_LOAD_FALSE, */ + DIT_SI, /* DOP_LOAD_INTEGER, */ + DIT_SC, /* DOP_LOAD_CONSTANT, */ + DIT_SES, /* DOP_LOAD_UPVALUE, */ + DIT_S, /* DOP_LOAD_SELF, */ + DIT_SES, /* DOP_SET_UPVALUE, */ + DIT_SD, /* DOP_CLOSURE, */ + DIT_S, /* DOP_PUSH, */ + DIT_SS, /* DOP_PUSH_2, */ + DIT_SSS, /* DOP_PUSH_3, */ + DIT_S, /* DOP_PUSH_ARRAY, */ + DIT_SS, /* DOP_CALL, */ + DIT_S, /* DOP_TAILCALL, */ + DIT_SSS, /* DOP_TRANSFER, */ + DIT_SSS, /* DOP_GET, */ + DIT_SSS, /* DOP_PUT, */ + DIT_SSU, /* DOP_GET_INDEX, */ + DIT_SSU, /* DOP_PUT_INDEX, */ + DIT_SS /* DOP_LENGTH */ +}; + +/* Pending work item for the stack-driven traversal: the sub funcdef of def at position index is verified next */ +typedef struct Node Node; +struct Node { + DstFuncDef *def; + int32_t index; +}; + +/* An in memory stack of FuncDefs to verify */ + +/* NOTE(review): plain file-scope statics with no thread-local specifier, so this state is shared by every caller */ +static Node *stack = NULL; +static int32_t stackcap = 0; +static int32_t stackcount = 0; + +/* Push a Node to the stack, growing the backing array when it is full */ +static void push(DstFuncDef *def, int32_t index) { + Node n; + n.def = def; + n.index = index; + if (stackcount >= stackcap) { + stackcap = 2 * stackcount + 2; + stack = realloc(stack, sizeof(Node) * stackcap); + if (!stack) { + DST_OUT_OF_MEMORY; + } + } + stack[stackcount++] = n; +} + +/* Verify the bytecode of a single funcdef; sub funcdefs are only queued on the stack. Returns 0 if valid, else: 1 empty bytecode, 2 arity (plus vararg flag) exceeds slotcount, 3 unknown opcode, 4 slot out of range, 5 jump out of range, 6 bad funcdef index, 7 bad constant index, 8 bad environment index, 9 bad final instruction */ +static int32_t dst_verify_one(DstFuncDef *def) { + int vargs = def->flags & DST_FUNCDEF_FLAG_VARARG; + int32_t i; + int32_t maxslot = def->arity + vargs; + int32_t sc = def->slotcount; + + if (def->bytecode_length == 0) return 1; + + if (maxslot > sc) return 2; + + /* Verify each instruction */ + for (i = 0; i < def->bytecode_length; i++) { + uint32_t instr = def->bytecode[i]; + /* Check for invalid instructions */ + if ((instr & 0xFF) >= DOP_INSTRUCTION_COUNT) { + return 
3; + } + DstInstructionType type = dst_instructions[instr & 0xFF]; + switch (type) { + case DIT_0: + continue; + case DIT_S: + { + if ((int32_t)(instr >> 8) >= sc) return 4; + continue; + } + case DIT_SI: + case DIT_SU: + case DIT_ST: + { + if ((int32_t)((instr >> 8) & 0xFF) >= sc) return 4; + continue; + } + case DIT_L: + { + int32_t jumpdest = i + (((int32_t)instr) >> 8); /* Offset is relative to this instruction */ + if (jumpdest < 0 || jumpdest >= def->bytecode_length) return 5; + continue; + } + case DIT_SS: + { + if ((int32_t)((instr >> 8) & 0xFF) >= sc || + (int32_t)(instr >> 16) >= sc) return 4; + continue; + } + case DIT_SSI: + case DIT_SSU: + { + if ((int32_t)((instr >> 8) & 0xFF) >= sc || + (int32_t)((instr >> 16) & 0xFF) >= sc) return 4; + continue; + } + case DIT_SL: + { + int32_t jumpdest = i + (((int32_t)instr) >> 16); /* Offset is relative to this instruction */ + if ((int32_t)((instr >> 8) & 0xFF) >= sc) return 4; + if (jumpdest < 0 || jumpdest >= def->bytecode_length) return 5; + continue; + } + case DIT_SSS: + { + if (((int32_t)(instr >> 8) & 0xFF) >= sc || + ((int32_t)(instr >> 16) & 0xFF) >= sc || + ((int32_t)(instr >> 24) & 0xFF) >= sc) return 4; + continue; + } + case DIT_SD: + { + if ((int32_t)((instr >> 8) & 0xFF) >= sc) return 4; + if ((int32_t)(instr >> 16) >= def->defs_length) return 6; + continue; + } + case DIT_SC: + { + if ((int32_t)((instr >> 8) & 0xFF) >= sc) return 4; + if ((int32_t)(instr >> 16) >= def->constants_length) return 7; + continue; + } + case DIT_SES: + { + /* How can we check the last slot index? We need info from parent funcdefs. Resort + * to runtime checks for now. Maybe invalid upvalue references could be defaulted + * to nil? (don't commit to this in the long term, though) Only the local slot and environment index are checked here. */ + if ((int32_t)((instr >> 8) & 0xFF) >= sc) return 4; + if ((int32_t)((instr >> 16) & 0xFF) >= def->environments_length) return 8; + continue; + } + } + } + + /* Verify last instruction is either a jump, return, return-nil, error, or tailcall. Eventually, + * some real flow analysis would be ideal, but this should be very effective. 
Will completely + * prevent running over the end of bytecode. However, valid functions with dead code will + * be rejected. */ + { + uint32_t lastop = def->bytecode[def->bytecode_length - 1] & 0xFF; + switch (lastop) { + default: + return 9; + case DOP_RETURN: + case DOP_RETURN_NIL: + case DOP_JUMP: + case DOP_ERROR: + case DOP_TAILCALL: + break; + } + } + + /* Verify sub funcdefs by pushing next node to stack */ + if (def->defs_length) push(def, 0); + + return 0; +} + +/* Verify a funcdef and every nested funcdef. Returns 0 if all are valid, otherwise the first nonzero code from dst_verify_one. NOTE(review): dst.h declares the return type as int, not int32_t. */ +int32_t dst_verify(DstFuncDef *def) { + int32_t status; + stackcount = 0; + status = dst_verify_one(def); + while (!status && stackcount) { + Node n = stack[--stackcount]; + if (n.index < n.def->defs_length) { + status = dst_verify_one(n.def->defs[n.index]); + push(n.def, n.index + 1); + } + } + return status; +} diff --git a/src/core/stl.c b/src/core/stl.c index 2d4441a3..07005aa2 100644 --- a/src/core/stl.c +++ b/src/core/stl.c @@ -283,7 +283,7 @@ DstTable *dst_stl_env() { /* Allow references to the environment */ dst_env_def(env, "_env", ret); - /*Load auxiliary envs */ + /* Load auxiliary envs */ { DstArgs args; args.n = 1; @@ -293,7 +293,6 @@ DstTable *dst_stl_env() { dst_lib_math(args); dst_lib_array(args); dst_lib_buffer(args); - dst_lib_parse(args); } return env; diff --git a/src/include/dst/dst.h b/src/include/dst/dst.h index 096b12f9..44b7368b 100644 --- a/src/include/dst/dst.h +++ b/src/include/dst/dst.h @@ -164,6 +164,7 @@ int dst_gcunrootall(Dst root); /* Functions */ DstFuncDef *dst_funcdef_alloc(); DstFunction *dst_function(DstFuncDef *def, DstFunction *parent); +int dst_verify(DstFuncDef *def); /* Misc */ int dst_equals(Dst x, Dst y); diff --git a/src/include/dst/dstopcodes.h b/src/include/dst/dstopcodes.h index 298af8a5..20e4414f 100644 --- a/src/include/dst/dstopcodes.h +++ b/src/include/dst/dstopcodes.h @@ -23,6 +23,38 @@ #ifndef DST_OPCODES_H_defined #define DST_OPCODES_H_defined +/* Bytecode op argument types */ +typedef enum DstOpArgType DstOpArgType; +enum 
DstOpArgType { + DST_OAT_SLOT, + DST_OAT_ENVIRONMENT, + DST_OAT_CONSTANT, + DST_OAT_INTEGER, + DST_OAT_TYPE, + DST_OAT_SIMPLETYPE, + DST_OAT_LABEL, + DST_OAT_FUNCDEF +}; + +/* Various types of instructions */ +typedef enum DstInstructionType DstInstructionType; +enum DstInstructionType { + DIT_0, /* No args */ + DIT_S, /* Slot(3) */ + DIT_L, /* Label(3) */ + DIT_SS, /* Slot(1), Slot(2) */ + DIT_SL, /* Slot(1), Label(2) */ + DIT_ST, /* Slot(1), Type(2) */ + DIT_SI, /* Slot(1), Immediate(2) */ + DIT_SD, /* Slot(1), Closure(2) */ + DIT_SU, /* Slot(1), Unsigned Immediate(2) */ + DIT_SSS, /* Slot(1), Slot(1), Slot(1) */ + DIT_SSI, /* Slot(1), Slot(1), Immediate(1) */ + DIT_SSU, /* Slot(1), Slot(1), Unsigned Immediate(1) */ + DIT_SES, /* Slot(1), Environment(1), Far Slot(1) */ + DIT_SC /* Slot(1), Constant(2) */ +}; + typedef enum DstOpCode DstOpCode; enum DstOpCode { DOP_NOOP, @@ -84,7 +116,11 @@ enum DstOpCode { DOP_PUT, DOP_GET_INDEX, DOP_PUT_INDEX, - DOP_LENGTH + DOP_LENGTH, + DOP_INSTRUCTION_COUNT }; +/* Info about all instructions */ +extern DstInstructionType dst_instructions[DOP_INSTRUCTION_COUNT]; + #endif diff --git a/src/parser/parse.c b/src/parser/parse.c index 6ade38ff..2f117143 100644 --- a/src/parser/parse.c +++ b/src/parser/parse.c @@ -115,13 +115,11 @@ static int valid_utf8(const uint8_t *str, int32_t len) { else return 0; /* No overflow */ - if (nexti > len) - return 0; + if (nexti > len) return 0; /* Ensure trailing bytes are well formed (10XX XXXX) */ for (j = i + 1; j < nexti; j++) { - if ((str[j] >> 6) != 2) - return 0; + if ((str[j] >> 6) != 2) return 0; } /* Check for overlong encodings */ diff --git a/test/suite0.dst b/test/suite0.dst index c23a4958..1f6a6f4b 100644 --- a/test/suite0.dst +++ b/test/suite0.dst @@ -193,6 +193,15 @@ (assert (= (string "🐼" 🦊 🐮) "🐼foxcow") "emojis 🙉 :)") (assert (not= 🦊 :🦊) "utf8 strings are not symbols and vice versa") +# Symbols with @ symbol + +(def @ 1) +(assert (= @ 1) "@ symbol") +(def @@ 2) +(assert (= @@ 2) 
"@@ symbol") +(def @hey 3) +(assert (= @hey 3) "@hey symbol") + # Merge sort # Imperative merge sort merge