Add bytecode verification

This commit is contained in:
bakpakin 2018-01-20 17:19:47 -05:00
parent 065c4cfac1
commit d68eae9592
9 changed files with 312 additions and 48 deletions

View File

@ -49,6 +49,7 @@ set(CORE_SOURCES
src/core/abstract.c
src/core/array.c
src/core/buffer.c
src/core/bytecode.c
src/core/fiber.c
src/core/gc.c
src/core/io.c
@ -95,14 +96,13 @@ ${PARSER_SOURCES}
# Build the executable
add_executable(${TARGET_NAME} ${SOURCES})
target_link_libraries(${TARGET_NAME} m dl)
# TODO: don't link against dl on Windows
target_link_libraries(${TARGET_NAME} dl)
# Build some modules
add_library(dstvm MODULE ${CORE_SOURCES} ${ASSEMBLER_SOURCES})
target_link_libraries(dstvm m)
add_library(dstvm SHARED ${CORE_SOURCES} ${ASSEMBLER_SOURCES})
target_link_libraries(dstvm m dl)
#add_library(dstasm MODULE ${ASSEMBLER_SOURCES})
add_library(dstasm SHARED ${ASSEMBLER_SOURCES})
target_link_libraries(dstasm dstvm)
#add_library(dstparser MODULE ${PARSER_SOURCES})
#target_link_libraries(dstparser m)

View File

@ -27,20 +27,6 @@
#include <dst/dstopcodes.h>
#include <headerlibs/strbinsearch.h>
/* Bytecode op argument types */
typedef enum DstOpArgType DstOpArgType;
enum DstOpArgType {
DST_OAT_SLOT,
DST_OAT_ENVIRONMENT,
DST_OAT_CONSTANT,
DST_OAT_INTEGER,
DST_OAT_TYPE,
DST_OAT_SIMPLETYPE,
DST_OAT_LABEL,
DST_OAT_FUNCDEF
};
/* Convert a slot to an integer for bytecode */
/* Types of instructions (some of them) */
@ -56,25 +42,6 @@ enum DstOpArgType {
* _sc - op.dest.CC.CC (load-constant, closure)
*/
/* Various types of instructions */
typedef enum DstInstructionType DstInstructionType;
enum DstInstructionType {
DIT_0, /* No args */
DIT_S, /* One slot */
DIT_L, /* One label */
DIT_SS, /* Two slots */
DIT_SL,
DIT_ST,
DIT_SI,
DIT_SD, /* Closures (D for funcDef) */
DIT_SU, /* Unsigned */
DIT_SSS,
DIT_SSI,
DIT_SSU,
DIT_SES,
DIT_SC
};
/* Definition for an instruction in the assembler */
typedef struct DstInstructionDef DstInstructionDef;
struct DstInstructionDef {
@ -704,10 +671,17 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, Dst source, int flags) {
}
}
/* Finish everything and return funcdef */
dst_asm_deinit(&a);
/* Set environments */
def->environments =
realloc(def->environments, def->environments_length * sizeof(int32_t));
/* Verify the func def */
if (dst_verify(def)) {
dst_asm_error(&a, "invalid assembly");
}
/* Finish everything and return funcdef */
dst_asm_deinit(&a);
result.funcdef = def;
result.status = DST_ASSEMBLE_OK;
return result;

View File

@ -889,7 +889,8 @@ DstCompileResult dst_compile(Dst source, DstTable *env, int flags) {
dstc_value(fopts, source);
if (c.result.status == DST_COMPILE_OK) {
c.result.funcdef = dstc_pop_funcdef(&c);
DstFuncDef *def = dstc_pop_funcdef(&c);
c.result.funcdef = def;
}
dstc_deinit(&c);

246
src/core/bytecode.c Normal file
View File

@ -0,0 +1,246 @@
/*
* Copyright (c) 2017 Calvin Rose
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <dst/dsttypes.h>
#include <dst/dstopcodes.h>
/* Operand layout of every instruction, indexed by opcode number (the low
 * byte of an instruction word). dst_verify_one looks an instruction up here
 * to decide how to decode and range-check its operands. The trailing comment
 * on each entry names the opcode the entry is meant for; the entries are
 * positional, so they are expected to stay in the same order as the
 * DstOpCode enum. */
DstInstructionType dst_instructions[DOP_INSTRUCTION_COUNT] = {
DIT_0, /* DOP_NOOP, */
DIT_S, /* DOP_ERROR, */
DIT_ST, /* DOP_TYPECHECK, */
DIT_S, /* DOP_RETURN, */
DIT_0, /* DOP_RETURN_NIL, */
DIT_SSS, /* DOP_ADD_INTEGER, */
DIT_SSI, /* DOP_ADD_IMMEDIATE, */
DIT_SSS, /* DOP_ADD_REAL, */
DIT_SSS, /* DOP_ADD, */
DIT_SSS, /* DOP_SUBTRACT_INTEGER, */
DIT_SSS, /* DOP_SUBTRACT_REAL, */
DIT_SSS, /* DOP_SUBTRACT, */
DIT_SSS, /* DOP_MULTIPLY_INTEGER, */
DIT_SSI, /* DOP_MULTIPLY_IMMEDIATE, */
DIT_SSS, /* DOP_MULTIPLY_REAL, */
DIT_SSS, /* DOP_MULTIPLY, */
DIT_SSS, /* DOP_DIVIDE_INTEGER, */
DIT_SSI, /* DOP_DIVIDE_IMMEDIATE, */
DIT_SSS, /* DOP_DIVIDE_REAL, */
DIT_SSS, /* DOP_DIVIDE, */
DIT_SSS, /* DOP_BAND, */
DIT_SSS, /* DOP_BOR, */
DIT_SSS, /* DOP_BXOR, */
DIT_SS, /* DOP_BNOT, */
DIT_SSS, /* DOP_SHIFT_LEFT, */
DIT_SSI, /* DOP_SHIFT_LEFT_IMMEDIATE, */
DIT_SSS, /* DOP_SHIFT_RIGHT, */
DIT_SSI, /* DOP_SHIFT_RIGHT_IMMEDIATE, */
DIT_SSS, /* DOP_SHIFT_RIGHT_UNSIGNED, */
DIT_SSU, /* DOP_SHIFT_RIGHT_UNSIGNED_IMMEDIATE, */
DIT_SS, /* DOP_MOVE_FAR, */
DIT_SS, /* DOP_MOVE_NEAR, */
DIT_L, /* DOP_JUMP, */
DIT_SL, /* DOP_JUMP_IF, */
DIT_SL, /* DOP_JUMP_IF_NOT, */
DIT_SSS, /* DOP_GREATER_THAN, */
DIT_SSS, /* DOP_LESS_THAN, */
DIT_SSS, /* DOP_EQUALS, */
DIT_SSS, /* DOP_COMPARE, */
DIT_S, /* DOP_LOAD_NIL, */
DIT_S, /* DOP_LOAD_TRUE, */
DIT_S, /* DOP_LOAD_FALSE, */
DIT_SI, /* DOP_LOAD_INTEGER, */
DIT_SC, /* DOP_LOAD_CONSTANT, */
DIT_SES, /* DOP_LOAD_UPVALUE, */
DIT_S, /* DOP_LOAD_SELF, */
DIT_SES, /* DOP_SET_UPVALUE, */
DIT_SD, /* DOP_CLOSURE, */
DIT_S, /* DOP_PUSH, */
DIT_SS, /* DOP_PUSH_2, */
DIT_SSS, /* DOP_PUSH_3, */
DIT_S, /* DOP_PUSH_ARRAY, */
DIT_SS, /* DOP_CALL, */
DIT_S, /* DOP_TAILCALL, */
DIT_SSS, /* DOP_TRANSFER, */
DIT_SSS, /* DOP_GET, */
DIT_SSS, /* DOP_PUT, */
DIT_SSU, /* DOP_GET_INDEX, */
DIT_SSU, /* DOP_PUT_INDEX, */
DIT_SS /* DOP_LENGTH */
};
/* One pending entry on the verification work stack. Because entries are
 * pushed and popped from the same end (see push and dst_verify), nested
 * funcdefs are visited depth first.
 *   def   - the funcdef whose nested funcdefs are being walked
 *   index - index into def->defs of the next nested funcdef to verify */
typedef struct Node Node;
struct Node {
DstFuncDef *def;
int32_t index;
};
/* An in-memory stack of FuncDefs still to be verified. It is grown on demand
 * by push and reused across calls to dst_verify; nothing in this file frees
 * it. */
/* NOTE(review): the original comment here said "Thread local", but these are
 * plain file-scope statics with no thread-local storage specifier, so
 * concurrent calls to dst_verify from different threads would share this
 * state. Confirm the intended threading model. */
static Node *stack = NULL;
static int32_t stackcap = 0;
static int32_t stackcount = 0;
/* Append a (def, index) work item to the verification stack, enlarging the
 * backing buffer first when it is already full. */
static void push(DstFuncDef *def, int32_t index) {
    Node *top;
    /* Grow to roughly twice the current size (and at least 2 entries) */
    if (stackcount >= stackcap) {
        stackcap = 2 * stackcount + 2;
        stack = realloc(stack, sizeof(Node) * stackcap);
        if (NULL == stack) {
            DST_OUT_OF_MEMORY;
        }
    }
    /* Fill the new top entry in place */
    top = stack + stackcount;
    top->def = def;
    top->index = index;
    stackcount += 1;
}
/* Verify the bytecode of a single funcdef. Nested funcdefs are not checked
 * here; they are queued for dst_verify instead.
 *
 * Returns 0 when every check passes, otherwise one of:
 *   1 - the bytecode is empty
 *   2 - arity (plus one slot when the vararg flag is set) exceeds slotcount
 *   3 - an instruction has an unknown opcode
 *   4 - a slot operand is >= slotcount
 *   5 - a jump lands outside the bytecode
 *   6 - a nested funcdef index is >= defs_length
 *   7 - a constant index is >= constants_length
 *   8 - an environment index is >= environments_length
 *   9 - the last instruction is not return, return-nil, jump, error or
 *       tailcall
 *
 * On success, if the funcdef has nested funcdefs, a (def, 0) node is pushed
 * onto the work stack so the caller visits them next. */
static int32_t dst_verify_one(DstFuncDef *def) {
    /* Collapse the masked flag to 0 or 1: it is added to the arity below, so
     * using the raw masked value would only be right if the flag were bit 0. */
    int vargs = (def->flags & DST_FUNCDEF_FLAG_VARARG) ? 1 : 0;
    int32_t i;
    int32_t maxslot = def->arity + vargs;
    int32_t sc = def->slotcount;

    if (def->bytecode_length == 0) return 1;

    if (maxslot > sc) return 2;

    /* Verify each instruction */
    for (i = 0; i < def->bytecode_length; i++) {
        uint32_t instr = def->bytecode[i];
        /* Check for invalid instructions (opcode is the low byte) */
        if ((instr & 0xFF) >= DOP_INSTRUCTION_COUNT) {
            return 3;
        }
        DstInstructionType type = dst_instructions[instr & 0xFF];
        switch (type) {
            case DIT_0:
                continue;
            case DIT_S:
                {
                    /* Single 24-bit slot operand */
                    if ((int32_t)(instr >> 8) >= sc) return 4;
                    continue;
                }
            case DIT_SI:
            case DIT_SU:
            case DIT_ST:
                {
                    /* Only the first (8-bit) operand is a slot */
                    if ((int32_t)((instr >> 8) & 0xFF) >= sc) return 4;
                    continue;
                }
            case DIT_L:
                {
                    /* Signed 24-bit offset relative to this instruction */
                    int32_t jumpdest = i + (((int32_t)instr) >> 8);
                    if (jumpdest < 0 || jumpdest >= def->bytecode_length) return 5;
                    continue;
                }
            case DIT_SS:
                {
                    /* 8-bit slot followed by a 16-bit slot */
                    if ((int32_t)((instr >> 8) & 0xFF) >= sc ||
                        (int32_t)(instr >> 16) >= sc) return 4;
                    continue;
                }
            case DIT_SSI:
            case DIT_SSU:
                {
                    if ((int32_t)((instr >> 8) & 0xFF) >= sc ||
                        (int32_t)((instr >> 16) & 0xFF) >= sc) return 4;
                    continue;
                }
            case DIT_SL:
                {
                    /* 8-bit slot plus signed 16-bit offset relative to this instruction */
                    int32_t jumpdest = i + (((int32_t)instr) >> 16);
                    if ((int32_t)((instr >> 8) & 0xFF) >= sc) return 4;
                    if (jumpdest < 0 || jumpdest >= def->bytecode_length) return 5;
                    continue;
                }
            case DIT_SSS:
                {
                    if (((int32_t)(instr >> 8) & 0xFF) >= sc ||
                        ((int32_t)(instr >> 16) & 0xFF) >= sc ||
                        ((int32_t)(instr >> 24) & 0xFF) >= sc) return 4;
                    continue;
                }
            case DIT_SD:
                {
                    if ((int32_t)((instr >> 8) & 0xFF) >= sc) return 4;
                    if ((int32_t)(instr >> 16) >= def->defs_length) return 6;
                    continue;
                }
            case DIT_SC:
                {
                    if ((int32_t)((instr >> 8) & 0xFF) >= sc) return 4;
                    if ((int32_t)(instr >> 16) >= def->constants_length) return 7;
                    continue;
                }
            case DIT_SES:
                {
                    /* The far slot (last operand) is not checked: doing so
                     * needs information about the parent funcdefs, which is
                     * not available here. Resort to runtime checks for now.
                     * Maybe invalid upvalue references could default to nil?
                     * (Don't commit to this in the long term, though.) */
                    if ((int32_t)((instr >> 8) & 0xFF) >= sc) return 4;
                    if ((int32_t)((instr >> 16) & 0xFF) >= def->environments_length) return 8;
                    continue;
                }
        }
    }

    /* Verify the last instruction is a jump, return, return-nil, error or
     * tailcall. Eventually some real flow analysis would be ideal, but this
     * is intended to keep execution from running off the end of the
     * bytecode. As a consequence, valid functions that end in dead code are
     * rejected. bytecode_length is known to be nonzero at this point. */
    {
        uint32_t lastop = def->bytecode[def->bytecode_length - 1] & 0xFF;
        switch (lastop) {
            default:
                return 9;
            case DOP_RETURN:
            case DOP_RETURN_NIL:
            case DOP_JUMP:
            case DOP_ERROR:
            case DOP_TAILCALL:
                break;
        }
    }

    /* Queue nested funcdefs for verification by pushing the first child index */
    if (def->defs_length) push(def, 0);

    return 0;
}
/* Verify a funcdef together with every funcdef nested inside it.
 *
 * Returns 0 when all of them pass, otherwise the first nonzero code reported
 * by dst_verify_one (verification stops at the first failure).
 *
 * The return type is int to agree with the public prototype
 * `int dst_verify(DstFuncDef *def);`.
 *
 * Uses the file-scope work stack, which is reset at the start of each call. */
int dst_verify(DstFuncDef *def) {
    int32_t status;
    stackcount = 0;
    status = dst_verify_one(def);
    while (!status && stackcount) {
        Node n = stack[--stackcount];
        if (n.index < n.def->defs_length) {
            /* Verifying the child may itself push a node for the child's own
             * nested funcdefs; the sibling continuation is pushed after it. */
            status = dst_verify_one(n.def->defs[n.index]);
            /* No point queueing more work once a failure has been recorded */
            if (!status) push(n.def, n.index + 1);
        }
    }
    return status;
}

View File

@ -283,7 +283,7 @@ DstTable *dst_stl_env() {
/* Allow references to the environment */
dst_env_def(env, "_env", ret);
/*Load auxiliary envs */
/* Load auxiliary envs */
{
DstArgs args;
args.n = 1;
@ -293,7 +293,6 @@ DstTable *dst_stl_env() {
dst_lib_math(args);
dst_lib_array(args);
dst_lib_buffer(args);
dst_lib_parse(args);
}
return env;

View File

@ -164,6 +164,7 @@ int dst_gcunrootall(Dst root);
/* Functions */
DstFuncDef *dst_funcdef_alloc();
DstFunction *dst_function(DstFuncDef *def, DstFunction *parent);
int dst_verify(DstFuncDef *def);
/* Misc */
int dst_equals(Dst x, Dst y);

View File

@ -23,6 +23,38 @@
#ifndef DST_OPCODES_H_defined
#define DST_OPCODES_H_defined
/* Bytecode op argument types: the kinds of operand an instruction argument
 * can denote. NOTE(review): nothing in this header uses the enum; it appears
 * to be shared with the assembler - confirm. */
typedef enum DstOpArgType DstOpArgType;
enum DstOpArgType {
DST_OAT_SLOT,
DST_OAT_ENVIRONMENT,
DST_OAT_CONSTANT,
DST_OAT_INTEGER,
DST_OAT_TYPE,
DST_OAT_SIMPLETYPE,
DST_OAT_LABEL,
DST_OAT_FUNCDEF
};
/* Various types of instructions, i.e. the operand layouts an instruction
 * word can have. In the per-member comments the number in parentheses is the
 * width of that operand in bytes; operands follow the one-byte opcode. */
typedef enum DstInstructionType DstInstructionType;
enum DstInstructionType {
DIT_0, /* No args */
DIT_S, /* Slot(3) */
DIT_L, /* Label(3) */
DIT_SS, /* Slot(1), Slot(2) */
DIT_SL, /* Slot(1), Label(2) */
DIT_ST, /* Slot(1), Type(2) - NOTE(review): previously documented as Slot(2); confirm */
DIT_SI, /* Slot(1), Immediate(2) */
DIT_SD, /* Slot(1), Closure(2) */
DIT_SU, /* Slot(1), Unsigned Immediate(2) */
DIT_SSS, /* Slot(1), Slot(1), Slot(1) */
DIT_SSI, /* Slot(1), Slot(1), Immediate(1) */
DIT_SSU, /* Slot(1), Slot(1), Unsigned Immediate(1) */
DIT_SES, /* Slot(1), Environment(1), Far Slot(1) */
DIT_SC /* Slot(1), Constant(2) */
};
typedef enum DstOpCode DstOpCode;
enum DstOpCode {
DOP_NOOP,
@ -84,7 +116,11 @@ enum DstOpCode {
DOP_PUT,
DOP_GET_INDEX,
DOP_PUT_INDEX,
DOP_LENGTH
DOP_LENGTH,
DOP_INSTRUCTION_COUNT
};
/* Info about all instructions */
extern DstInstructionType dst_instructions[DOP_INSTRUCTION_COUNT];
#endif

View File

@ -115,13 +115,11 @@ static int valid_utf8(const uint8_t *str, int32_t len) {
else return 0;
/* No overflow */
if (nexti > len)
return 0;
if (nexti > len) return 0;
/* Ensure trailing bytes are well formed (10XX XXXX) */
for (j = i + 1; j < nexti; j++) {
if ((str[j] >> 6) != 2)
return 0;
if ((str[j] >> 6) != 2) return 0;
}
/* Check for overlong encodings */

View File

@ -193,6 +193,15 @@
(assert (= (string "🐼" 🦊 🐮) "🐼foxcow") "emojis 🙉 :)")
(assert (not= 🦊 :🦊) "utf8 strings are not symbols and vice versa")
# Symbols with @ symbol
(def @ 1)
(assert (= @ 1) "@ symbol")
(def @@ 2)
(assert (= @@ 2) "@@ symbol")
(def @hey 3)
(assert (= @hey 3) "@hey symbol")
# Merge sort
# Imperative merge sort merge