diff --git a/Makefile b/Makefile index dbe0c538..2368be0c 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ PREFIX=/usr/local DST_TARGET=dst DST_XXD=xxd DEBUGGER=lldb -DST_INTERNAL_HEADERS=$(addprefix core/,symcache.h opcodes.h) +DST_INTERNAL_HEADERS=$(addprefix core/,symcache.h opcodes.h strtod.h) DST_HEADERS=$(addprefix include/dst/,dst.h) ############################# @@ -59,7 +59,7 @@ $(DST_XXD): libs/xxd.c ################################### DST_CORE_SOURCES=$(addprefix core/,\ - array.c asm.c buffer.c fiber.c func.c gc.c parse.c string.c\ + array.c asm.c buffer.c fiber.c func.c gc.c parse.c string.c strtod.c\ struct.c symcache.c syscalls.c table.c tuple.c userdata.c util.c\ value.c vm.c wrap.c) DST_CORE_OBJECTS=$(patsubst %.c,%.o,$(DST_CORE_SOURCES)) @@ -79,7 +79,7 @@ CCU_FLAGS = $(CFLAGS) -DDST_UNIT_TEST DST_UNIT_BINARIES=$(addprefix unittests/,\ asm_test.out array_test.out buffer_test.out fiber_test.out \ - parse_test.out table_test.out) + parse_test.out strtod_test.out table_test.out) %.out: %.c $(DST_CORE_OBJECTS) $(DST_ALL_HEADERS) unittests/unit.h $(CC) $(CCU_FLAGS) $(DST_CORE_OBJECTS) $< -o $@ @@ -90,6 +90,7 @@ unit: $(DST_UNIT_BINARIES) unittests/buffer_test.out unittests/fiber_test.out unittests/parse_test.out + unittests/strtod_test.out unittests/table_test.out ################### diff --git a/core/asm.c b/core/asm.c index 92e38490..131b5944 100644 --- a/core/asm.c +++ b/core/asm.c @@ -92,6 +92,7 @@ struct DstAssembler { DstFuncDef *def; jmp_buf on_error; const uint8_t *errmessage; + const DstValue *errmap; int32_t environments_capacity; int32_t bytecode_count; /* Used for calculating labels */ @@ -229,21 +230,27 @@ static void dst_asm_deinit(DstAssembler *a) { } /* Throw some kind of assembly error */ -static void dst_asm_error(DstAssembler *a, const char *message) { +static void dst_asm_error(DstAssembler *a, const DstValue *map, const char *message) { a->errmessage = dst_cstring(message); + a->errmap = map; longjmp(a->on_error, 1); } 
-#define dst_asm_assert(a, c, m) do { if (!(c)) dst_asm_error((a), (m)); } while (0) +#define dst_asm_assert(a, c, map, m) do { if (!(c)) dst_asm_error((a), (map), (m)); } while (0) /* Throw some kind of assembly error */ -static void dst_asm_errorv(DstAssembler *a, const uint8_t *m) { +static void dst_asm_errorv(DstAssembler *a, const DstValue *map, const uint8_t *m) { a->errmessage = m; + a->errmap = map; longjmp(a->on_error, 1); } /* Parse an argument to an assembly instruction, and return the result as an * integer. This integer will need to be trimmed and bound checked. */ -static int32_t doarg_1(DstAssembler *a, DstOpArgType argtype, DstValue x) { +static int32_t doarg_1( + DstAssembler *a, + const DstValue *map, + DstOpArgType argtype, + DstValue x) { int32_t ret = -1; DstTable *c; switch (argtype) { @@ -281,7 +288,7 @@ static int32_t doarg_1(DstAssembler *a, DstOpArgType argtype, DstValue x) { int32_t i = 0; ret = 0; for (i = 0; i < dst_tuple_length(t); i++) { - ret |= doarg_1(a, DST_OAT_SIMPLETYPE, t[i]); + ret |= doarg_1(a, map, DST_OAT_SIMPLETYPE, t[i]); } } else { goto error; @@ -299,14 +306,14 @@ static int32_t doarg_1(DstAssembler *a, DstOpArgType argtype, DstValue x) { ret = dst_unwrap_integer(result); } } else { - dst_asm_errorv(a, dst_formatc("unknown name %q", x)); + dst_asm_errorv(a, map, dst_formatc("unknown name %q", x)); } } else if (argtype == DST_OAT_TYPE || argtype == DST_OAT_SIMPLETYPE) { int32_t index = strsearch(dst_unwrap_symbol(x), dst_type_names); if (index != -1) { ret = index; } else { - dst_asm_errorv(a, dst_formatc("unknown type %q", x)); + dst_asm_errorv(a, map, dst_formatc("unknown type %q", x)); } } else { goto error; @@ -319,7 +326,7 @@ static int32_t doarg_1(DstAssembler *a, DstOpArgType argtype, DstValue x) { return ret; error: - dst_asm_errorv(a, dst_formatc("error parsing instruction argument %v", x)); + dst_asm_errorv(a, map, dst_formatc("error parsing instruction argument %v", x)); return 0; } @@ -327,99 +334,104 @@ 
static int32_t doarg_1(DstAssembler *a, DstOpArgType argtype, DstValue x) { * try to convert arguments to bit patterns */ static uint32_t doarg( DstAssembler *a, + const DstValue *map, DstOpArgType argtype, int nth, int nbytes, int hassign, DstValue x) { - int32_t arg = doarg_1(a, argtype, x); + int32_t arg = doarg_1(a, map, argtype, x); /* Calculate the min and max values that can be stored given * nbytes, and whether or not the storage is signed */ int32_t min = (-hassign) << ((nbytes << 3) - 1); int32_t max = ~((-1) << ((nbytes << 3) - hassign)); if (arg < min) - dst_asm_errorv(a, dst_formatc("instruction argument %v is too small, must be %d byte%s", + dst_asm_errorv(a, map, dst_formatc("instruction argument %v is too small, must be %d byte%s", x, nbytes, nbytes > 1 ? "s" : "")); if (arg > max) - dst_asm_errorv(a, dst_formatc("instruction argument %v is too large, must be %d byte%s", + dst_asm_errorv(a, map, dst_formatc("instruction argument %v is too large, must be %d byte%s", x, nbytes, nbytes > 1 ? 
"s" : "")); return ((uint32_t) arg) << (nth << 3); } /* Provide parsing methods for the different kinds of arguments */ -static uint32_t read_instruction(DstAssembler *a, const DstInstructionDef *idef, const DstValue *argt) { +static uint32_t read_instruction( + DstAssembler *a, + const DstValue *map, + const DstInstructionDef *idef, + const DstValue *argt) { uint32_t instr = idef->opcode; switch (idef->type) { case DIT_0: { if (dst_tuple_length(argt) != 1) - dst_asm_error(a, "expected 0 arguments: (op)"); + dst_asm_error(a, map, "expected 0 arguments: (op)"); break; } case DIT_S: { if (dst_tuple_length(argt) != 2) - dst_asm_error(a, "expected 1 argument: (op, slot)"); - instr |= doarg(a, DST_OAT_SLOT, 1, 3, 0, argt[1]); + dst_asm_error(a, map, "expected 1 argument: (op, slot)"); + instr |= doarg(a, dst_parse_submap_index(map, 1), DST_OAT_SLOT, 1, 3, 0, argt[1]); break; } case DIT_L: { if (dst_tuple_length(argt) != 2) - dst_asm_error(a, "expected 1 argument: (op, label)"); - instr |= doarg(a, DST_OAT_LABEL, 1, 3, 1, argt[1]); + dst_asm_error(a, map, "expected 1 argument: (op, label)"); + instr |= doarg(a, dst_parse_submap_index(map, 1), DST_OAT_LABEL, 1, 3, 1, argt[1]); break; } case DIT_SS: { if (dst_tuple_length(argt) != 3) - dst_asm_error(a, "expected 2 arguments: (op, slot, slot)"); - instr |= doarg(a, DST_OAT_SLOT, 1, 1, 0, argt[1]); - instr |= doarg(a, DST_OAT_SLOT, 2, 2, 0, argt[2]); + dst_asm_error(a, map, "expected 2 arguments: (op, slot, slot)"); + instr |= doarg(a, dst_parse_submap_index(map, 1), DST_OAT_SLOT, 1, 1, 0, argt[1]); + instr |= doarg(a, dst_parse_submap_index(map, 2), DST_OAT_SLOT, 2, 2, 0, argt[2]); break; } case DIT_SL: { if (dst_tuple_length(argt) != 3) - dst_asm_error(a, "expected 2 arguments: (op, slot, label)"); - instr |= doarg(a, DST_OAT_SLOT, 1, 1, 0, argt[1]); - instr |= doarg(a, DST_OAT_LABEL, 2, 2, 1, argt[2]); + dst_asm_error(a, map, "expected 2 arguments: (op, slot, label)"); + instr |= doarg(a, dst_parse_submap_index(map, 1), 
DST_OAT_SLOT, 1, 1, 0, argt[1]); + instr |= doarg(a, dst_parse_submap_index(map, 2), DST_OAT_LABEL, 2, 2, 1, argt[2]); break; } case DIT_ST: { if (dst_tuple_length(argt) != 3) - dst_asm_error(a, "expected 2 arguments: (op, slot, type)"); - instr |= doarg(a, DST_OAT_SLOT, 1, 1, 0, argt[1]); - instr |= doarg(a, DST_OAT_TYPE, 2, 2, 0, argt[2]); + dst_asm_error(a, map, "expected 2 arguments: (op, slot, type)"); + instr |= doarg(a, dst_parse_submap_index(map, 1), DST_OAT_SLOT, 1, 1, 0, argt[1]); + instr |= doarg(a, dst_parse_submap_index(map, 2), DST_OAT_TYPE, 2, 2, 0, argt[2]); break; } case DIT_SI: case DIT_SU: { if (dst_tuple_length(argt) != 3) - dst_asm_error(a, "expected 2 arguments: (op, slot, integer)"); - instr |= doarg(a, DST_OAT_SLOT, 1, 1, 0, argt[1]); - instr |= doarg(a, DST_OAT_INTEGER, 2, 2, idef->type == DIT_SI, argt[2]); + dst_asm_error(a, map, "expected 2 arguments: (op, slot, integer)"); + instr |= doarg(a, dst_parse_submap_index(map, 1), DST_OAT_SLOT, 1, 1, 0, argt[1]); + instr |= doarg(a, dst_parse_submap_index(map, 2), DST_OAT_INTEGER, 2, 2, idef->type == DIT_SI, argt[2]); break; } case DIT_SSS: { if (dst_tuple_length(argt) != 4) - dst_asm_error(a, "expected 3 arguments: (op, slot, slot, slot)"); - instr |= doarg(a, DST_OAT_SLOT, 1, 1, 0, argt[1]); - instr |= doarg(a, DST_OAT_SLOT, 2, 1, 0, argt[2]); - instr |= doarg(a, DST_OAT_SLOT, 3, 1, 0, argt[3]); + dst_asm_error(a, map, "expected 3 arguments: (op, slot, slot, slot)"); + instr |= doarg(a, dst_parse_submap_index(map, 1), DST_OAT_SLOT, 1, 1, 0, argt[1]); + instr |= doarg(a, dst_parse_submap_index(map, 2), DST_OAT_SLOT, 2, 1, 0, argt[2]); + instr |= doarg(a, dst_parse_submap_index(map, 3), DST_OAT_SLOT, 3, 1, 0, argt[3]); break; } case DIT_SSI: case DIT_SSU: { if (dst_tuple_length(argt) != 4) - dst_asm_error(a, "expected 3 arguments: (op, slot, slot, integer)"); - instr |= doarg(a, DST_OAT_SLOT, 1, 1, 0, argt[1]); - instr |= doarg(a, DST_OAT_SLOT, 2, 1, 0, argt[2]); - instr |= doarg(a, 
DST_OAT_INTEGER, 3, 1, idef->type == DIT_SSI, argt[3]); + dst_asm_error(a, map, "expected 3 arguments: (op, slot, slot, integer)"); + instr |= doarg(a, dst_parse_submap_index(map, 1), DST_OAT_SLOT, 1, 1, 0, argt[1]); + instr |= doarg(a, dst_parse_submap_index(map, 2), DST_OAT_SLOT, 2, 1, 0, argt[2]); + instr |= doarg(a, dst_parse_submap_index(map, 3), DST_OAT_INTEGER, 3, 1, idef->type == DIT_SSI, argt[3]); break; } case DIT_SES: @@ -427,24 +439,24 @@ static uint32_t read_instruction(DstAssembler *a, const DstInstructionDef *idef, DstAssembler *b = a; uint32_t env; if (dst_tuple_length(argt) != 4) - dst_asm_error(a, "expected 3 arguments: (op, slot, environment, envslot)"); - instr |= doarg(a, DST_OAT_SLOT, 1, 1, 0, argt[1]); - env = doarg(a, DST_OAT_ENVIRONMENT, 0, 1, 0, argt[2]); + dst_asm_error(a, map, "expected 3 arguments: (op, slot, environment, envslot)"); + instr |= doarg(a, dst_parse_submap_index(map, 1), DST_OAT_SLOT, 1, 1, 0, argt[1]); + env = doarg(a, dst_parse_submap_index(map, 2), DST_OAT_ENVIRONMENT, 0, 1, 0, argt[2]); instr |= env << 16; for (env += 1; env > 0; env--) { b = b->parent; if (NULL == b) - dst_asm_error(a, "invalid environment index"); + dst_asm_error(a, dst_parse_submap_index(map, 2), "invalid environment index"); } - instr |= doarg(b, DST_OAT_SLOT, 3, 1, 0, argt[3]); + instr |= doarg(b, dst_parse_submap_index(map, 3), DST_OAT_SLOT, 3, 1, 0, argt[3]); break; } case DIT_SC: { if (dst_tuple_length(argt) != 3) - dst_asm_error(a, "expected 2 arguments: (op, slot, constant)"); - instr |= doarg(a, DST_OAT_SLOT, 1, 1, 0, argt[1]); - instr |= doarg(a, DST_OAT_CONSTANT, 2, 2, 0, argt[2]); + dst_asm_error(a, map, "expected 2 arguments: (op, slot, constant)"); + instr |= doarg(a, dst_parse_submap_index(map, 1), DST_OAT_SLOT, 1, 1, 0, argt[1]); + instr |= doarg(a, dst_parse_submap_index(map, 2), DST_OAT_CONSTANT, 2, 2, 0, argt[2]); break; } } @@ -513,23 +525,33 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, DstAssembleOptions opts) 
a.errmessage = NULL; a.environments_capacity = 0; a.bytecode_count = 0; + a.errmap = NULL; dst_table_init(&a.labels, 10); dst_table_init(&a.constants, 10); dst_table_init(&a.slots, 10); dst_table_init(&a.envs, 10); + /* Initialize result */ + result.error_start = -1; + result.error_end = -1; + /* Set error jump */ if (setjmp(a.on_error)) { - dst_asm_deinit(&a); if (NULL != a.parent) { + dst_asm_deinit(&a); longjmp(a.parent->on_error, 1); } result.result.error = a.errmessage; result.status = DST_ASSEMBLE_ERROR; + if (a.errmap != NULL) { + result.error_start = dst_unwrap_integer(a.errmap[0]); + result.error_end = dst_unwrap_integer(a.errmap[1]); + } + dst_asm_deinit(&a); return result; } - dst_asm_assert(&a, dst_checktype(opts.source, DST_STRUCT), "expected struct for assembly source"); + dst_asm_assert(&a, dst_checktype(opts.source, DST_STRUCT), opts.sourcemap, "expected struct for assembly source"); /* Set function arity */ x = dst_struct_get(st, dst_csymbolv("arity")); @@ -538,20 +560,24 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, DstAssembleOptions opts) /* Create slot aliases */ x = dst_struct_get(st, dst_csymbolv("slots")); if (dst_seq_view(x, &arr, &count)) { + const DstValue *slotmap = + dst_parse_submap_value(opts.sourcemap, dst_csymbolv("slots")); for (i = 0; i < count; i++) { + const DstValue *imap = dst_parse_submap_index(slotmap, i); DstValue v = arr[i]; if (dst_checktype(v, DST_TUPLE)) { const DstValue *t = dst_unwrap_tuple(v); int32_t j; for (j = 0; j < dst_tuple_length(t); j++) { + const DstValue *tjmap = dst_parse_submap_index(imap, j); if (!dst_checktype(t[j], DST_SYMBOL)) - dst_asm_error(&a, "slot names must be symbols"); + dst_asm_error(&a, tjmap, "slot names must be symbols"); dst_table_put(&a.slots, t[j], dst_wrap_integer(i)); } } else if (dst_checktype(v, DST_SYMBOL)) { dst_table_put(&a.slots, v, dst_wrap_integer(i)); } else { - dst_asm_error(&a, "slot names must be symbols or tuple of symbols"); + dst_asm_error(&a, imap, "slot 
names must be symbols or tuple of symbols"); } } } @@ -559,10 +585,13 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, DstAssembleOptions opts) /* Create environment aliases */ x = dst_struct_get(st, dst_csymbolv("environments")); if (dst_seq_view(x, &arr, &count)) { + const DstValue *emap = + dst_parse_submap_value(opts.sourcemap, dst_csymbolv("environments")); for (i = 0; i < count; i++) { - dst_asm_assert(&a, dst_checktype(arr[i], DST_SYMBOL), "environment must be a symbol"); + const DstValue *imap = dst_parse_submap_index(emap, i); + dst_asm_assert(&a, dst_checktype(arr[i], DST_SYMBOL), imap, "environment must be a symbol"); if (dst_asm_addenv(&a, arr[i]) < 0) { - dst_asm_error(&a, "environment not found"); + dst_asm_error(&a, imap, "environment not found"); } } } @@ -570,12 +599,15 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, DstAssembleOptions opts) /* Parse constants */ x = dst_struct_get(st, dst_csymbolv("constants")); if (dst_seq_view(x, &arr, &count)) { + const DstValue *cmap = + dst_parse_submap_value(opts.sourcemap, dst_csymbolv("constants")); def->constants_length = count; def->constants = malloc(sizeof(DstValue) * count); if (NULL == def->constants) { DST_OUT_OF_MEMORY; } for (i = 0; i < count; i++) { + const DstValue *imap = dst_parse_submap_index(cmap, i); DstValue ct = arr[i]; if (dst_checktype(ct, DST_TUPLE) && dst_tuple_length(dst_unwrap_tuple(ct)) > 1 && @@ -591,7 +623,7 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, DstAssembleOptions opts) def->constants[i] = t[2]; dst_table_put(&a.constants, t[1], dst_wrap_integer(i)); } else { - dst_asm_errorv(&a, dst_formatc("could not parse constant \"%v\"", ct)); + dst_asm_errorv(&a, imap, dst_formatc("could not parse constant \"%v\"", ct)); } /* Todo - parse nested funcdefs */ } else { @@ -606,16 +638,19 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, DstAssembleOptions opts) /* Parse bytecode and labels */ x = dst_struct_get(st, dst_csymbolv("bytecode")); 
if (dst_seq_view(x, &arr, &count)) { + const DstValue *bmap = + dst_parse_submap_value(opts.sourcemap, dst_csymbolv("bytecode")); /* Do labels and find length */ int32_t blength = 0; for (i = 0; i < count; ++i) { + const DstValue *imap = dst_parse_submap_index(bmap, i); DstValue instr = arr[i]; if (dst_checktype(instr, DST_SYMBOL)) { dst_table_put(&a.labels, instr, dst_wrap_integer(blength)); } else if (dst_checktype(instr, DST_TUPLE)) { blength++; } else { - dst_asm_error(&a, "expected assembly instruction"); + dst_asm_error(&a, imap, "expected assembly instruction"); } } /* Allocate bytecode array */ @@ -626,6 +661,7 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, DstAssembleOptions opts) } /* Do bytecode */ for (i = 0; i < count; ++i) { + const DstValue *imap = dst_parse_submap_index(bmap, i); DstValue instr = arr[i]; if (dst_checktype(instr, DST_SYMBOL)) { continue; @@ -633,23 +669,23 @@ static DstAssembleResult dst_asm1(DstAssembler *parent, DstAssembleOptions opts) uint32_t op; const DstInstructionDef *idef; const DstValue *t; - dst_asm_assert(&a, dst_checktype(instr, DST_TUPLE), "expected tuple"); + dst_asm_assert(&a, dst_checktype(instr, DST_TUPLE), imap, "expected tuple"); t = dst_unwrap_tuple(instr); if (dst_tuple_length(t) == 0) { op = 0; } else { - dst_asm_assert(&a, dst_checktype(t[0], DST_SYMBOL), + dst_asm_assert(&a, dst_checktype(t[0], DST_SYMBOL), imap, "expected symbol in assembly instruction"); idef = dst_findi(dst_unwrap_symbol(t[0])); if (NULL == idef) - dst_asm_errorv(&a, dst_formatc("unknown instruction %v", instr)); - op = read_instruction(&a, idef, t); + dst_asm_errorv(&a, imap, dst_formatc("unknown instruction %v", instr)); + op = read_instruction(&a, imap, idef, t); } def->bytecode[a.bytecode_count++] = op; } } } else { - dst_asm_error(&a, "bytecode expected"); + dst_asm_error(&a, opts.sourcemap, "bytecode expected"); } /* Finish everything and return funcdef */ diff --git a/core/compile.c b/core/compile.c index 
f06e29e3..49db206b 100644 --- a/core/compile.c +++ b/core/compile.c @@ -29,12 +29,42 @@ typedef struct DstCompiler DstCompiler; typedef struct FormOptions FormOptions; typedef struct SlotTracker SlotTracker; typedef struct DstScope DstScope; +typedef struct DstCFunctionOptimizer DstCFunctionOptimizer; + +#define DST_SLOT_CONSTANT 1 +#define DST_SLOT_TEMP 2 +#define DST_SLOT_RETURNED 4 +#define DST_SLOT_NIL 8 /* A stack slot */ struct DstSlot { int32_t index; uint32_t flags; uint32_t types; /* bit set of possible primitive types */ + DstValue constant; /* If the slot has a constant value */ +} + +/* Most forms that return a constant will not generate any bytecode */ + +/* Special forms that need support */ +/* cond + * while (continue, break) + * quote + * fn + * def + * var + * do + */ + +#define DST_OPTIMIZER_CONSTANTS 1 +#define DST_OPTIMIZER_BYTECODE 2 +#define DST_OPTIMIZER_PARTIAL_CONSTANTS 4 + +/* A grouping of optimization on a cfunction given certain conditions + * on the arguments (such as all constants, or some known types). The appropriate + * optimizations should be tried before compiling a normal function call. 
*/ +struct DstCFunctionOptimizer { + uint32_t flags; /* Indicate what kind of optimizations can be performed */ } /* A lexical scope during compilation */ @@ -61,6 +91,6 @@ struct DstFormOptions { DstCompiler *compiler; DstValue x; uint32_t flags; - uint32_t types; /* bit set of accepeted primitive types */ + uint32_t types; /* bit set of accepted primitive types */ int32_t target_slot; }; diff --git a/core/parse.c b/core/parse.c index 6ad520fd..ec8298aa 100644 --- a/core/parse.c +++ b/core/parse.c @@ -21,89 +21,7 @@ */ #include - -/* Get an integer power of 10 */ -static double exp10(int power) { - if (power == 0) return 1; - if (power > 0) { - double result = 10; - int currentPower = 1; - while (currentPower * 2 <= power) { - result = result * result; - currentPower *= 2; - } - return result * exp10(power - currentPower); - } else { - return 1 / exp10(-power); - } -} - -/* Read an integer */ -static int read_integer(const uint8_t *string, const uint8_t *end, int64_t *ret) { - int sign = 1, x = 0; - int64_t accum = 0; - if (*string == '-') { - sign = -1; - ++string; - } else if (*string == '+') { - ++string; - } - if (string >= end) return 0; - while (string < end) { - x = *string; - if (x < '0' || x > '9') return 0; - x -= '0'; - accum = accum * 10 + x; - ++string; - } - *ret = accum * sign; - return 1; -} - -/* Read a real from a string. Returns if successfuly - * parsed a real from the enitre input string. - * If returned 1, output is int ret. - * TODO - consider algorithm that does not lose precision. */ -static int read_real(const uint8_t *string, const uint8_t *end, double *ret, int forceInt) { - int sign = 1, x = 0; - double accum = 0, exp = 1, place = 1; - /* Check the sign */ - if (*string == '-') { - sign = -1; - ++string; - } else if (*string == '+') { - ++string; - } - if (string >= end) return 0; - while (string < end) { - if (*string == '.' 
&& !forceInt) { - place = 0.1; - } else if (!forceInt && (*string == 'e' || *string == 'E')) { - /* Read the exponent */ - ++string; - if (string >= end) return 0; - if (!read_real(string, end, &exp, 1)) - return 0; - exp = exp10(exp); - break; - } else { - x = *string; - if (x < '0' || x > '9') return 0; - x -= '0'; - if (place < 1) { - accum += x * place; - place *= 0.1; - } else { - accum *= 10; - accum += x; - } - } - ++string; - } - *ret = accum * sign * exp; - return 1; -} - +#include "strtod.h" /* Checks if a string slice is equal to a string constant */ static int check_str_const(const char *ref, const uint8_t *start, const uint8_t *end) { @@ -236,17 +154,14 @@ static const uint8_t *parse_recur( default: atom: { - double real; - int64_t integer; + DstValue numcheck; const uint8_t *tokenend = src; if (!is_symbol_char(*src)) goto unexpected_character; while (tokenend < end && is_symbol_char(*tokenend)) tokenend++; - if (tokenend >= end) goto unexpected_eos; - if (read_integer(src, tokenend, &integer)) { - ret = dst_wrap_integer(integer); - } else if (read_real(src, tokenend, &real, 0)) { - ret = dst_wrap_real(real); + numcheck = dst_scan_number(src, tokenend - src); + if (!dst_checktype(numcheck, DST_NIL)) { + ret = numcheck; } else if (check_str_const("nil", src, tokenend)) { ret = dst_wrap_nil(); } else if (check_str_const("false", src, tokenend)) { @@ -509,10 +424,10 @@ DstParseResult dst_parse(const uint8_t *src, int32_t len) { if (args.errmsg) { res.result.error = dst_cstring(args.errmsg); - res.map = dst_wrap_nil(); + res.map = NULL; } else { res.result.value = dst_array_pop(&args.stack); - res.map = dst_array_pop(&args.mapstack); + res.map = dst_unwrap_tuple(dst_array_pop(&args.mapstack)); } dst_array_deinit(&args.stack); @@ -527,3 +442,56 @@ DstParseResult dst_parsec(const char *src) { while (src[len]) ++len; return dst_parse((const uint8_t *)src, len); } + +/* Get the sub source map by indexing a value. 
Used to traverse + * into arrays and tuples */ +const DstValue *dst_parse_submap_index(const DstValue *map, int32_t index) { + if (NULL != map && dst_tuple_length(map) >= 3) { + const DstValue *seq; + int32_t len; + if (dst_seq_view(map[2], &seq, &len)) { + if (index >= 0 && index < len) { + if (dst_checktype(seq[index], DST_TUPLE)) { + const DstValue *ret = dst_unwrap_tuple(seq[index]); + if (dst_tuple_length(ret) >= 2 && + dst_checktype(ret[0], DST_INTEGER) && + dst_checktype(ret[1], DST_INTEGER)) { + return ret; + } + } + } + } + } + return NULL; +} + +/* Traverse into tables and structs */ +static const DstValue *dst_parse_submap_kv(const DstValue *map, DstValue key, int kv) { + if (NULL != map && dst_tuple_length(map) >= 3) { + DstValue kvpair = dst_get(map[2], key); + if (dst_checktype(kvpair, DST_TUPLE)) { + const DstValue *kvtup = dst_unwrap_tuple(kvpair); + if (dst_tuple_length(kvtup) >= 2) { + if (dst_checktype(kvtup[kv], DST_TUPLE)) { + const DstValue *ret = dst_unwrap_tuple(kvtup[kv]); + if (dst_tuple_length(ret) >= 2 && + dst_checktype(ret[0], DST_INTEGER) && + dst_checktype(ret[1], DST_INTEGER)) { + return ret; + } + } + } + } + } + return NULL; +} + +/* Traverse into a key of a table or struct */ +const DstValue *dst_parse_submap_key(const DstValue *map, DstValue key) { + return dst_parse_submap_kv(map, key, 0); +} + +/* Traverse into a value of a table or struct */ +const DstValue *dst_parse_submap_value(const DstValue *map, DstValue key) { + return dst_parse_submap_kv(map, key, 1); +} diff --git a/core/string.c b/core/string.c index 55416605..bce9294a 100644 --- a/core/string.c +++ b/core/string.c @@ -21,6 +21,7 @@ */ #include +#include "strtod.h" /* Begin building a string */ uint8_t *dst_string_begin(int32_t length) { @@ -102,7 +103,7 @@ const uint8_t *dst_cstring(const char *str) { #define DST_BUFSIZE 36 static int32_t real_to_string_impl(uint8_t *buf, double x) { - int count = snprintf((char *) buf, DST_BUFSIZE, "%.21g", x); + int count = 
snprintf((char *) buf, DST_BUFSIZE, "%.17g", x); return (int32_t) count; } @@ -323,7 +324,7 @@ void dst_short_description_b(DstBuffer *buffer, DstValue x) { /* Helper structure for stringifying nested structures */ typedef struct DstPrinter DstPrinter; struct DstPrinter { - DstBuffer buffer; + DstBuffer *buffer; DstTable seen; uint32_t flags; uint32_t state; @@ -344,7 +345,7 @@ static void dst_print_indent(DstPrinter *p) { int32_t i, len; len = p->indent_size * p->indent; for (i = 0; i < len; i++) { - dst_buffer_push_u8(&p->buffer, ' '); + dst_buffer_push_u8(p->buffer, ' '); } } @@ -400,15 +401,15 @@ static void dst_print_hashtable_inner(DstPrinter *p, const DstValue *data, int32 } } if (doindent) { - dst_buffer_push_u8(&p->buffer, '\n'); + dst_buffer_push_u8(p->buffer, '\n'); p->indent++; for (i = 0; i < cap; i += 2) { if (!dst_checktype(data[i], DST_NIL)) { dst_print_indent(p); dst_description_helper(p, data[i]); - dst_buffer_push_u8(&p->buffer, ' '); + dst_buffer_push_u8(p->buffer, ' '); dst_description_helper(p, data[i + 1]); - dst_buffer_push_u8(&p->buffer, '\n'); + dst_buffer_push_u8(p->buffer, '\n'); } } p->indent--; @@ -420,9 +421,9 @@ static void dst_print_hashtable_inner(DstPrinter *p, const DstValue *data, int32 if (isfirst) isfirst = 0; else - dst_buffer_push_u8(&p->buffer, ' '); + dst_buffer_push_u8(p->buffer, ' '); dst_description_helper(p, data[i]); - dst_buffer_push_u8(&p->buffer, ' '); + dst_buffer_push_u8(p->buffer, ' '); dst_description_helper(p, data[i + 1]); } } @@ -446,12 +447,12 @@ static void dst_print_seq_inner(DstPrinter *p, const DstValue *data, int32_t len } } if (doindent) { - dst_buffer_push_u8(&p->buffer, '\n'); + dst_buffer_push_u8(p->buffer, '\n'); p->indent++; for (i = 0; i < len; ++i) { dst_print_indent(p); dst_description_helper(p, data[i]); - dst_buffer_push_u8(&p->buffer, '\n'); + dst_buffer_push_u8(p->buffer, '\n'); } p->indent--; dst_print_indent(p); @@ -459,7 +460,7 @@ static void dst_print_seq_inner(DstPrinter *p, const 
DstValue *data, int32_t len for (i = 0; i < len; ++i) { dst_description_helper(p, data[i]); if (i != len - 1) - dst_buffer_push_u8(&p->buffer, ' '); + dst_buffer_push_u8(p->buffer, ' '); } } } @@ -475,11 +476,11 @@ static void dst_description_helper(DstPrinter *p, DstValue x) { switch (dst_type(x)) { default: if (p->flags & DST_PRINTFLAG_COLORIZE) { - dst_buffer_push_cstring(&p->buffer, dst_type_colors[dst_type(x)]); - dst_short_description_b(&p->buffer, x); - dst_buffer_push_cstring(&p->buffer, "\x1B[0m"); + dst_buffer_push_cstring(p->buffer, dst_type_colors[dst_type(x)]); + dst_short_description_b(p->buffer, x); + dst_buffer_push_cstring(p->buffer, "\x1B[0m"); } else { - dst_short_description_b(&p->buffer, x); + dst_short_description_b(p->buffer, x); } p->depth++; return; @@ -502,21 +503,21 @@ static void dst_description_helper(DstPrinter *p, DstValue x) { } check = dst_table_get(&p->seen, x); if (dst_checktype(check, DST_INTEGER)) { - dst_buffer_push_cstring(&p->buffer, "buffer, dst_unwrap_integer(check)); - dst_buffer_push_cstring(&p->buffer, ">"); + dst_buffer_push_cstring(p->buffer, "buffer, dst_unwrap_integer(check)); + dst_buffer_push_cstring(p->buffer, ">"); return; } dst_table_put(&p->seen, x, dst_wrap_integer(p->next++)); - dst_buffer_push_cstring(&p->buffer, open); + dst_buffer_push_cstring(p->buffer, open); if (p->depth == 0) { - dst_buffer_push_cstring(&p->buffer, "..."); + dst_buffer_push_cstring(p->buffer, "..."); } else if (dst_hashtable_view(x, &data, &len, &cap)) { dst_print_hashtable_inner(p, data, len, cap); } else if (dst_seq_view(x, &data, &len)) { dst_print_seq_inner(p, data, len); } - dst_buffer_push_cstring(&p->buffer, close); + dst_buffer_push_cstring(p->buffer, close); /* Remove from seen as we know that printing completes, we * can print in multiple times and we know we are not recursing */ dst_table_remove(&p->seen, x); @@ -538,15 +539,17 @@ const uint8_t *dst_description(DstValue x) { DstPrinter printer; const uint8_t *ret; + 
DstBuffer buffer; dst_printer_defaults(&printer); printer.state = 0; - dst_buffer_init(&printer.buffer, 0); + dst_buffer_init(&buffer, 0); + printer.buffer = &buffer; /* Only print description up to a depth of 4 */ dst_description_helper(&printer, x); - ret = dst_string(printer.buffer.data, printer.buffer.count); + ret = dst_string(buffer.data, buffer.count); - dst_buffer_deinit(&printer.buffer); + dst_buffer_deinit(&buffer); if (printer.state) dst_table_deinit(&printer.seen); return ret; @@ -572,7 +575,8 @@ const uint8_t *dst_formatc(const char *format, ...) { int32_t i; const uint8_t *ret; DstPrinter printer; - DstBuffer *bufp = &printer.buffer; + DstBuffer buffer; + DstBuffer *bufp = &buffer; printer.state = 0; /* Calculate length */ @@ -580,6 +584,7 @@ const uint8_t *dst_formatc(const char *format, ...) { /* Initialize buffer */ dst_buffer_init(bufp, len); + printer.buffer = bufp; /* Start args */ va_start(args, format); @@ -646,8 +651,8 @@ const uint8_t *dst_formatc(const char *format, ...) 
{ va_end(args); - ret = dst_string(printer.buffer.data, printer.buffer.count); - dst_buffer_deinit(&printer.buffer); + ret = dst_string(buffer.data, buffer.count); + dst_buffer_deinit(&buffer); if (printer.state) dst_table_deinit(&printer.seen); return ret; diff --git a/core/strtod.c b/core/strtod.c new file mode 100644 index 00000000..fb6ffc10 --- /dev/null +++ b/core/strtod.c @@ -0,0 +1,242 @@ +/* +* Copyright (c) 2017 Calvin Rose +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to +* deal in the Software without restriction, including without limitation the +* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +* sell copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +*/ + +/* Use a custom double parser instead of libc's strtod for better portability + * and control. Also, uses a less strict rounding method than IEEE to not incur + * the cost of 4000 loc and dependence on arbitrary precision arithmetic. There + * is no plan to use arbitrary precision arithmetic for parsing numbers, and a + * formal rounding mode has yet to be chosen (round towards 0 seems + * reasonable). 
+ * + * This version has been modified for much greater flexibility in parsing, such + * as choosing the radix, supporting integer output, and returning DstValues + * directly. + * + * Numbers are of the form [-+]R[rR]I.F[eE&][-+]X where R is the radix, I is + * the integer part, F is the fractional part, and X is the exponent. All + * signs, radix, decimal point, fractional part, and exponent can be omitted. + * The number will be considered an integer if there is no decimal point + * and no exponent. Any number greater than 2^31-1 or less than -(2^31) will be + * coerced to a double. If there is an error, the function dst_scan_number will + * return a dst nil. The radix is assumed to be 10 if omitted, and the E + * separator for the exponent can only be used when the radix is 10. This is + * because E is a valid digit in bases 15 or greater. For bases greater than 10, + * the letters are used as digits. A through Z correspond to the digits 10 + * through 35, and the lowercase letters have the same values. The radix number + * is always in base 10. For example, a hexadecimal number could be written + * '16rdeadbeef'. dst_scan_number also supports some c style syntax for + * hexadecimal literals. The previous number could also be written + * '0xdeadbeef'. Note that in this case, the number will actually be a double + * as it will not fit in the range for a signed 32 bit integer. The string + * '0xbeef' would parse to an integer as it is in the range of an int32_t. */ + +/* TODO take down missile defence */ + +#include +#include + +/* Lookup table for getting values of characters when parsing numbers. Handles + * digits 0-9 and a-z (and A-Z). A-Z have values of 10 to 35. 
*/ +static const uint8_t digit_lookup[128] = { + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, + 0,1,2,3,4,5,6,7,8,9,0xff,0xff,0xff,0xff,0xff,0xff, + 0xff,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, + 25,26,27,28,29,30,31,32,33,34,35,0xff,0xff,0xff,0xff,0xff, + 0xff,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, + 25,26,27,28,29,30,31,32,33,34,35,0xff,0xff,0xff,0xff,0xff +}; + +/* Read in a mantissa and exponent of a certain base, and give back the + * signed double value. Should properly handle 0s, Infinities, and + * denormalized numbers. (When the exponent values are too large) */ +static double dst_convert_mantissa_exp( + int negative, + uint64_t mantissa, + int32_t base, + int32_t exponent) { + + int32_t exponent2 = 0; + + /* Short circuit zero and huge numbers, keeping the requested sign */ + if (mantissa == 0) + return negative ? -0.0 : 0.0; + if (exponent > 1022) + return negative ? -1.0/0.0 : 1.0/0.0; + + /* TODO add fast paths */ + + /* Convert exponent on the base into exponent2, the power of + * 2 that will be used. Modify the mantissa as we convert. */ + if (exponent > 0) { + /* Make the mantissa large enough so no precision is lost */ + while (mantissa <= 0x03ffffffffffffffULL && exponent > 0) { + mantissa *= base; + exponent--; + } + while (exponent > 0) { + /* Allow 6 bits of room when multiplying. This is because + * the largest base is 36, which is 6 bits.
The space of 6 should + * prevent overflow.*/ + mantissa >>= 1; + exponent2++; + if (mantissa <= 0x03ffffffffffffffULL) { + mantissa *= base; + exponent--; + } + } + } else { + while (exponent < 0) { + mantissa <<= 1; + exponent2--; + /* Ensure that the last bit is set for minimum error + * before dividing by the base */ + if (mantissa > 0x7fffffffffffffffULL) { + mantissa /= base; + exponent++; + } + } + } + + /* Build the magnitude with ldexp, then apply the sign */ + return negative ? -ldexp(mantissa, exponent2) : ldexp(mantissa, exponent2); +} + +/* Scan a number from the len bytes starting at str. The mantissa is + * accumulated in a 64 bit unsigned integer (always positive) and the + * exponent in a signed 32 bit integer. Returns an integer DstValue when there + * is no decimal point or exponent and the value fits in an int32_t, a real + * otherwise, and a dst nil if the text is not a valid number. */ +DstValue dst_scan_number( + const uint8_t *str, + int32_t len) { + + const uint8_t *end = str + len; + int32_t seenpoint = 0; + uint64_t mant = 0; + int32_t neg = 0; + int32_t ex = 0; + int foundExp = 0; + + /* Set some constants */ + int base = 10; + + /* Prevent some kinds of overflow bugs relating to the exponent + * overflowing. For example, if a string was passed 2GB worth of 0s after + * the decimal point, exponent could wrap around and become positive. It's + * easier to reject ridiculously large inputs than to check for overflows.
+ * */ + if (len > INT32_MAX / base) goto error; + + /* Get sign */ + if (str >= end) goto error; + if (*str == '-') { + neg = 1; + str++; + } else if (*str == '+') { + str++; + } + + /* Skip leading zeros */ + while (str < end && (*str == '0' || *str == '.')) { + if (seenpoint) ex--; + if (*str == '.') { + if (seenpoint) goto error; + seenpoint = 1; + } + str++; + } + + /* Parse significant digits */ + while (str < end) { + if (*str == '.') { + if (seenpoint) goto error; + seenpoint = 1; + } else if (*str == '&') { + foundExp = 1; + break; + } else if (base == 10 && (*str == 'E' || *str == 'e')) { + foundExp = 1; + break; + } else if (*str == 'x' || *str == 'X') { + if (seenpoint || mant > 0) goto error; + base = 16; + mant = 0; + } else if (*str == 'r' || *str == 'R') { + if (seenpoint) goto error; + if (mant < 2 || mant > 36) goto error; + base = mant; + mant = 0; + } else if (*str == '_') { + ; + /* underscores are ignored - can be used for separator */ + } else { + int digit = *str < 128 ? digit_lookup[*str] : 0xff; + if (digit >= base) goto error; + if (seenpoint) ex--; + if (mant > 0x00ffffffffffffff) + ex++; + else + mant = base * mant + digit; + } + str++; + } + + /* Read exponent */ + if (str < end && foundExp) { + int eneg = 0; + int ee = 0; + str++; + if (str >= end) goto error; + if (*str == '-') { + eneg = 1; + str++; + } else if (*str == '+') { + str++; + } + /* Skip leading 0s in exponent */ + while (str < end && *str == '0') str++; + while (str < end && ee < (INT32_MAX / base - base)) { + int digit = *str < 128 ? digit_lookup[*str] : 0xff; + if (digit >= base) goto error; + ee = base * ee + digit; + str++; + } + if (eneg) ex -= ee; else ex += ee; + } else if (!seenpoint) { + /* Check for integer literal; out of range values become reals below */ + int64_t i64 = neg ?
-(int64_t) mant : (int64_t) mant; + if (i64 <= INT32_MAX && i64 >= INT32_MIN) + return dst_wrap_integer((int32_t) i64); + } else if (str < end) { + goto error; + } + + /* Convert mantissa and exponent into double */ + return dst_wrap_real(dst_convert_mantissa_exp(neg, mant, base, ex)); + + error: + return dst_wrap_nil(); + +} + diff --git a/core/strtod.h b/core/strtod.h new file mode 100644 index 00000000..91e879d7 --- /dev/null +++ b/core/strtod.h @@ -0,0 +1,30 @@ +/* +* Copyright (c) 2017 Calvin Rose +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to +* deal in the Software without restriction, including without limitation the +* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +* sell copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +*/ + +#ifndef DST_STRTOD_H_defined +#define DST_STRTOD_H_defined + +DstValue dst_scan_number( + const uint8_t *str, + int32_t len); + +#endif diff --git a/core/syscalls.c b/core/syscalls.c index 9bf85ce0..8ce59e54 100644 --- a/core/syscalls.c +++ b/core/syscalls.c @@ -46,7 +46,9 @@ int dst_sys_asm(DstValue *argv, int32_t argn) { return 1; } opts.source = argv[0]; - opts.parsemap = argn >= 2 ?
argv[1] : dst_wrap_nil(); + opts.sourcemap = (argn >= 2 && dst_checktype(argv[1], DST_TUPLE)) + ? dst_unwrap_tuple(argv[1]) + : NULL; opts.flags = 0; res = dst_asm(opts); if (res.status == DST_ASSEMBLE_OK) { diff --git a/core/util.c b/core/util.c index d1576932..e1fa564d 100644 --- a/core/util.c +++ b/core/util.c @@ -115,3 +115,4 @@ int dst_hashtable_view(DstValue tab, const DstValue **data, int32_t *len, int32_ } return 0; } + diff --git a/core/vm.c b/core/vm.c index ba310dba..0032f101 100644 --- a/core/vm.c +++ b/core/vm.c @@ -50,11 +50,6 @@ static int dst_update_fiber() { return 0; } -/* Eventually use computed gotos for more effient vm loop. */ -#define vm_next() continue -#define vm_checkgc_next() dst_maybe_collect(); continue - - /* Start running the VM from where it left off. */ int dst_continue() { @@ -63,6 +58,10 @@ int dst_continue() { uint32_t *pc; DstFunction *func; +/* Eventually use computed gotos for more efficient vm loop. */ +#define vm_next() continue +#define vm_checkgc_next() dst_maybe_collect(); continue + /* Used to extract bits from the opcode that correspond to arguments. * Pulls out unsigned integers */ #define oparg(shift, mask) (((*pc) >> ((shift) << 3)) & (mask)) diff --git a/core/wrap.c b/core/wrap.c index 963369a8..e713133a 100644 --- a/core/wrap.c +++ b/core/wrap.c @@ -28,7 +28,9 @@ void *dst_nanbox_to_pointer(DstValue x) { /* We need to do this shift to keep the higher bits of the pointer * the same as bit 47 as required by the x86 architecture. We may save * an instruction if we do x.u64 & DST_NANBOX_POINTERBITS, but this 0s - * the high bits, and may make the pointer non-canocial on x86. */ + * the high bits, and may make the pointer non-canonical on x86. If we switch + * to 47 bit pointers (which is what userspace uses on Windows), we can use + * the single mask rather than two shifts.
*/ x.i64 = (x.i64 << 16) >> 16; return x.pointer; } diff --git a/dsts/minimal.dsts b/dsts/minimal.dsts index 0cb42c60..62899eed 100644 --- a/dsts/minimal.dsts +++ b/dsts/minimal.dsts @@ -1,5 +1,5 @@ +# A fairly minimal example of a dst assembly file { - bork 'boop bytecode [ (load-integer 0 15) (load-integer 1 0) @@ -8,9 +8,7 @@ :label (equals 2 1 0) (jump-if 2 :done) - (push 0) (add-immediate 0 0 -1) - (syscall 2 0) (get 2 3 0) (push 2) (syscall 2 0) @@ -18,6 +16,9 @@ :done (return-nil) + + :extra + (push 2r1010101010101010) ] constants [ (def lookup "0123456789abcdef") diff --git a/include/dst/dst.h b/include/dst/dst.h index b4627324..ad9a9d49 100644 --- a/include/dst/dst.h +++ b/include/dst/dst.h @@ -599,10 +599,12 @@ struct DstAssembleResult { DstFuncDef *def; const uint8_t *error; } result; + int32_t error_start; + int32_t error_end; DstAssembleStatus status; }; struct DstAssembleOptions { - DstValue parsemap; + const DstValue *sourcemap; DstValue source; uint32_t flags; }; @@ -648,12 +650,15 @@ struct DstParseResult { DstValue value; const uint8_t *error; } result; - DstValue map; + const DstValue *map; int32_t bytes_read; DstParseStatus status; }; DstParseResult dst_parse(const uint8_t *src, int32_t len); DstParseResult dst_parsec(const char *src); +const DstValue *dst_parse_submap_index(const DstValue *map, int32_t index); +const DstValue *dst_parse_submap_key(const DstValue *map, DstValue key); +const DstValue *dst_parse_submap_value(const DstValue *map, DstValue key); /* VM functions */ int dst_init(); @@ -662,9 +667,6 @@ int dst_continue(); int dst_run(DstValue callee); DstValue dst_transfer(DstFiber *fiber, DstValue x); -/* Wrap data in DstValue */ - - /* GC */ /* The metadata header associated with an allocated block of memory */ diff --git a/unittests/asm_test.c b/unittests/asm_test.c index f234c3e5..1aa23c63 100644 --- a/unittests/asm_test.c +++ b/unittests/asm_test.c @@ -26,21 +26,20 @@ int main() { free(string); if (pres.status == DST_PARSE_ERROR) 
{ - dst_puts(dst_formatc("parse error at %d: %s\n", pres.bytes_read, pres.result.error)); + dst_puts(dst_formatc("parse error at %d: %S\n", pres.bytes_read, pres.result.error)); return 1; } assert(pres.status == DST_PARSE_OK); dst_puts(dst_formatc("\nparse result: %v\n\n", pres.result.value)); - dst_puts(dst_formatc("\nparse map result: %v\n\n", pres.map)); - /* opts.flags = 0; opts.source = pres.result.value; - opts.parsemap = dst_wrap_nil(); + opts.sourcemap = pres.map; ares = dst_asm(opts); if (ares.status == DST_ASSEMBLE_ERROR) { - dst_puts(dst_formatc("assembly error: %s\n", ares.result.error)); + dst_puts(dst_formatc("assembly error: %S\n", ares.result.error)); + dst_puts(dst_formatc("error location: %d, %d\n", ares.error_start, ares.error_end)); return 1; } assert(ares.status == DST_ASSEMBLE_OK); @@ -49,7 +48,6 @@ int main() { dst_run(dst_wrap_function(func)); dst_puts(dst_formatc("result: %v\n", dst_vm_fiber->ret)); - */ dst_deinit(); diff --git a/unittests/strtod_test.c b/unittests/strtod_test.c new file mode 100644 index 00000000..6c4b364e --- /dev/null +++ b/unittests/strtod_test.c @@ -0,0 +1,122 @@ +/* +* Copyright (c) 2017 Calvin Rose +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to +* deal in the Software without restriction, including without limitation the +* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +* sell copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +*/ + +/* Use a custom double parser instead of libc's strtod for better portability + * and control. Also, uses a less strict rounding method than IEEE to not incur + * the cost of 4000 loc and dependence on arbitrary precision arithmetic. There + * is no plan to use arbitrary precision arithmetic for parsing numbers, and a + * formal rounding mode has yet to be chosen (round towards 0 seems + * reasonable). + * + * This version has been modified for much greater flexibility in parsing, such + * as choosing the radix, supporting integer output, and returning DstValues + * directly. + * + * Numbers are of the form [-+]R[rR]I.F[eE&][-+]X where R is the radix, I is + * the integer part, F is the fractional part, and X is the exponent. All + * signs, radix, decimal point, fractional part, and exponent can be omitted. + * The number will be considered an integer if there is no decimal point + * and no exponent. Any number greater than 2^31-1 or less than -(2^31) will be + * coerced to a double. If there is an error, the function dst_scan_number will + * return a dst nil. The radix is assumed to be 10 if omitted, and the E + * separator for the exponent can only be used when the radix is 10. This is + * because E is a valid digit in bases 15 or greater. For bases greater than 10, + * the letters are used as digits. A through Z correspond to the digits 10 + * through 35, and the lowercase letters have the same values. The radix number + * is always in base 10. For example, a hexadecimal number could be written + * '16rdeadbeef'. dst_scan_number also supports some C style syntax for + * hexadecimal literals. The previous number could also be written + * '0xdeadbeef'.
Note that in this case, the number will actually be a double + * as it will not fit in the range for a signed 32 bit integer. The string + * '0xbeef' would parse to an integer as it is in the range of an int32_t. */ + +#include "unit.h" +#include +#include + +DstValue dst_scan_number(const uint8_t *str, int32_t len); + +/* Inputs fed to dst_scan_number; the list mixes well formed and malformed + * literals so both the accept and reject paths are exercised. */ +const char *valid_test_strs[] = { + "0", + "-0.0", + "+0", + "123", + "-123", + "aaaaaa", + "+a123", + "0.12312", + "89.12312", + "-123.01231", + "123e10", + "1203412347981232379183.13013248723478932478923478e12", + "120341234798123237918313013248723478932478923478", + "999_999_999_999", + "8r777", + "", + "----", + " ", + "--123", + "0xff", + "0xff.f", + "0xff&-1", + "0xfefefe", + "1926.4823e11", + "0xff_ff_ff_ff", + "0xff_ff_ff_ff_ff_ff", + "2r1010", + "2r10101010001101", + "123a", + "0.1e510", + "4.123123e-308", + "4.123123e-320", + "1e-308", + "1e-309", + "9e-308", + "9e-309", + "919283691283e-309", + "9999e302", + "123.12312.123", + "90.e0.1", + "90.e1", + ".e1" +}; + +/* Print each literal next to dst_scan_number's result and libc strtod's. */ +int main() { + dst_init(); + unsigned i; + for (i = 0; i < (sizeof(valid_test_strs) / sizeof(valid_test_strs[0])); i++) { + DstValue out; + double refout; + const uint8_t *str = (const uint8_t *) valid_test_strs[i]; + int32_t len = 0; while (str[len]) len++; + + refout = strtod(valid_test_strs[i], NULL); + out = dst_scan_number(str, len); + dst_puts(dst_formatc("literal: %s, out: %v, refout: %v\n", + valid_test_strs[i], out, dst_wrap_real(refout))); + + } + uint64_t x = 0x07FFFFFFFFFFFFFF; + uint64_t y = 36; + /* %llu needs unsigned long long; uint64_t may be another type */ + printf("%llu, %llu\n", (unsigned long long) x, (unsigned long long) ((x * y) / y)); +}