From 14fe30b4119880f0f60555909a41963544c0ba88 Mon Sep 17 00:00:00 2001 From: bakpakin Date: Sun, 3 Dec 2017 12:52:09 -0500 Subject: [PATCH] Add preliminary source mapping to parser (always on) --- .gitignore | 2 +- core/compile.c | 66 +++++++++++++++++++++++++++++++ core/parse.c | 86 +++++++++++++++++++++++++++++++++++++---- core/string.c | 7 +++- core/util.c | 9 +++-- core/wrap.c | 1 + include/dst/dst.h | 6 ++- unittests/asm_test.c | 1 + unittests/nanbox_test.c | 10 ++++- 9 files changed, 171 insertions(+), 17 deletions(-) create mode 100644 core/compile.c diff --git a/.gitignore b/.gitignore index ceb1a868..bc526576 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ # Target /client/dst -dst +./dst # Generated files *.gen.h diff --git a/core/compile.c b/core/compile.c new file mode 100644 index 00000000..f06e29e3 --- /dev/null +++ b/core/compile.c @@ -0,0 +1,66 @@ +/* +* Copyright (c) 2017 Calvin Rose +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to +* deal in the Software without restriction, including without limitation the +* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +* sell copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. 
+*/ + +#include + +#define DST_LOCAL_FLAG_MUTABLE 1 + +/* Compiler typedefs */ +typedef struct DstCompiler DstCompiler; +typedef struct FormOptions FormOptions; +typedef struct SlotTracker SlotTracker; +typedef struct DstScope DstScope; + +/* A stack slot */ +struct DstSlot { + int32_t index; + uint32_t flags; + uint32_t types; /* bit set of possible primitive types */ +} + +/* A lexical scope during compilation */ +struct DstScope { + DstArray slots; + DstArray freeslots; + DstArray constants; + DstTable symbols; /* Positive numbers are stack slots, negative are negative constant indices */ + uint32_t flags; + int32_t nextslot; +} + +/* Compilation state */ +struct DstCompiler { + DstValue error; + jmp_buf on_error; + DstArray scopes; + DstBuffer buffer; + int recursion_guard; +}; + +/* Compiler state */ +struct DstFormOptions { + DstCompiler *compiler; + DstValue x; + uint32_t flags; + uint32_t types; /* bit set of accepted primitive types */ + int32_t target_slot; +}; diff --git a/core/parse.c b/core/parse.c index 13bb6194..6ad520fd 100644 --- a/core/parse.c +++ b/core/parse.c @@ -62,7 +62,8 @@ static int read_integer(const uint8_t *string, const uint8_t *end, int64_t *ret) /* Read a real from a string. Returns if successfuly * parsed a real from the enitre input string. - * If returned 1, output is int ret.*/ + * If returned 1, output is in ret. + * TODO - consider algorithm that does not lose precision. 
*/ static int read_real(const uint8_t *string, const uint8_t *end, double *ret, int forceInt) { int sign = 1, x = 0; double accum = 0, exp = 1, place = 1; @@ -133,7 +134,7 @@ static int is_whitespace(uint8_t c) { } /* Check if a character is a valid symbol character */ -/* TODO - wlloe utf8 - shouldn't be difficult, err on side +/* TODO - allow utf8 - shouldn't be difficult, err on side * of inclusivity */ static int is_symbol_char(uint8_t c) { if (c >= 'a' && c <= 'z') return 1; @@ -161,8 +162,35 @@ static int to_hex(uint8_t c) { } } +/* Make source mapping for atom (non recursive structure) */ +static DstValue atom_map(int32_t start, int32_t end) { + DstValue *t = dst_tuple_begin(2); + t[0] = dst_wrap_integer(start); + t[1] = dst_wrap_integer(end); + return dst_wrap_tuple(dst_tuple_end(t)); +} + +/* Create mapping for recursive data structure */ +static DstValue ds_map(int32_t start, int32_t end, DstValue submap) { + DstValue *t = dst_tuple_begin(3); + t[0] = dst_wrap_integer(start); + t[1] = dst_wrap_integer(end); + t[2] = submap; + return dst_wrap_tuple(dst_tuple_end(t)); +} + +/* Create a source mapping for a key value pair */ +static DstValue kv_map(DstValue k, DstValue v) { + DstValue *t = dst_tuple_begin(2); + t[0] = k; + t[1] = v; + return dst_wrap_tuple(dst_tuple_end(t)); +} + typedef struct { DstArray stack; + DstArray mapstack; + const uint8_t *srcstart; const uint8_t *end; const char *errmsg; DstParseStatus status; @@ -175,8 +203,10 @@ static const uint8_t *parse_recur( int32_t recur) { const uint8_t *end = args->end; + const uint8_t *mapstart; int32_t qcount = 0; DstValue ret; + DstValue submapping; /* Prevent stack overflow */ if (recur == 0) goto too_much_recur; @@ -194,6 +224,10 @@ static const uint8_t *parse_recur( /* Check for end of source */ if (src >= end) goto unexpected_eos; + + /* Open mapping */ + mapstart = src; + submapping = dst_wrap_nil(); /* Detect token type based on first character */ switch (*src) { @@ -332,18 +366,27 @@ static 
const uint8_t *parse_recur( case ')': { DstValue *tup = dst_tuple_begin(n); - for (i = n; i > 0; i--) + DstValue *subtup = dst_tuple_begin(n); + for (i = n; i > 0; i--) { tup[i - 1] = dst_array_pop(&args->stack); + subtup[i - 1] = dst_array_pop(&args->mapstack); + } ret = dst_wrap_tuple(dst_tuple_end(tup)); + submapping = dst_wrap_tuple(dst_tuple_end(subtup)); break; } case ']': { DstArray *arr = dst_array(n); - for (i = n; i > 0; i--) + DstArray *subarr = dst_array(n); + for (i = n; i > 0; i--) { arr->data[i - 1] = dst_array_pop(&args->stack); + subarr->data[i - 1] = dst_array_pop(&args->mapstack); + } arr->count = n; + subarr->count = n; ret = dst_wrap_array(arr); + submapping = dst_wrap_array(subarr); break; } case '}': @@ -351,20 +394,32 @@ static const uint8_t *parse_recur( if (n & 1) goto struct_oddargs; if (istable) { DstTable *t = dst_table(n); + DstTable *subt = dst_table(n); for (i = n; i > 0; i -= 2) { DstValue val = dst_array_pop(&args->stack); DstValue key = dst_array_pop(&args->stack); + DstValue subval = dst_array_pop(&args->mapstack); + DstValue subkey = dst_array_pop(&args->mapstack); + dst_table_put(t, key, val); + dst_table_put(subt, key, kv_map(subkey, subval)); } ret = dst_wrap_table(t); + submapping = dst_wrap_table(subt); } else { DstValue *st = dst_struct_begin(n >> 1); + DstValue *subst = dst_struct_begin(n >> 1); for (i = n; i > 0; i -= 2) { DstValue val = dst_array_pop(&args->stack); DstValue key = dst_array_pop(&args->stack); + DstValue subval = dst_array_pop(&args->mapstack); + DstValue subkey = dst_array_pop(&args->mapstack); + dst_struct_put(st, key, val); + dst_struct_put(subst, key, kv_map(subkey, subval)); } ret = dst_wrap_struct(dst_struct_end(st)); + submapping = dst_wrap_struct(dst_struct_end(subst)); } break; } @@ -378,6 +433,20 @@ static const uint8_t *parse_recur( /* Push the result to the stack */ dst_array_push(&args->stack, ret); + + /* Push source mapping */ + if (dst_checktype(submapping, DST_NIL)) { + /* We just parsed 
an atom */ + dst_array_push(&args->mapstack, atom_map( + mapstart - args->srcstart, + src - args->srcstart)); + } else { + /* We just parsed a recursive data structure */ + dst_array_push(&args->mapstack, ds_map( + mapstart - args->srcstart, + src - args->srcstart, + submapping)); + } /* Return the new source position for further calls */ return src; @@ -428,23 +497,26 @@ DstParseResult dst_parse(const uint8_t *src, int32_t len) { dst_array_init(&args.stack, 10); args.status = DST_PARSE_OK; + args.srcstart = src; args.end = src + len; args.errmsg = NULL; + dst_array_init(&args.mapstack, 10); + newsrc = parse_recur(&args, src, DST_RECURSION_GUARD); res.status = args.status; res.bytes_read = (int32_t) (newsrc - src); - /* TODO - source maps */ - res.map = dst_wrap_nil(); - if (args.errmsg) { res.result.error = dst_cstring(args.errmsg); + res.map = dst_wrap_nil(); } else { res.result.value = dst_array_pop(&args.stack); + res.map = dst_array_pop(&args.mapstack); } dst_array_deinit(&args.stack); + dst_array_deinit(&args.mapstack); return res; } diff --git a/core/string.c b/core/string.c index 6f7eef55..55416605 100644 --- a/core/string.c +++ b/core/string.c @@ -200,6 +200,7 @@ static const uint8_t *string_description(const char *title, void *pointer) { #undef HEX #undef DST_BUFSIZE +/* TODO - add more characters to escapes */ static int32_t dst_escape_string_length(const uint8_t *str) { int32_t len = 2; int32_t i; @@ -359,11 +360,13 @@ static int is_print_ds(DstValue v) { } /* VT100 Colors for types */ -static const char *dst_type_colors[15] = { +/* TODO - generalize into configurable headers and footers */ +static const char *dst_type_colors[16] = { "\x1B[35m", "\x1B[33m", "\x1B[33m", "\x1B[35m", + "\x1B[35m", "\x1B[32m", "\x1B[36m", "", @@ -524,7 +527,7 @@ static void dst_description_helper(DstPrinter *p, DstValue x) { static void dst_printer_defaults(DstPrinter *p) { p->next = 0; p->flags = DST_PRINTFLAG_INDENT; - p->depth = 4; + p->depth = 10; p->indent = 0; 
p->indent_size = 2; p->token_line_limit = 5; diff --git a/core/util.c b/core/util.c index d03bd160..d1576932 100644 --- a/core/util.c +++ b/core/util.c @@ -31,18 +31,19 @@ const char dst_base64[65] = /* The DST value types in order. These types can be used as * mnemonics instead of a bit pattern for type checking */ -const char *dst_type_names[15] = { +const char *dst_type_names[16] = { "nil", - "real", + "false", + "true", + "fiber", "integer", - "boolean", + "real", "string", "symbol", "array", "tuple", "table", "struct", - "fiber", "buffer", "function", "cfunction", diff --git a/core/wrap.c b/core/wrap.c index 62c4f27d..2ba572c1 100644 --- a/core/wrap.c +++ b/core/wrap.c @@ -138,6 +138,7 @@ DST_WRAP_DEFINE(function, DstFunction *, DST_FUNCTION, pointer) DST_WRAP_DEFINE(cfunction, DstCFunction, DST_CFUNCTION, pointer) DST_WRAP_DEFINE(table, DstTable *, DST_TABLE, pointer) DST_WRAP_DEFINE(userdata, void *, DST_USERDATA, pointer) +DST_WRAP_DEFINE(pointer, void *, DST_USERDATA, pointer) #undef DST_WRAP_DEFINE diff --git a/include/dst/dst.h b/include/dst/dst.h index 49589d3f..b4627324 100644 --- a/include/dst/dst.h +++ b/include/dst/dst.h @@ -104,7 +104,7 @@ * ands crashing (the parser). Instead, error out. 
*/ #define DST_RECURSION_GUARD 1000 -/* #define DST_NANBOX */ +#define DST_NANBOX #ifdef DST_NANBOX typedef union DstValue DstValue; @@ -129,7 +129,7 @@ typedef struct DstUserType DstUserType; typedef int (*DstCFunction)(DstValue *argv, int32_t argn); /* Names of all of the types */ -extern const char *dst_type_names[15]; +extern const char *dst_type_names[16]; /* Basic types for all Dst Values */ typedef enum DstType { @@ -280,6 +280,7 @@ DstValue dst_nanbox_from_bits(uint64_t bits); #define dst_wrap_string(s) dst_nanbox_wrap_c((s), DST_STRING) #define dst_wrap_symbol(s) dst_nanbox_wrap_c((s), DST_SYMBOL) #define dst_wrap_userdata(s) dst_nanbox_wrap_((s), DST_USERDATA) +#define dst_wrap_pointer(s) dst_nanbox_wrap_((s), DST_USERDATA) #define dst_wrap_function(s) dst_nanbox_wrap_((s), DST_FUNCTION) #define dst_wrap_cfunction(s) dst_nanbox_wrap_((s), DST_CFUNCTION) @@ -353,6 +354,7 @@ DstValue dst_wrap_function(DstFunction *x); DstValue dst_wrap_cfunction(DstCFunction x); DstValue dst_wrap_table(DstTable *x); DstValue dst_wrap_userdata(void *x); +DstValue dst_wrap_pointer(void *x); /* End of tagged union implementation */ #endif diff --git a/unittests/asm_test.c b/unittests/asm_test.c index eccc7d21..f234c3e5 100644 --- a/unittests/asm_test.c +++ b/unittests/asm_test.c @@ -31,6 +31,7 @@ int main() { } assert(pres.status == DST_PARSE_OK); dst_puts(dst_formatc("\nparse result: %v\n\n", pres.result.value)); + dst_puts(dst_formatc("\nparse map result: %v\n\n", pres.map)); /* opts.flags = 0; diff --git a/unittests/nanbox_test.c b/unittests/nanbox_test.c index 315f3f46..0506cce9 100644 --- a/unittests/nanbox_test.c +++ b/unittests/nanbox_test.c @@ -33,12 +33,20 @@ union dst_t { * 47 bit payload representaion is that the type bits are no long contiguous. Type * checking can still be fast, but typewise polymorphism takes a bit longer. 
However, * hopefully we can avoid some annoying problems that occur when trying to use 47 bit pointers - * in a 48 bit address space (Linux on ARM) */ + * in a 48 bit address space (Linux on ARM), or when generating Signaling NaNs. */ /* |.......Tag.......|.......................Payload..................| */ /* Non-double: t|11111111111|1ttt|xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx */ /* Types of NIL, TRUE, and FALSE must have payload set to all 1s. */ +/* Other possible representations: */ + +/* Common Style */ +/* Non-double: 1|11111111111|tttt|xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx */ + +/* LuaJIT style */ +/* Non-double: 1|11111111111|1ttt|txxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx */ + /* Double (no NaNs): x xxxxxxxxxxx xxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx */ /* A simple scheme for nan boxed values */