Add preliminary source mapping to parser (always on)

This commit is contained in:
bakpakin 2017-12-03 12:52:09 -05:00
parent eceb6e5a77
commit 14fe30b411
9 changed files with 171 additions and 17 deletions

2
.gitignore vendored
View File

@ -1,6 +1,6 @@
# Target
/client/dst
dst
./dst
# Generated files
*.gen.h

66
core/compile.c Normal file
View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2017 Calvin Rose
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <dst/dst.h>
/* Flag bit (value 1). Going by the name it marks a local as mutable;
 * NOTE(review): no use of it is visible in this file yet - confirm which
 * `flags` field it is meant to be stored in. */
#define DST_LOCAL_FLAG_MUTABLE 1
/* Compiler typedefs: forward declarations so the structs can be referred to
 * by bare name.
 * NOTE(review): `FormOptions` and `SlotTracker` do not match any struct tag
 * defined further down in this file (the structs there are `DstFormOptions`
 * and `DstSlot`), so these two typedefs name incomplete types - confirm the
 * intended tags. */
typedef struct DstCompiler DstCompiler;
typedef struct FormOptions FormOptions;
typedef struct SlotTracker SlotTracker;
typedef struct DstScope DstScope;
/* A stack slot.
 * The struct definition must be terminated with ';' - without it the next
 * declaration in the file is parsed as part of this one and compilation
 * fails. */
struct DstSlot {
    int32_t index;  /* slot number (presumably an index into the stack frame - confirm) */
    uint32_t flags; /* per-slot flag bits (presumably DST_LOCAL_FLAG_* values - confirm) */
    uint32_t types; /* bit set of possible primitive types */
};
/* A lexical scope during compilation.
 * The struct definition must be terminated with ';' - without it the next
 * declaration in the file is parsed as part of this one and compilation
 * fails. */
struct DstScope {
    DstArray slots;     /* slots belonging to this scope */
    DstArray freeslots; /* presumably slots available for reuse - confirm */
    DstArray constants; /* constants referenced from this scope */
    DstTable symbols;   /* Positive numbers are stack slots, negative are negative constant indices */
    uint32_t flags;     /* scope flag bits */
    int32_t nextslot;   /* presumably the next unallocated slot index - confirm */
};
/* Compilation state: the data shared across one run of the compiler.
 * NOTE(review): `jmp_buf` is declared by <setjmp.h>; this file only includes
 * <dst/dst.h> - confirm that header pulls it in. */
struct DstCompiler {
/* Error value for the compilation (presumably set before jumping to on_error - confirm) */
DstValue error;
/* setjmp/longjmp context; by its name, the jump target used on error */
jmp_buf on_error;
/* Array of scopes (presumably used as a stack of DstScope - confirm) */
DstArray scopes;
/* Working byte buffer (presumably for emitted output - confirm) */
DstBuffer buffer;
/* Recursion counter/limit (presumably mirrors DST_RECURSION_GUARD usage in the parser - confirm) */
int recursion_guard;
};
/* Options carried alongside a single value `x` handed to the compiler
 * (per-form options, going by the struct name).
 * NOTE(review): the typedef near the top of this file names
 * `struct FormOptions`, not `struct DstFormOptions` - confirm which tag is
 * intended. */
struct DstFormOptions {
/* Compiler state these options belong to */
DstCompiler *compiler;
/* The value the options apply to (presumably the form being compiled - confirm) */
DstValue x;
/* Option flag bits */
uint32_t flags;
uint32_t types; /* bit set of accepted primitive types */
/* Slot index associated with the form (presumably where the result should go - confirm) */
int32_t target_slot;
};

View File

@ -62,7 +62,8 @@ static int read_integer(const uint8_t *string, const uint8_t *end, int64_t *ret)
/* Read a real from a string. Returns whether a real was successfully
* parsed from the entire input string.
* If returned 1, output is int ret.*/
* If returned 1, output is int ret.
* TODO - consider algorithm that does not lose precision. */
static int read_real(const uint8_t *string, const uint8_t *end, double *ret, int forceInt) {
int sign = 1, x = 0;
double accum = 0, exp = 1, place = 1;
@ -133,7 +134,7 @@ static int is_whitespace(uint8_t c) {
}
/* Check if a character is a valid symbol character */
/* TODO - wlloe utf8 - shouldn't be difficult, err on side
/* TODO - allow utf8 - shouldn't be difficult, err on side
* of inclusivity */
static int is_symbol_char(uint8_t c) {
if (c >= 'a' && c <= 'z') return 1;
@ -161,8 +162,35 @@ static int to_hex(uint8_t c) {
}
}
/* Build the source mapping for an atom (a value with no nested structure):
 * a 2-tuple of (start, end), both wrapped as integers. */
static DstValue atom_map(int32_t start, int32_t end) {
    enum { ATOM_MAP_LEN = 2 };
    DstValue *span = dst_tuple_begin(ATOM_MAP_LEN);

    span[0] = dst_wrap_integer(start);
    span[1] = dst_wrap_integer(end);

    return dst_wrap_tuple(dst_tuple_end(span));
}
/* Build the source mapping for a recursive data structure: a 3-tuple of
 * (start, end, submap), where start and end are wrapped as integers and
 * submap is the mapping of the structure's contents, stored as given. */
static DstValue ds_map(int32_t start, int32_t end, DstValue submap) {
    enum { DS_MAP_LEN = 3 };
    DstValue *entry = dst_tuple_begin(DS_MAP_LEN);

    entry[0] = dst_wrap_integer(start);
    entry[1] = dst_wrap_integer(end);
    entry[2] = submap;

    return dst_wrap_tuple(dst_tuple_end(entry));
}
/* Build the source mapping for one key/value entry: a 2-tuple holding the
 * key's mapping in position 0 and the value's mapping in position 1. */
static DstValue kv_map(DstValue k, DstValue v) {
    enum { KV_MAP_LEN = 2 };
    DstValue *pair = dst_tuple_begin(KV_MAP_LEN);

    pair[0] = k;
    pair[1] = v;

    return dst_wrap_tuple(dst_tuple_end(pair));
}
typedef struct {
DstArray stack;
DstArray mapstack;
const uint8_t *srcstart;
const uint8_t *end;
const char *errmsg;
DstParseStatus status;
@ -175,8 +203,10 @@ static const uint8_t *parse_recur(
int32_t recur) {
const uint8_t *end = args->end;
const uint8_t *mapstart;
int32_t qcount = 0;
DstValue ret;
DstValue submapping;
/* Prevent stack overflow */
if (recur == 0) goto too_much_recur;
@ -194,6 +224,10 @@ static const uint8_t *parse_recur(
/* Check for end of source */
if (src >= end) goto unexpected_eos;
/* Open mapping */
mapstart = src;
submapping = dst_wrap_nil();
/* Detect token type based on first character */
switch (*src) {
@ -332,18 +366,27 @@ static const uint8_t *parse_recur(
case ')':
{
DstValue *tup = dst_tuple_begin(n);
for (i = n; i > 0; i--)
DstValue *subtup = dst_tuple_begin(n);
for (i = n; i > 0; i--) {
tup[i - 1] = dst_array_pop(&args->stack);
subtup[i - 1] = dst_array_pop(&args->mapstack);
}
ret = dst_wrap_tuple(dst_tuple_end(tup));
submapping = dst_wrap_tuple(dst_tuple_end(subtup));
break;
}
case ']':
{
DstArray *arr = dst_array(n);
for (i = n; i > 0; i--)
DstArray *subarr = dst_array(n);
for (i = n; i > 0; i--) {
arr->data[i - 1] = dst_array_pop(&args->stack);
subarr->data[i - 1] = dst_array_pop(&args->mapstack);
}
arr->count = n;
subarr->count = n;
ret = dst_wrap_array(arr);
submapping = dst_wrap_array(subarr);
break;
}
case '}':
@ -351,20 +394,32 @@ static const uint8_t *parse_recur(
if (n & 1) goto struct_oddargs;
if (istable) {
DstTable *t = dst_table(n);
DstTable *subt = dst_table(n);
for (i = n; i > 0; i -= 2) {
DstValue val = dst_array_pop(&args->stack);
DstValue key = dst_array_pop(&args->stack);
DstValue subval = dst_array_pop(&args->mapstack);
DstValue subkey = dst_array_pop(&args->mapstack);
dst_table_put(t, key, val);
dst_table_put(subt, key, kv_map(subkey, subval));
}
ret = dst_wrap_table(t);
submapping = dst_wrap_table(subt);
} else {
DstValue *st = dst_struct_begin(n >> 1);
DstValue *subst = dst_struct_begin(n >> 1);
for (i = n; i > 0; i -= 2) {
DstValue val = dst_array_pop(&args->stack);
DstValue key = dst_array_pop(&args->stack);
DstValue subval = dst_array_pop(&args->mapstack);
DstValue subkey = dst_array_pop(&args->mapstack);
dst_struct_put(st, key, val);
dst_struct_put(subst, key, kv_map(subkey, subval));
}
ret = dst_wrap_struct(dst_struct_end(st));
submapping = dst_wrap_struct(dst_struct_end(subst));
}
break;
}
@ -378,6 +433,20 @@ static const uint8_t *parse_recur(
/* Push the result to the stack */
dst_array_push(&args->stack, ret);
/* Push source mapping */
if (dst_checktype(submapping, DST_NIL)) {
/* We just parsed an atom */
dst_array_push(&args->mapstack, atom_map(
mapstart - args->srcstart,
src - args->srcstart));
} else {
/* We just parsed a recursive data structure */
dst_array_push(&args->mapstack, ds_map(
mapstart - args->srcstart,
src - args->srcstart,
submapping));
}
/* Return the new source position for further calls */
return src;
@ -428,23 +497,26 @@ DstParseResult dst_parse(const uint8_t *src, int32_t len) {
dst_array_init(&args.stack, 10);
args.status = DST_PARSE_OK;
args.srcstart = src;
args.end = src + len;
args.errmsg = NULL;
dst_array_init(&args.mapstack, 10);
newsrc = parse_recur(&args, src, DST_RECURSION_GUARD);
res.status = args.status;
res.bytes_read = (int32_t) (newsrc - src);
/* TODO - source maps */
res.map = dst_wrap_nil();
if (args.errmsg) {
res.result.error = dst_cstring(args.errmsg);
res.map = dst_wrap_nil();
} else {
res.result.value = dst_array_pop(&args.stack);
res.map = dst_array_pop(&args.mapstack);
}
dst_array_deinit(&args.stack);
dst_array_deinit(&args.mapstack);
return res;
}

View File

@ -200,6 +200,7 @@ static const uint8_t *string_description(const char *title, void *pointer) {
#undef HEX
#undef DST_BUFSIZE
/* TODO - add more characters to escapes */
static int32_t dst_escape_string_length(const uint8_t *str) {
int32_t len = 2;
int32_t i;
@ -359,11 +360,13 @@ static int is_print_ds(DstValue v) {
}
/* VT100 Colors for types */
static const char *dst_type_colors[15] = {
/* TODO - generalize into configurable headers and footers */
static const char *dst_type_colors[16] = {
"\x1B[35m",
"\x1B[33m",
"\x1B[33m",
"\x1B[35m",
"\x1B[35m",
"\x1B[32m",
"\x1B[36m",
"",
@ -524,7 +527,7 @@ static void dst_description_helper(DstPrinter *p, DstValue x) {
static void dst_printer_defaults(DstPrinter *p) {
p->next = 0;
p->flags = DST_PRINTFLAG_INDENT;
p->depth = 4;
p->depth = 10;
p->indent = 0;
p->indent_size = 2;
p->token_line_limit = 5;

View File

@ -31,18 +31,19 @@ const char dst_base64[65] =
/* The DST value types in order. These types can be used as
* mnemonics instead of a bit pattern for type checking */
const char *dst_type_names[15] = {
const char *dst_type_names[16] = {
"nil",
"real",
"false",
"true",
"fiber",
"integer",
"boolean",
"real",
"string",
"symbol",
"array",
"tuple",
"table",
"struct",
"fiber",
"buffer",
"function",
"cfunction",

View File

@ -138,6 +138,7 @@ DST_WRAP_DEFINE(function, DstFunction *, DST_FUNCTION, pointer)
DST_WRAP_DEFINE(cfunction, DstCFunction, DST_CFUNCTION, pointer)
DST_WRAP_DEFINE(table, DstTable *, DST_TABLE, pointer)
DST_WRAP_DEFINE(userdata, void *, DST_USERDATA, pointer)
DST_WRAP_DEFINE(pointer, void *, DST_USERDATA, pointer)
#undef DST_WRAP_DEFINE

View File

@ -104,7 +104,7 @@
* and crashing (the parser). Instead, error out. */
#define DST_RECURSION_GUARD 1000
/* #define DST_NANBOX */
#define DST_NANBOX
#ifdef DST_NANBOX
typedef union DstValue DstValue;
@ -129,7 +129,7 @@ typedef struct DstUserType DstUserType;
typedef int (*DstCFunction)(DstValue *argv, int32_t argn);
/* Names of all of the types */
extern const char *dst_type_names[15];
extern const char *dst_type_names[16];
/* Basic types for all Dst Values */
typedef enum DstType {
@ -280,6 +280,7 @@ DstValue dst_nanbox_from_bits(uint64_t bits);
#define dst_wrap_string(s) dst_nanbox_wrap_c((s), DST_STRING)
#define dst_wrap_symbol(s) dst_nanbox_wrap_c((s), DST_SYMBOL)
#define dst_wrap_userdata(s) dst_nanbox_wrap_((s), DST_USERDATA)
#define dst_wrap_pointer(s) dst_nanbox_wrap_((s), DST_USERDATA)
#define dst_wrap_function(s) dst_nanbox_wrap_((s), DST_FUNCTION)
#define dst_wrap_cfunction(s) dst_nanbox_wrap_((s), DST_CFUNCTION)
@ -353,6 +354,7 @@ DstValue dst_wrap_function(DstFunction *x);
DstValue dst_wrap_cfunction(DstCFunction x);
DstValue dst_wrap_table(DstTable *x);
DstValue dst_wrap_userdata(void *x);
DstValue dst_wrap_pointer(void *x);
/* End of tagged union implementation */
#endif

View File

@ -31,6 +31,7 @@ int main() {
}
assert(pres.status == DST_PARSE_OK);
dst_puts(dst_formatc("\nparse result: %v\n\n", pres.result.value));
dst_puts(dst_formatc("\nparse map result: %v\n\n", pres.map));
/*
opts.flags = 0;

View File

@ -33,12 +33,20 @@ union dst_t {
* 47 bit payload representation is that the type bits are no longer contiguous. Type
* checking can still be fast, but typewise polymorphism takes a bit longer. However,
* hopefully we can avoid some annoying problems that occur when trying to use 47 bit pointers
* in a 48 bit address space (Linux on ARM) */
* in a 48 bit address space (Linux on ARM), or when generating Signaling NaNs. */
/* |.......Tag.......|.......................Payload..................| */
/* Non-double: t|11111111111|1ttt|xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx */
/* Types of NIL, TRUE, and FALSE must have payload set to all 1s. */
/* Other possible representations: */
/* Common Style */
/* Non-double: 1|11111111111|tttt|xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx */
/* LuaJIT style */
/* Non-double: 1|11111111111|1ttt|txxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx */
/* Double (no NaNs): x xxxxxxxxxxx xxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx */
/* A simple scheme for nan boxed values */