mirror of
https://github.com/janet-lang/janet
synced 2025-01-12 08:30:26 +00:00
Add preliminary source mapping to parser (always on)
This commit is contained in:
parent
eceb6e5a77
commit
14fe30b411
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,6 +1,6 @@
|
||||
# Target
|
||||
/client/dst
|
||||
dst
|
||||
./dst
|
||||
|
||||
# Generated files
|
||||
*.gen.h
|
||||
|
66
core/compile.c
Normal file
66
core/compile.c
Normal file
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Calvin Rose
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <dst/dst.h>
|
||||
|
||||
#define DST_LOCAL_FLAG_MUTABLE 1
|
||||
|
||||
/* Compiler typedefs. NOTE(review): the structs defined below are tagged DstSlot, DstScope, DstCompiler and DstFormOptions; no struct tagged FormOptions or SlotTracker is visible in this file - confirm the intended names. */
|
||||
typedef struct DstCompiler DstCompiler;
|
||||
typedef struct FormOptions FormOptions;
|
||||
typedef struct SlotTracker SlotTracker;
|
||||
typedef struct DstScope DstScope;
|
||||
|
||||
/* A stack slot: a signed slot index plus two 32-bit bit sets. */
struct DstSlot {
    int32_t index;  /* slot index */
    uint32_t flags; /* NOTE(review): presumably DST_LOCAL_FLAG_* bits - confirm */
    uint32_t types; /* bit set of possible primitive types */
}; /* the terminating semicolon was missing, which is a syntax error */
|
||||
|
||||
/* A lexical scope during compilation */
|
||||
struct DstScope {
|
||||
DstArray slots;
|
||||
DstArray freeslots;
|
||||
DstArray constants;
|
||||
DstTable symbols; /* Positive numbers are stack slots, negative are negative constant indices */
|
||||
uint32_t flags;
|
||||
int32_t nextslot;
|
||||
}
|
||||
|
||||
/* Compilation state: an error value with its jump target, plus scope and buffer storage and a recursion guard */
|
||||
struct DstCompiler {
|
||||
DstValue error;
|
||||
jmp_buf on_error; /* NOTE(review): jmp_buf requires <setjmp.h>; presumably provided through dst/dst.h - confirm */
|
||||
DstArray scopes; /* presumably holds the DstScope stack - TODO confirm */
|
||||
DstBuffer buffer;
|
||||
int recursion_guard; /* presumably bounds nesting depth like DST_RECURSION_GUARD in the parser - confirm */
|
||||
};
|
||||
|
||||
/* Options for compiling a single form (NOTE(review): purpose inferred from the struct name - confirm) */
|
||||
struct DstFormOptions {
|
||||
DstCompiler *compiler;
|
||||
DstValue x; /* presumably the form being compiled - TODO confirm */
|
||||
uint32_t flags;
|
||||
uint32_t types; /* bit set of accepted primitive types */
|
||||
int32_t target_slot;
|
||||
};
|
86
core/parse.c
86
core/parse.c
@ -62,7 +62,8 @@ static int read_integer(const uint8_t *string, const uint8_t *end, int64_t *ret)
|
||||
|
||||
/* Read a real from a string. Returns if successfully
|
||||
* parsed a real from the entire input string.
|
||||
* If returned 1, output is int ret.*/
|
||||
* If returned 1, output is in ret.
|
||||
* TODO - consider algorithm that does not lose precision. */
|
||||
static int read_real(const uint8_t *string, const uint8_t *end, double *ret, int forceInt) {
|
||||
int sign = 1, x = 0;
|
||||
double accum = 0, exp = 1, place = 1;
|
||||
@ -133,7 +134,7 @@ static int is_whitespace(uint8_t c) {
|
||||
}
|
||||
|
||||
/* Check if a character is a valid symbol character */
|
||||
/* TODO - wlloe utf8 - shouldn't be difficult, err on side
|
||||
/* TODO - allow utf8 - shouldn't be difficult, err on side
|
||||
* of inclusivity */
|
||||
static int is_symbol_char(uint8_t c) {
|
||||
if (c >= 'a' && c <= 'z') return 1;
|
||||
@ -161,8 +162,35 @@ static int to_hex(uint8_t c) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Make source mapping for an atom (non-recursive structure): returns a 2-tuple (start, end) of wrapped integers */
|
||||
static DstValue atom_map(int32_t start, int32_t end) {
|
||||
DstValue *t = dst_tuple_begin(2);
|
||||
t[0] = dst_wrap_integer(start);
|
||||
t[1] = dst_wrap_integer(end);
|
||||
return dst_wrap_tuple(dst_tuple_end(t));
|
||||
}
|
||||
|
||||
/* Create mapping for a recursive data structure: returns a 3-tuple (start, end, submap), where submap maps the nested elements */
|
||||
static DstValue ds_map(int32_t start, int32_t end, DstValue submap) {
|
||||
DstValue *t = dst_tuple_begin(3);
|
||||
t[0] = dst_wrap_integer(start);
|
||||
t[1] = dst_wrap_integer(end);
|
||||
t[2] = submap;
|
||||
return dst_wrap_tuple(dst_tuple_end(t));
|
||||
}
|
||||
|
||||
/* Create a source mapping for a key-value pair: returns the 2-tuple (k, v); the parser passes the key's and the value's mappings */
|
||||
static DstValue kv_map(DstValue k, DstValue v) {
|
||||
DstValue *t = dst_tuple_begin(2);
|
||||
t[0] = k;
|
||||
t[1] = v;
|
||||
return dst_wrap_tuple(dst_tuple_end(t));
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
DstArray stack;
|
||||
DstArray mapstack;
|
||||
const uint8_t *srcstart;
|
||||
const uint8_t *end;
|
||||
const char *errmsg;
|
||||
DstParseStatus status;
|
||||
@ -175,8 +203,10 @@ static const uint8_t *parse_recur(
|
||||
int32_t recur) {
|
||||
|
||||
const uint8_t *end = args->end;
|
||||
const uint8_t *mapstart;
|
||||
int32_t qcount = 0;
|
||||
DstValue ret;
|
||||
DstValue submapping;
|
||||
|
||||
/* Prevent stack overflow */
|
||||
if (recur == 0) goto too_much_recur;
|
||||
@ -194,6 +224,10 @@ static const uint8_t *parse_recur(
|
||||
|
||||
/* Check for end of source */
|
||||
if (src >= end) goto unexpected_eos;
|
||||
|
||||
/* Open mapping */
|
||||
mapstart = src;
|
||||
submapping = dst_wrap_nil();
|
||||
|
||||
/* Detect token type based on first character */
|
||||
switch (*src) {
|
||||
@ -332,18 +366,27 @@ static const uint8_t *parse_recur(
|
||||
case ')':
|
||||
{
|
||||
DstValue *tup = dst_tuple_begin(n);
|
||||
for (i = n; i > 0; i--)
|
||||
DstValue *subtup = dst_tuple_begin(n);
|
||||
for (i = n; i > 0; i--) {
|
||||
tup[i - 1] = dst_array_pop(&args->stack);
|
||||
subtup[i - 1] = dst_array_pop(&args->mapstack);
|
||||
}
|
||||
ret = dst_wrap_tuple(dst_tuple_end(tup));
|
||||
submapping = dst_wrap_tuple(dst_tuple_end(subtup));
|
||||
break;
|
||||
}
|
||||
case ']':
|
||||
{
|
||||
DstArray *arr = dst_array(n);
|
||||
for (i = n; i > 0; i--)
|
||||
DstArray *subarr = dst_array(n);
|
||||
for (i = n; i > 0; i--) {
|
||||
arr->data[i - 1] = dst_array_pop(&args->stack);
|
||||
subarr->data[i - 1] = dst_array_pop(&args->mapstack);
|
||||
}
|
||||
arr->count = n;
|
||||
subarr->count = n;
|
||||
ret = dst_wrap_array(arr);
|
||||
submapping = dst_wrap_array(subarr);
|
||||
break;
|
||||
}
|
||||
case '}':
|
||||
@ -351,20 +394,32 @@ static const uint8_t *parse_recur(
|
||||
if (n & 1) goto struct_oddargs;
|
||||
if (istable) {
|
||||
DstTable *t = dst_table(n);
|
||||
DstTable *subt = dst_table(n);
|
||||
for (i = n; i > 0; i -= 2) {
|
||||
DstValue val = dst_array_pop(&args->stack);
|
||||
DstValue key = dst_array_pop(&args->stack);
|
||||
DstValue subval = dst_array_pop(&args->mapstack);
|
||||
DstValue subkey = dst_array_pop(&args->mapstack);
|
||||
|
||||
dst_table_put(t, key, val);
|
||||
dst_table_put(subt, key, kv_map(subkey, subval));
|
||||
}
|
||||
ret = dst_wrap_table(t);
|
||||
submapping = dst_wrap_table(subt);
|
||||
} else {
|
||||
DstValue *st = dst_struct_begin(n >> 1);
|
||||
DstValue *subst = dst_struct_begin(n >> 1);
|
||||
for (i = n; i > 0; i -= 2) {
|
||||
DstValue val = dst_array_pop(&args->stack);
|
||||
DstValue key = dst_array_pop(&args->stack);
|
||||
DstValue subval = dst_array_pop(&args->mapstack);
|
||||
DstValue subkey = dst_array_pop(&args->mapstack);
|
||||
|
||||
dst_struct_put(st, key, val);
|
||||
dst_struct_put(subst, key, kv_map(subkey, subval));
|
||||
}
|
||||
ret = dst_wrap_struct(dst_struct_end(st));
|
||||
submapping = dst_wrap_struct(dst_struct_end(subst));
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -378,6 +433,20 @@ static const uint8_t *parse_recur(
|
||||
|
||||
/* Push the result to the stack */
|
||||
dst_array_push(&args->stack, ret);
|
||||
|
||||
/* Push source mapping */
|
||||
if (dst_checktype(submapping, DST_NIL)) {
|
||||
/* We just parsed an atom */
|
||||
dst_array_push(&args->mapstack, atom_map(
|
||||
mapstart - args->srcstart,
|
||||
src - args->srcstart));
|
||||
} else {
|
||||
/* We just parsed a recursive data structure */
|
||||
dst_array_push(&args->mapstack, ds_map(
|
||||
mapstart - args->srcstart,
|
||||
src - args->srcstart,
|
||||
submapping));
|
||||
}
|
||||
|
||||
/* Return the new source position for further calls */
|
||||
return src;
|
||||
@ -428,23 +497,26 @@ DstParseResult dst_parse(const uint8_t *src, int32_t len) {
|
||||
|
||||
dst_array_init(&args.stack, 10);
|
||||
args.status = DST_PARSE_OK;
|
||||
args.srcstart = src;
|
||||
args.end = src + len;
|
||||
args.errmsg = NULL;
|
||||
|
||||
dst_array_init(&args.mapstack, 10);
|
||||
|
||||
newsrc = parse_recur(&args, src, DST_RECURSION_GUARD);
|
||||
res.status = args.status;
|
||||
res.bytes_read = (int32_t) (newsrc - src);
|
||||
|
||||
/* TODO - source maps */
|
||||
res.map = dst_wrap_nil();
|
||||
|
||||
if (args.errmsg) {
|
||||
res.result.error = dst_cstring(args.errmsg);
|
||||
res.map = dst_wrap_nil();
|
||||
} else {
|
||||
res.result.value = dst_array_pop(&args.stack);
|
||||
res.map = dst_array_pop(&args.mapstack);
|
||||
}
|
||||
|
||||
dst_array_deinit(&args.stack);
|
||||
dst_array_deinit(&args.mapstack);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@ -200,6 +200,7 @@ static const uint8_t *string_description(const char *title, void *pointer) {
|
||||
#undef HEX
|
||||
#undef DST_BUFSIZE
|
||||
|
||||
/* TODO - add more characters to escapes */
|
||||
static int32_t dst_escape_string_length(const uint8_t *str) {
|
||||
int32_t len = 2;
|
||||
int32_t i;
|
||||
@ -359,11 +360,13 @@ static int is_print_ds(DstValue v) {
|
||||
}
|
||||
|
||||
/* VT100 Colors for types */
|
||||
static const char *dst_type_colors[15] = {
|
||||
/* TODO - generalize into configurable headers and footers */
|
||||
static const char *dst_type_colors[16] = {
|
||||
"\x1B[35m",
|
||||
"\x1B[33m",
|
||||
"\x1B[33m",
|
||||
"\x1B[35m",
|
||||
"\x1B[35m",
|
||||
"\x1B[32m",
|
||||
"\x1B[36m",
|
||||
"",
|
||||
@ -524,7 +527,7 @@ static void dst_description_helper(DstPrinter *p, DstValue x) {
|
||||
static void dst_printer_defaults(DstPrinter *p) {
|
||||
p->next = 0;
|
||||
p->flags = DST_PRINTFLAG_INDENT;
|
||||
p->depth = 4;
|
||||
p->depth = 10;
|
||||
p->indent = 0;
|
||||
p->indent_size = 2;
|
||||
p->token_line_limit = 5;
|
||||
|
@ -31,18 +31,19 @@ const char dst_base64[65] =
|
||||
|
||||
/* The DST value types in order. These types can be used as
|
||||
* mnemonics instead of a bit pattern for type checking */
|
||||
const char *dst_type_names[15] = {
|
||||
const char *dst_type_names[16] = {
|
||||
"nil",
|
||||
"real",
|
||||
"false",
|
||||
"true",
|
||||
"fiber",
|
||||
"integer",
|
||||
"boolean",
|
||||
"real",
|
||||
"string",
|
||||
"symbol",
|
||||
"array",
|
||||
"tuple",
|
||||
"table",
|
||||
"struct",
|
||||
"fiber",
|
||||
"buffer",
|
||||
"function",
|
||||
"cfunction",
|
||||
|
@ -138,6 +138,7 @@ DST_WRAP_DEFINE(function, DstFunction *, DST_FUNCTION, pointer)
|
||||
DST_WRAP_DEFINE(cfunction, DstCFunction, DST_CFUNCTION, pointer)
|
||||
DST_WRAP_DEFINE(table, DstTable *, DST_TABLE, pointer)
|
||||
DST_WRAP_DEFINE(userdata, void *, DST_USERDATA, pointer)
|
||||
DST_WRAP_DEFINE(pointer, void *, DST_USERDATA, pointer)
|
||||
|
||||
#undef DST_WRAP_DEFINE
|
||||
|
||||
|
@ -104,7 +104,7 @@
|
||||
* and crashing (the parser). Instead, error out. */
|
||||
#define DST_RECURSION_GUARD 1000
|
||||
|
||||
/* #define DST_NANBOX */
|
||||
#define DST_NANBOX
|
||||
|
||||
#ifdef DST_NANBOX
|
||||
typedef union DstValue DstValue;
|
||||
@ -129,7 +129,7 @@ typedef struct DstUserType DstUserType;
|
||||
typedef int (*DstCFunction)(DstValue *argv, int32_t argn);
|
||||
|
||||
/* Names of all of the types */
|
||||
extern const char *dst_type_names[15];
|
||||
extern const char *dst_type_names[16];
|
||||
|
||||
/* Basic types for all Dst Values */
|
||||
typedef enum DstType {
|
||||
@ -280,6 +280,7 @@ DstValue dst_nanbox_from_bits(uint64_t bits);
|
||||
#define dst_wrap_string(s) dst_nanbox_wrap_c((s), DST_STRING)
|
||||
#define dst_wrap_symbol(s) dst_nanbox_wrap_c((s), DST_SYMBOL)
|
||||
#define dst_wrap_userdata(s) dst_nanbox_wrap_((s), DST_USERDATA)
|
||||
#define dst_wrap_pointer(s) dst_nanbox_wrap_((s), DST_USERDATA)
|
||||
#define dst_wrap_function(s) dst_nanbox_wrap_((s), DST_FUNCTION)
|
||||
#define dst_wrap_cfunction(s) dst_nanbox_wrap_((s), DST_CFUNCTION)
|
||||
|
||||
@ -353,6 +354,7 @@ DstValue dst_wrap_function(DstFunction *x);
|
||||
DstValue dst_wrap_cfunction(DstCFunction x);
|
||||
DstValue dst_wrap_table(DstTable *x);
|
||||
DstValue dst_wrap_userdata(void *x);
|
||||
DstValue dst_wrap_pointer(void *x);
|
||||
|
||||
/* End of tagged union implementation */
|
||||
#endif
|
||||
|
@ -31,6 +31,7 @@ int main() {
|
||||
}
|
||||
assert(pres.status == DST_PARSE_OK);
|
||||
dst_puts(dst_formatc("\nparse result: %v\n\n", pres.result.value));
|
||||
dst_puts(dst_formatc("\nparse map result: %v\n\n", pres.map));
|
||||
|
||||
/*
|
||||
opts.flags = 0;
|
||||
|
@ -33,12 +33,20 @@ union dst_t {
|
||||
* 47 bit payload representation is that the type bits are no longer contiguous. Type
|
||||
* checking can still be fast, but typewise polymorphism takes a bit longer. However,
|
||||
* hopefully we can avoid some annoying problems that occur when trying to use 47 bit pointers
|
||||
* in a 48 bit address space (Linux on ARM) */
|
||||
* in a 48 bit address space (Linux on ARM), or when generating Signaling NaNs. */
|
||||
|
||||
/* |.......Tag.......|.......................Payload..................| */
|
||||
/* Non-double: t|11111111111|1ttt|xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx */
|
||||
/* Types of NIL, TRUE, and FALSE must have payload set to all 1s. */
|
||||
|
||||
/* Other possible representations: */
|
||||
|
||||
/* Common Style */
|
||||
/* Non-double: 1|11111111111|tttt|xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx */
|
||||
|
||||
/* LuaJIT style */
|
||||
/* Non-double: 1|11111111111|1ttt|txxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx */
|
||||
|
||||
/* Double (no NaNs): x xxxxxxxxxxx xxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx */
|
||||
|
||||
/* A simple scheme for nan boxed values */
|
||||
|
Loading…
Reference in New Issue
Block a user