mirror of
https://github.com/janet-lang/janet
synced 2026-04-20 05:41:27 +00:00
Major refactor and restructure. Add CMake for anticipated windows
support.
This commit is contained in:
187
src/parser/ast.c
Normal file
187
src/parser/ast.c
Normal file
@@ -0,0 +1,187 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Calvin Rose
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <dst/dst.h>
|
||||
#include <dst/dstparse.h>
|
||||
|
||||
/* Mark an ast node */
|
||||
static int dst_ast_gcmark(void *p, size_t size) {
|
||||
DstAst *ast = (DstAst *)p;
|
||||
(void) size;
|
||||
dst_mark(ast->value);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* AST type */
|
||||
static DstAbstractType dst_ast_type = {
|
||||
"ast",
|
||||
NULL,
|
||||
dst_ast_gcmark
|
||||
};
|
||||
|
||||
/* Create an ast type */
|
||||
Dst dst_ast_wrap(Dst x, int32_t start, int32_t end) {
|
||||
DstAst *ast = dst_abstract(&dst_ast_type, sizeof(DstAst));
|
||||
ast->value = x;
|
||||
ast->source_start = start;
|
||||
ast->source_end = end;
|
||||
ast->flags = 1 << dst_type(x);
|
||||
return dst_wrap_abstract(ast);
|
||||
}
|
||||
|
||||
/* Get the node associated with a value */
|
||||
DstAst *dst_ast_node(Dst x) {
|
||||
if (dst_checktype(x, DST_ABSTRACT) &&
|
||||
dst_abstract_type(dst_unwrap_abstract(x)) == &dst_ast_type) {
|
||||
DstAst *ast = (DstAst *)dst_unwrap_abstract(x);
|
||||
return ast;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Unwrap an ast value one level deep */
|
||||
Dst dst_ast_unwrap1(Dst x) {
|
||||
if (dst_checktype(x, DST_ABSTRACT) &&
|
||||
dst_abstract_type(dst_unwrap_abstract(x)) == &dst_ast_type) {
|
||||
DstAst *ast = (DstAst *)dst_unwrap_abstract(x);
|
||||
return ast->value;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
Dst dst_ast_unwrap(Dst x);
|
||||
|
||||
/* Recursively unwrap the elements of an array. When unwrapping changes no
 * element (per dst_equals) the input array itself is returned; otherwise a
 * new array is built that shares the unchanged leading elements. */
static Dst astunwrap_array(DstArray *other) {
    DstArray *result;
    Dst changed;
    int32_t split = 0;
    int32_t k;

    /* Find the first element that unwrapping alters. */
    while (split < other->count) {
        changed = dst_ast_unwrap(other->data[split]);
        if (!dst_equals(changed, other->data[split])) break;
        split++;
    }

    /* Nothing differs: hand back the original without allocating. */
    if (split == other->count)
        return dst_wrap_array(other);

    result = dst_array(other->count);
    for (k = 0; k < other->count; k++) {
        if (k < split) {
            result->data[k] = other->data[k];
        } else if (k == split) {
            /* Reuse the value already computed by the scan above. */
            result->data[k] = changed;
        } else {
            result->data[k] = dst_ast_unwrap(other->data[k]);
        }
    }
    result->count = other->count;
    return dst_wrap_array(result);
}
|
||||
|
||||
/* Recursively unwrap the elements of a tuple. When unwrapping changes no
 * element (per dst_equals) the input tuple itself is returned; otherwise a
 * new tuple of the same length is built. */
static Dst astunwrap_tuple(const Dst *other) {
    Dst *copy;
    Dst changed;
    int32_t split = 0;
    int32_t k;

    /* Find the first element that unwrapping alters. */
    while (split < dst_tuple_length(other)) {
        changed = dst_ast_unwrap(other[split]);
        if (!dst_equals(changed, other[split])) break;
        split++;
    }

    if (split == dst_tuple_length(other))
        return dst_wrap_tuple(other);

    copy = dst_tuple_begin(dst_tuple_length(other));
    for (k = 0; k < dst_tuple_length(other); k++) {
        if (k < split) {
            copy[k] = other[k];
        } else if (k == split) {
            /* Reuse the value already computed by the scan above. */
            copy[k] = changed;
        } else {
            copy[k] = dst_ast_unwrap(other[k]);
        }
    }
    return dst_wrap_tuple(dst_tuple_end(copy));
}
|
||||
|
||||
static Dst astunwrap_struct(const DstKV *other) {
|
||||
DstKV *st;
|
||||
const DstKV *prescan, *iter;
|
||||
Dst diffval, diffkey;
|
||||
prescan = NULL;
|
||||
while ((prescan = dst_struct_next(other, prescan))) {
|
||||
diffkey = dst_ast_unwrap(prescan->key);
|
||||
diffval = dst_ast_unwrap(prescan->value);
|
||||
if (!dst_equals(diffkey, prescan->key) ||
|
||||
!dst_equals(diffval, prescan->value))
|
||||
break;
|
||||
}
|
||||
if (!prescan) return dst_wrap_struct(other);
|
||||
st = dst_struct_begin(dst_struct_length(other));
|
||||
iter = NULL;
|
||||
while ((iter = dst_struct_next(other, iter))) {
|
||||
if (iter == prescan) break;
|
||||
dst_struct_put(st, iter->key, iter->value);
|
||||
}
|
||||
dst_struct_put(st, diffkey, diffval);
|
||||
while ((iter = dst_struct_next(other, iter))) {
|
||||
dst_struct_put(st,
|
||||
dst_ast_unwrap(iter->key),
|
||||
dst_ast_unwrap(iter->value));
|
||||
}
|
||||
return dst_wrap_struct(dst_struct_end(st));
|
||||
}
|
||||
|
||||
/* Recursively unwrap the keys and values of a table. When unwrapping
 * changes no key or value (per dst_equals) the input table itself is
 * returned; otherwise a new table is created with the same capacity. */
static Dst astunwrap_table(DstTable *other) {
    const DstKV *hit;
    const DstKV *kv;
    DstTable *result;
    Dst newkey, newval;

    /* Find the first entry whose key or value is altered by unwrapping. */
    for (hit = dst_table_next(other, NULL);
            hit;
            hit = dst_table_next(other, hit)) {
        newkey = dst_ast_unwrap(hit->key);
        newval = dst_ast_unwrap(hit->value);
        if (!dst_equals(newkey, hit->key) ||
                !dst_equals(newval, hit->value))
            break;
    }
    if (!hit)
        return dst_wrap_table(other);

    result = dst_table(other->capacity);

    /* Entries before the first difference are copied verbatim. */
    for (kv = dst_table_next(other, NULL);
            kv && kv != hit;
            kv = dst_table_next(other, kv)) {
        dst_table_put(result, kv->key, kv->value);
    }

    /* The differing entry reuses the values computed by the scan. */
    dst_table_put(result, newkey, newval);

    /* Everything after it is unwrapped on the way in. */
    for (kv = dst_table_next(other, kv);
            kv;
            kv = dst_table_next(other, kv)) {
        dst_table_put(result,
                dst_ast_unwrap(kv->key),
                dst_ast_unwrap(kv->value));
    }
    return dst_wrap_table(result);
}
|
||||
|
||||
/* Unwrap an ast value recursively. Preserve as much structure as possible
|
||||
* to avoid unecessary allocation. */
|
||||
Dst dst_ast_unwrap(Dst x) {
|
||||
x = dst_ast_unwrap1(x);
|
||||
switch (dst_type(x)) {
|
||||
default:
|
||||
return x;
|
||||
case DST_ARRAY:
|
||||
return astunwrap_array(dst_unwrap_array(x));
|
||||
case DST_TUPLE:
|
||||
return astunwrap_tuple(dst_unwrap_tuple(x));
|
||||
case DST_STRUCT:
|
||||
return astunwrap_struct(dst_unwrap_struct(x));
|
||||
case DST_TABLE:
|
||||
return astunwrap_table(dst_unwrap_table(x));
|
||||
}
|
||||
}
|
||||
|
||||
551
src/parser/parse.c
Normal file
551
src/parser/parse.c
Normal file
@@ -0,0 +1,551 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Calvin Rose
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <dst/dst.h>
|
||||
#include <dst/dstparse.h>
|
||||
#include <headerlibs/vector.h>
|
||||
|
||||
/* Quote a value */
|
||||
static Dst quote(Dst x) {
|
||||
Dst *t = dst_tuple_begin(2);
|
||||
t[0] = dst_csymbolv("quote");
|
||||
t[1] = x;
|
||||
return dst_wrap_tuple(dst_tuple_end(t));
|
||||
}
|
||||
|
||||
/* Return 1 if c is treated as whitespace by the parser and 0 otherwise.
 * Besides space, tab, newline and carriage return, the NUL byte, ';' and
 * ',' also count as whitespace. */
static int is_whitespace(uint8_t c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\0':
        case ';':
        case ',':
            return 1;
        default:
            return 0;
    }
}
|
||||
|
||||
/* Code gen
|
||||
|
||||
printf("static uint32_t symchars[8] = {\n\t");
|
||||
for (int i = 0; i < 256; i += 32) {
|
||||
uint32_t block = 0;
|
||||
for (int j = 0; j < 32; j++) {
|
||||
block |= is_symbol_char_gen(i + j) << j;
|
||||
}
|
||||
printf("0x%08x%s", block, (i == (256 - 32)) ? "" : ", ");
|
||||
}
|
||||
printf("\n};\n");
|
||||
|
||||
static int is_symbol_char_gen(uint8_t c) {
|
||||
if (c >= 'a' && c <= 'z') return 1;
|
||||
if (c >= 'A' && c <= 'Z') return 1;
|
||||
if (c >= '0' && c <= '9') return 1;
|
||||
return (c == '!' ||
|
||||
c == '$' ||
|
||||
c == '%' ||
|
||||
c == '&' ||
|
||||
c == '*' ||
|
||||
c == '+' ||
|
||||
c == '-' ||
|
||||
c == '.' ||
|
||||
c == '/' ||
|
||||
c == ':' ||
|
||||
c == '<' ||
|
||||
c == '?' ||
|
||||
c == '=' ||
|
||||
c == '>' ||
|
||||
c == '@' ||
|
||||
c == '\\' ||
|
||||
c == '^' ||
|
||||
c == '_' ||
|
||||
c == '~' ||
|
||||
c == '|');
|
||||
}
|
||||
|
||||
The table contains 256 bits, where each bit is 1
if the corresponding ASCII code is a symbol char, and 0
if not. The upper characters (bytes 128-255) are also considered
symbol chars and are then checked for utf-8 compliance. */
|
||||
/* 256-bit membership table for symbol characters, 32 bits per word. Bit
 * (c & 0x1F) of word (c >> 5) is set when byte c may appear in a symbol.
 * The table is never modified. */
static const uint32_t symchars[8] = {
    0x00000000, 0xF7ffec72, 0xd7ffffff, 0x57fffffe,
    0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};

/* Check if a character is a valid symbol character.
 * Symbol chars are A-Z, a-z, 0-9, one of !$%&*+-./:<=>?@\^_~| and every
 * byte from 0x80 to 0xFF.
 * Returns 1 if c is a symbol character and 0 otherwise. */
static int is_symbol_char(uint8_t c) {
    /* Shift the table word down instead of shifting a signed 1 up: the bit
     * for '?' is bit 31, and 1 << 31 overflows int, which is undefined. */
    return (int) ((symchars[c >> 5] >> (c & 0x1F)) & 1u);
}
|
||||
|
||||
/* Check that the len bytes at str form well-formed UTF-8. Only the
 * encoding is validated (sequence lengths, continuation bytes and the
 * overlong forms checked below); code point values themselves are not
 * range-checked. Returns 1 if valid, 0 otherwise. */
static int valid_utf8(const uint8_t *str, int32_t len) {
    int32_t pos = 0;
    while (pos < len) {
        const uint8_t lead = str[pos];
        int32_t width;
        int32_t k;

        /* Sequence length from the lead byte; 5 and 6 byte forms and
         * stray continuation bytes are rejected. */
        if (lead < 0x80) width = 1;
        else if ((lead & 0xE0) == 0xC0) width = 2;
        else if ((lead & 0xF0) == 0xE0) width = 3;
        else if ((lead & 0xF8) == 0xF0) width = 4;
        else return 0;

        /* The whole sequence must fit in the buffer. */
        if (pos + width > len)
            return 0;

        /* Every trailing byte must look like 10XX XXXX. */
        for (k = 1; k < width; k++) {
            if ((str[pos + k] & 0xC0) != 0x80)
                return 0;
        }

        /* Reject overlong encodings. */
        if (width == 2 && lead < 0xC2) return 0;
        if (lead == 0xE0 && str[pos + 1] < 0xA0) return 0;
        if (lead == 0xF0 && str[pos + 1] < 0x90) return 0;

        pos += width;
    }
    return 1;
}
|
||||
|
||||
/* Convert an ASCII hex digit (0-9, a-f or A-F) to its value 0..15.
 * Returns -1 for any other byte. */
static int to_hex(uint8_t c) {
    if ('0' <= c && c <= '9') return c - '0';
    if ('a' <= c && c <= 'f') return c - 'a' + 10;
    if ('A' <= c && c <= 'F') return c - 'A' + 10;
    return -1;
}
|
||||
|
||||
typedef int (*Consumer)(DstParser *p, DstParseState *state, uint8_t c);
|
||||
struct DstParseState {
|
||||
int32_t qcount;
|
||||
int32_t argn;
|
||||
int flags;
|
||||
size_t start;
|
||||
Consumer consumer;
|
||||
};
|
||||
|
||||
#define PFLAG_CONTAINER 1
|
||||
|
||||
/* Push a new state onto the parser's state stack. The state starts with
 * zeroed counters and remembers the parser's current index as its start
 * position. */
static void pushstate(DstParser *p, Consumer consumer, int flags) {
    DstParseState fresh;
    fresh.consumer = consumer;
    fresh.flags = flags;
    fresh.start = p->index;
    fresh.argn = 0;
    fresh.qcount = 0;
    dst_v_push(p->states, fresh);
}
|
||||
|
||||
/* Pop the topmost state, which has just produced val. If the state beneath
 * it is a container, val is quoted once per pending quote of that
 * container, wrapped in an ast node when source mapping is enabled, and
 * pushed onto the argument stack as the container's next value. If the
 * state beneath is not a container, val is discarded. */
static void popstate(DstParser *p, Dst val) {
    const DstParseState done = dst_v_last(p->states);
    DstParseState *parent;
    int sourcemap;
    int32_t remaining;

    dst_v_pop(p->states);
    parent = &dst_v_last(p->states);
    if (!(parent->flags & PFLAG_CONTAINER))
        return;

    sourcemap = (p->flags & DST_PARSEFLAG_SOURCEMAP) != 0;

    /* Quote the returned value qcount times */
    for (remaining = parent->qcount; remaining > 0; remaining--) {
        if (sourcemap)
            val = dst_ast_wrap(val, (int32_t) done.start, (int32_t) p->index);
        val = quote(val);
    }
    parent->qcount = 0;

    /* Ast wrap */
    if (sourcemap)
        val = dst_ast_wrap(val, (int32_t) done.start, (int32_t) p->index);

    parent->argn++;
    dst_v_push(p->argstack, val);
}
|
||||
|
||||
/* Map the character following a backslash in a string literal to the byte
 * it stands for. 'h' (start of a hex escape) yields 1. Unrecognized
 * characters yield 0. Note that '0' and 'z' stand for the NUL byte and so
 * also yield 0; the return value alone cannot tell them apart from an
 * unrecognized escape. */
static uint8_t checkescape(uint8_t c) {
    switch (c) {
        case 'n': return '\n';
        case 't': return '\t';
        case 'r': return '\r';
        case 'f': return '\f';
        case 'e': return 27;
        case '0': return '\0';
        case 'z': return '\0';
        case '"': return '"';
        case '\'': return '\'';
        case '\\': return '\\';
        case 'h': return 1;
        default: return 0;
    }
}
|
||||
|
||||
/* Forward declare */
|
||||
static int stringchar(DstParser *p, DstParseState *state, uint8_t c);
|
||||
|
||||
/* Consume one hex digit of a \hXX escape. state->argn accumulates the
 * value and state->qcount counts the digits still expected; after the last
 * digit the low 8 bits of the value are appended to the buffer and the
 * state returns to stringchar. A non-hex byte sets a parse error. Always
 * returns 1 (byte consumed). */
static int escapeh(DstParser *p, DstParseState *state, uint8_t c) {
    const int nibble = to_hex(c);
    if (nibble < 0) {
        p->error = "invalid hex digit in hex escape";
        return 1;
    }
    state->argn = (state->argn << 4) + nibble;
    if (--state->qcount == 0) {
        dst_v_push(p->buf, (state->argn & 0xFF));
        state->argn = 0;
        state->consumer = stringchar;
    }
    return 1;
}
|
||||
|
||||
/* Consume the character that follows a backslash inside a string literal.
 * 'h' switches to escapeh to read two hex digits; any other recognized
 * escape appends its byte to the buffer and returns to stringchar. An
 * unrecognized escape sets a parse error. Always returns 1 (byte
 * consumed). */
static int escape1(DstParser *p, DstParseState *state, uint8_t c) {
    uint8_t e = checkescape(c);
    /* checkescape yields 0 both for unknown escapes and for the NUL
     * escapes \0 and \z, so those two are accepted explicitly here instead
     * of being reported as invalid. */
    if (!e && c != '0' && c != 'z') {
        p->error = "invalid string escape sequence";
        return 1;
    }
    if (c == 'h') {
        /* Two hex digits follow; escapeh counts them down via qcount. */
        state->qcount = 2;
        state->argn = 0;
        state->consumer = escapeh;
    } else {
        dst_v_push(p->buf, e);
        state->consumer = stringchar;
    }
    return 1;
}
|
||||
|
||||
/* Consume one byte inside a string literal. A backslash switches to
 * escape1, a double quote finishes the string (the buffered bytes become a
 * string value handed to popstate and the buffer is emptied), and any other
 * byte is appended to the buffer. Always returns 1 (byte consumed). */
static int stringchar(DstParser *p, DstParseState *state, uint8_t c) {
    switch (c) {
        case '\\':
            /* Enter escape */
            state->consumer = escape1;
            break;
        case '"': {
            /* String end */
            Dst str = dst_wrap_string(dst_string(p->buf, dst_v_count(p->buf)));
            dst_v_empty(p->buf);
            popstate(p, str);
            break;
        }
        default:
            /* normal char */
            dst_v_push(p->buf, c);
            break;
    }
    return 1;
}
|
||||
|
||||
/* Compare the len bytes at str against the NUL-terminated string cstr.
 * Returns 0 when they are equal, 1 when the first differing byte of str is
 * greater than that of cstr, and -1 when it is smaller or when str is a
 * proper prefix of cstr. */
static int check_str_const(const char *cstr, const uint8_t *str, int32_t len) {
    const uint8_t *want = (const uint8_t *)cstr;
    int32_t pos = 0;
    while (pos < len) {
        if (str[pos] < want[pos]) return -1;
        if (str[pos] > want[pos]) return 1;
        /* Both bytes are NUL here: stop before running past cstr. */
        if (want[pos] == '\0') break;
        pos++;
    }
    return want[pos] == '\0' ? 0 : -1;
}
|
||||
|
||||
/* Consume one byte of a token (number, nil/false/true, keyword or symbol).
 * Symbol characters are appended to the buffer and consumed (returns 1).
 * Any other byte ends the token: the buffered text is converted to a value
 * and handed to popstate, and 0 is returned so the terminating byte is
 * processed again by the state underneath.
 *
 * The buffer is tried, in order, as a number, then as one of the literals
 * nil, false and true. Otherwise it is a string value if it starts with
 * ':' (the colon is dropped) and a symbol if not. Tokens that start with a
 * digit but are not numbers, tokens containing invalid UTF-8, and empty
 * tokens set a parse error. */
static int tokenchar(DstParser *p, DstParseState *state, uint8_t c) {
    Dst numcheck, ret;
    int32_t blen;
    if (is_symbol_char(c)) {
        dst_v_push(p->buf, (uint8_t) c);
        if (c > 127) state->argn = 1; /* Use to indicate non ascii */
        return 1;
    }
    /* Token finished */
    blen = dst_v_count(p->buf);
    if (blen == 0) {
        /* The byte that started this token is not a symbol character, so
         * nothing was buffered. Without this check p->buf[0] below would be
         * read from an empty (possibly NULL) buffer, and returning 0 would
         * make the state underneath start the same empty token again for
         * the same byte, forever. */
        p->error = "unexpected character";
        return 1;
    }
    numcheck = dst_scan_number(p->buf, blen);
    if (!dst_checktype(numcheck, DST_NIL)) {
        ret = numcheck;
    } else if (!check_str_const("nil", p->buf, blen)) {
        ret = dst_wrap_nil();
    } else if (!check_str_const("false", p->buf, blen)) {
        ret = dst_wrap_false();
    } else if (!check_str_const("true", p->buf, blen)) {
        ret = dst_wrap_true();
    } else {
        if (p->buf[0] >= '0' && p->buf[0] <= '9') {
            p->error = "symbol literal cannot start with a digit";
            return 0;
        } else {
            /* Don't do full utf8 check unless we have seen non ascii characters. */
            int valid = (!state->argn) || valid_utf8(p->buf, blen);
            if (!valid) {
                p->error = "invalid utf-8 in symbol";
                return 0;
            }
            if (p->buf[0] == ':') {
                ret = dst_stringv(p->buf + 1, blen - 1);
            } else {
                ret = dst_symbolv(p->buf, blen);
            }
        }
    }
    dst_v_empty(p->buf);
    popstate(p, ret);
    return 0;
}
|
||||
|
||||
/* Consume one byte of a comment. Everything is swallowed; a newline also
 * pops the comment state. Always returns 1 (byte consumed). */
static int comment(DstParser *p, DstParseState *state, uint8_t c) {
    (void) state;
    if (c != '\n')
        return 1;
    dst_v_pop(p->states);
    return 1;
}
|
||||
|
||||
/* Forward declaration */
|
||||
static int root(DstParser *p, DstParseState *state, uint8_t c);
|
||||
|
||||
/* State handler inside "( ... )". On ')' the values collected for this
 * state are moved off the argument stack into a new tuple, which is handed
 * to popstate. Every other byte is delegated to root. */
static int dotuple(DstParser *p, DstParseState *state, uint8_t c) {
    Dst *items;
    int32_t slot;
    if (c != ')')
        return root(p, state, c);
    items = dst_tuple_begin(state->argn);
    /* The stack holds the values in order, so fill from the back. */
    slot = state->argn;
    while (slot-- > 0) {
        items[slot] = dst_v_last(p->argstack);
        dst_v_pop(p->argstack);
    }
    popstate(p, dst_wrap_tuple(dst_tuple_end(items)));
    return 1;
}
|
||||
|
||||
/* State handler inside "[ ... ]". On ']' the values collected for this
 * state are moved off the argument stack into a new array, which is handed
 * to popstate. Every other byte is delegated to root. */
static int doarray(DstParser *p, DstParseState *state, uint8_t c) {
    DstArray *items;
    int32_t slot;
    if (c != ']')
        return root(p, state, c);
    items = dst_array(state->argn);
    /* The stack holds the values in order, so fill from the back. */
    slot = state->argn;
    while (slot-- > 0) {
        items->data[slot] = dst_v_last(p->argstack);
        dst_v_pop(p->argstack);
    }
    items->count = state->argn;
    popstate(p, dst_wrap_array(items));
    return 1;
}
|
||||
|
||||
/* State handler inside "{ ... }". On '}' the collected values are taken
 * off the argument stack as key/value pairs and put into a new struct,
 * which is handed to popstate; an odd number of values sets a parse error.
 * Every other byte is delegated to root. */
static int dostruct(DstParser *p, DstParseState *state, uint8_t c) {
    DstKV *st;
    int32_t pairs;
    if (c != '}')
        return root(p, state, c);
    if (state->argn & 1) {
        p->error = "struct literal expects even number of arguments";
        return 1;
    }
    pairs = state->argn >> 1;
    st = dst_struct_begin(pairs);
    while (pairs-- > 0) {
        /* Values were pushed key first, so the value comes off first. */
        Dst value = dst_v_last(p->argstack);
        dst_v_pop(p->argstack);
        Dst key = dst_v_last(p->argstack);
        dst_v_pop(p->argstack);
        dst_struct_put(st, key, value);
    }
    popstate(p, dst_wrap_struct(dst_struct_end(st)));
    return 1;
}
|
||||
|
||||
/* State handler inside "@{ ... }". On '}' the collected values are taken
 * off the argument stack as key/value pairs and put into a new table,
 * which is handed to popstate; an odd number of values sets a parse error.
 * Every other byte is delegated to root. */
static int dotable(DstParser *p, DstParseState *state, uint8_t c) {
    DstTable *table;
    int32_t pairs;
    if (c != '}')
        return root(p, state, c);
    if (state->argn & 1) {
        p->error = "table literal expects even number of arguments";
        return 1;
    }
    pairs = state->argn >> 1;
    table = dst_table(pairs);
    while (pairs-- > 0) {
        /* Values were pushed key first, so the value comes off first. */
        Dst value = dst_v_last(p->argstack);
        dst_v_pop(p->argstack);
        Dst key = dst_v_last(p->argstack);
        dst_v_pop(p->argstack);
        dst_table_put(table, key, value);
    }
    popstate(p, dst_wrap_table(table));
    return 1;
}
|
||||
|
||||
/* Handle the byte that follows an '@'. This state always pops itself
 * first. "@{" opens a table literal and the '{' is consumed (returns 1).
 * Otherwise the '@' was the first character of an ordinary token: a token
 * state is pushed, the '@' is put back into the buffer, and 0 is returned
 * so the current byte is handled by the token state. */
static int ampersand(DstParser *p, DstParseState *state, uint8_t c) {
    (void) state;
    dst_v_pop(p->states);
    if (c != '{') {
        pushstate(p, tokenchar, 0);
        dst_v_push(p->buf, '@'); /* Push the leading '@' that was dropped */
        return 0;
    }
    pushstate(p, dotable, PFLAG_CONTAINER);
    return 1;
}
|
||||
|
||||
/* Handle a byte at a position where a new value may begin (top level or
 * inside a container). Returns 1 when the byte has been consumed and 0
 * when it must be offered again to the state that was just pushed.
 *
 *   whitespace        skipped
 *   '                 adds one pending quote to the current state
 *   "  #  @           start a string, a comment, or an '@' form
 *   (  [  {           open a tuple, array or struct
 *   )  ]  }           parse error: no matching opener in this state
 *   symbol character  starts a token; the byte is re-offered to it
 *   anything else     parse error
 */
static int root(DstParser *p, DstParseState *state, uint8_t c) {
    switch (c) {
        default:
            if (is_whitespace(c)) return 1;
            if (!is_symbol_char(c)) {
                /* A token state would buffer nothing for this byte and
                 * bounce it straight back here, looping forever. Report
                 * it instead. */
                p->error = "unexpected character";
                return 1;
            }
            pushstate(p, tokenchar, 0);
            return 0;
        case '\'':
            state->qcount++;
            return 1;
        case '"':
            pushstate(p, stringchar, 0);
            return 1;
        case '#':
            pushstate(p, comment, 0);
            return 1;
        case '@':
            pushstate(p, ampersand, 0);
            return 1;
        case ')':
        case ']':
        case '}':
            p->error = "mismatched delimiter";
            return 1;
        case '(':
            pushstate(p, dotuple, PFLAG_CONTAINER);
            return 1;
        case '[':
            pushstate(p, doarray, PFLAG_CONTAINER);
            return 1;
        case '{':
            pushstate(p, dostruct, PFLAG_CONTAINER);
            return 1;
    }
}
|
||||
|
||||
/* Feed one byte to the parser. The handler of the topmost state is called
 * repeatedly with the byte until a handler reports it consumed or a parse
 * error is set. Returns 0 without doing anything if the parser already
 * holds an error; otherwise records the byte as the lookback, advances the
 * index and returns 1. */
int dst_parser_consume(DstParser *parser, uint8_t c) {
    int done;
    if (parser->error)
        return 0;
    do {
        /* Re-fetch the top each round: handlers push and pop states. */
        DstParseState *top = &dst_v_last(parser->states);
        done = top->consumer(parser, top, c);
    } while (!done && !parser->error);
    parser->lookback = c;
    parser->index++;
    return 1;
}
|
||||
|
||||
/* Report the parser's current status: ERROR if an error is set, PENDING if
 * more than the root state is on the state stack, FULL if the argument
 * stack holds at least one value, and ROOT otherwise. */
DstParserStatus dst_parser_status(DstParser *parser) {
    if (parser->error)
        return DST_PARSE_ERROR;
    if (dst_v_count(parser->states) > 1)
        return DST_PARSE_PENDING;
    return dst_v_count(parser->argstack) ? DST_PARSE_FULL : DST_PARSE_ROOT;
}
|
||||
|
||||
/* If the parser is in the error state, return its error message and reset
 * the parser: the argument stack and buffer are emptied, the state stack
 * is cut back to its first entry and the error is cleared. Returns NULL
 * (and changes nothing) when there is no error. */
const char *dst_parser_error(DstParser *parser) {
    const char *msg;
    if (dst_parser_status(parser) != DST_PARSE_ERROR)
        return NULL;
    msg = parser->error;
    parser->error = NULL;
    dst_v_empty(parser->argstack);
    dst_v_empty(parser->buf);
    dst_v__cnt(parser->states) = 1;
    return msg;
}
|
||||
|
||||
/* Take a parsed value from the parser. When the status is FULL the value
 * on top of the argument stack is removed and returned; in every other
 * status nil is returned. */
Dst dst_parser_produce(DstParser *parser) {
    Dst value;
    if (dst_parser_status(parser) != DST_PARSE_FULL)
        return dst_wrap_nil();
    value = dst_v_last(parser->argstack);
    dst_v_pop(parser->argstack);
    return value;
}
|
||||
|
||||
/* Initialize a parser: empty stacks and buffer, no error, index 0,
 * lookback -1, the given flags, and a single root container state on the
 * state stack. */
void dst_parser_init(DstParser *parser, int flags) {
    parser->flags = flags;
    parser->error = NULL;
    parser->index = 0;
    parser->lookback = -1;
    parser->buf = NULL;
    parser->argstack = NULL;
    parser->states = NULL;
    /* Must come last: pushstate reads the index and grows the stack. */
    pushstate(parser, root, PFLAG_CONTAINER);
}
|
||||
|
||||
/* Release the parser's state stack, buffer and argument stack. */
void dst_parser_deinit(DstParser *parser) {
    dst_v_free(parser->states);
    dst_v_free(parser->buf);
    dst_v_free(parser->argstack);
}
|
||||
|
||||
/* C functions */
|
||||
|
||||
static int parsermark(void *p, size_t size) {
|
||||
int32_t i;
|
||||
DstParser *parser = (DstParser *)p;
|
||||
(void) size;
|
||||
for (i = 0; i < dst_v_count(parser->argstack); i++) {
|
||||
dst_mark(parser->argstack[i]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int parsergc(void *p, size_t size) {
|
||||
DstParser *parser = (DstParser *)p;
|
||||
(void) size;
|
||||
dst_parser_deinit(parser);
|
||||
return 0;
|
||||
}
|
||||
|
||||
DstAbstractType dst_parse_parsertype = {
|
||||
"stl.parser",
|
||||
parsergc,
|
||||
parsermark
|
||||
};
|
||||
|
||||
/* C Function parser */
|
||||
static int cfun_parser(DstArgs args) {
|
||||
int flags;
|
||||
if (args.n > 1) return dst_throw(args, "expected 1 argument");
|
||||
if (args.n) {
|
||||
if (!dst_checktype(args.v[0], DST_INTEGER)) return dst_throw(args, "expected integer");
|
||||
flags = dst_unwrap_integer(args.v[0]);
|
||||
} else {
|
||||
flags = 0;
|
||||
}
|
||||
DstParser *p = dst_abstract(&dst_parse_parsertype, sizeof(DstParser));
|
||||
dst_parser_init(p, 0);
|
||||
return dst_return(args, dst_wrap_abstract(p));
|
||||
}
|
||||
|
||||
/* Load the library */
|
||||
int dst_lib_parse(DstArgs args) {
|
||||
DstTable *env = dst_env_arg(args);
|
||||
|
||||
dst_env_def(env, "parser", dst_wrap_cfunction(cfun_parser));
|
||||
|
||||
return 0;
|
||||
}
|
||||
319
src/parser/strtod.c
Normal file
319
src/parser/strtod.c
Normal file
@@ -0,0 +1,319 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Calvin Rose
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* Use a custom double parser instead of libc's strtod for better portability
 * and control. Also, uses a less strict rounding method than IEEE to not incur
 * the cost of 4000 loc and dependence on arbitrary precision arithmetic. There
 * is no plan to use arbitrary precision arithmetic for parsing numbers, and a
 * formal rounding mode has yet to be chosen (round towards 0 seems
 * reasonable).
|
||||
*
|
||||
* This version has been modified for much greater flexibility in parsing, such
|
||||
* as choosing the radix, supporting integer output, and returning Dsts
|
||||
* directly.
|
||||
*
|
||||
 * Numbers are of the form [-+]R[rR]I.F[eE&][-+]X where R is the radix, I is
 * the integer part, F is the fractional part, and X is the exponent. All
 * signs, radix, decimal point, fractional part, and exponent can be omitted.
 * The number will be considered an integer if there is no decimal point
 * and no exponent. Any number greater than 2^31-1 or less than -(2^31) will
 * be coerced to a double. If there is an error, the function dst_scan_number
 * will return a dst nil. The radix is assumed to be 10 if omitted, and the E
 * separator for the exponent can only be used when the radix is 10. This is
 * because E is a valid digit in bases 15 or greater. For bases greater than
 * 10, the letters are used as digits. A through Z correspond to the digits 10
 * through 35, and the lowercase letters have the same values. The radix number
 * is always in base 10. For example, a hexadecimal number could be written
 * '16rdeadbeef'. dst_scan_number also supports some c style syntax for
 * hexadecimal literals. The previous number could also be written
 * '0xdeadbeef'. Note that in this case, the number will actually be a double
 * as it will not fit in the range for a signed 32 bit integer. The string
 * '0xbeef' would parse to an integer as it is in the range of an int32_t. */
|
||||
|
||||
/* TODO take down missle defence */
|
||||
|
||||
#include <dst/dst.h>
|
||||
#include <math.h>
|
||||
|
||||
/* Lookup table for getting values of characters when parsing numbers,
 * indexed by ASCII code (0-127). '0'-'9' map to 0-9; 'A'-'Z' and 'a'-'z'
 * both map to 10-35. Every other entry is 0xff, which is larger than any
 * supported radix and therefore never a valid digit. The table is
 * read-only. */
static const uint8_t digit_lookup[128] = {
    /* 0x00 - 0x2f: control characters and punctuation */
    0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
    0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
    0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
    /* 0x30 - 0x3f: '0' - '9' */
    0,1,2,3,4,5,6,7,8,9,0xff,0xff,0xff,0xff,0xff,0xff,
    /* 0x40 - 0x5f: 'A' - 'Z' */
    0xff,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
    25,26,27,28,29,30,31,32,33,34,35,0xff,0xff,0xff,0xff,0xff,
    /* 0x60 - 0x7f: 'a' - 'z' */
    0xff,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
    25,26,27,28,29,30,31,32,33,34,35,0xff,0xff,0xff,0xff,0xff
};
|
||||
|
||||
/* Compute mantissa * base^exponent as a double, negated when negative is
 * nonzero.
 *
 * The power of base is folded into the 64 bit mantissa one factor at a
 * time, shifting bits in or out and tracking them in a binary exponent, and
 * the result is assembled with ldexp. Bits shifted out are discarded, so
 * the result is not guaranteed to be correctly rounded.
 *
 * A zero mantissa yields 0.0 regardless of sign. An exponent above 1022
 * yields positive or negative infinity directly. */
static double convert(
        int negative,
        uint64_t mantissa,
        int32_t base,
        int32_t exponent) {

    int32_t exponent2 = 0;

    /* Short circuit zero and huge numbers */
    if (mantissa == 0)
        return 0.0;
    if (exponent > 1022)
        /* INFINITY from <math.h> rather than the constant expression
         * 1.0/0.0, which some compilers reject as a division by zero. */
        return negative ? -INFINITY : INFINITY;

    /* TODO add fast paths */

    /* Convert exponent on the base into exponent2, the power of
     * 2 that will be used. Modify the mantissa as we convert. */
    if (exponent > 0) {
        /* Make the mantissa large enough so no precision is lost */
        while (mantissa <= 0x03ffffffffffffffULL && exponent > 0) {
            mantissa *= base;
            exponent--;
        }
        while (exponent > 0) {
            /* Allow 6 bits of room when multiplying. This is because
             * the largest base is 36, which is 6 bits. The space of 6 should
             * prevent overflow. */
            mantissa >>= 1;
            exponent2++;
            if (mantissa <= 0x03ffffffffffffffULL) {
                mantissa *= base;
                exponent--;
            }
        }
    } else {
        while (exponent < 0) {
            mantissa <<= 1;
            exponent2--;
            /* Ensure that the last bit is set for minimum error
             * before dividing by the base */
            if (mantissa > 0x7fffffffffffffffULL) {
                mantissa /= base;
                exponent++;
            }
        }
    }

    return negative
        ? -ldexp(mantissa, exponent2)
        : ldexp(mantissa, exponent2);
}
|
||||
|
||||
/* Result of scanning a number source string. Will be further processed
 * depending on the desired resultant type. */
struct DstScanRes {
    /* Digits of the number accumulated as an unsigned integer in `base`. */
    uint64_t mant;

    /* Power of `base` that `mant` must be multiplied by. */
    int32_t ex;

    /* Nonzero when the input is not a valid number. */
    int error;

    /* Radix of the number; 10 unless a radix prefix was given. */
    int base;

    /* Nonzero when a decimal point was present. */
    int seenpoint;

    /* Nonzero when an exponent separator was present. */
    int foundexp;

    /* Nonzero when the number had a leading '-'. */
    int neg;
};
|
||||
|
||||
/* Get the mantissa and exponent of decimal number. The
|
||||
* mantissa will be stored in a 64 bit unsigned integer (always positive).
|
||||
* The exponent will be in a signed 32 bit integer. Will also check if
|
||||
* the decimal point has been seen. Returns -1 if there is an invalid
|
||||
* number. */
|
||||
static struct DstScanRes dst_scan_impl(
|
||||
const uint8_t *str,
|
||||
int32_t len) {
|
||||
|
||||
struct DstScanRes res;
|
||||
const uint8_t *end = str + len;
|
||||
|
||||
/* Initialize flags */
|
||||
int seenadigit = 0;
|
||||
|
||||
/* Initialize result */
|
||||
res.mant = 0;
|
||||
res.ex = 0;
|
||||
res.error = 0;
|
||||
res.base = 10;
|
||||
res.seenpoint = 0;
|
||||
res.foundexp = 0;
|
||||
res.neg = 0;
|
||||
|
||||
/* Prevent some kinds of overflow bugs relating to the exponent
|
||||
* overflowing. For example, if a string was passed 2GB worth of 0s after
|
||||
* the decimal point, exponent could wrap around and become positive. It's
|
||||
* easier to reject ridiculously large inputs than to check for overflows.
|
||||
* */
|
||||
if (len > INT32_MAX / 40) goto error;
|
||||
|
||||
/* Get sign */
|
||||
if (str >= end) goto error;
|
||||
if (*str == '-') {
|
||||
res.neg = 1;
|
||||
str++;
|
||||
} else if (*str == '+') {
|
||||
str++;
|
||||
}
|
||||
|
||||
/* Skip leading zeros */
|
||||
while (str < end && (*str == '0' || *str == '.')) {
|
||||
if (res.seenpoint) res.ex--;
|
||||
if (*str == '.') {
|
||||
if (res.seenpoint) goto error;
|
||||
res.seenpoint = 1;
|
||||
}
|
||||
seenadigit = 1;
|
||||
str++;
|
||||
}
|
||||
|
||||
/* Parse significant digits */
|
||||
while (str < end) {
|
||||
if (*str == '.') {
|
||||
if (res.seenpoint) goto error;
|
||||
res.seenpoint = 1;
|
||||
} else if (*str == '&') {
|
||||
res.foundexp = 1;
|
||||
break;
|
||||
} else if (res.base == 10 && (*str == 'E' || *str == 'e')) {
|
||||
res.foundexp = 1;
|
||||
break;
|
||||
} else if (*str == 'x' || *str == 'X') {
|
||||
if (res.seenpoint || res.mant > 0) goto error;
|
||||
res.base = 16;
|
||||
res.mant = 0;
|
||||
} else if (*str == 'r' || *str == 'R') {
|
||||
if (res.seenpoint) goto error;
|
||||
if (res.mant < 2 || res.mant > 36) goto error;
|
||||
res.base = res.mant;
|
||||
res.mant = 0;
|
||||
} else if (*str == '_') {
|
||||
;
|
||||
/* underscores are ignored - can be used for separator */
|
||||
} else {
|
||||
int digit = digit_lookup[*str & 0x7F];
|
||||
if (digit >= res.base) goto error;
|
||||
if (res.seenpoint) res.ex--;
|
||||
if (res.mant > 0x00ffffffffffffff)
|
||||
res.ex++;
|
||||
else
|
||||
res.mant = res.base * res.mant + digit;
|
||||
seenadigit = 1;
|
||||
}
|
||||
str++;
|
||||
}
|
||||
|
||||
if (!seenadigit)
|
||||
goto error;
|
||||
|
||||
/* Read exponent */
|
||||
if (str < end && res.foundexp) {
|
||||
int eneg = 0;
|
||||
int ee = 0;
|
||||
seenadigit = 0;
|
||||
str++;
|
||||
if (str >= end) goto error;
|
||||
if (*str == '-') {
|
||||
eneg = 1;
|
||||
str++;
|
||||
} else if (*str == '+') {
|
||||
str++;
|
||||
}
|
||||
/* Skip leading 0s in exponent */
|
||||
while (str < end && *str == '0') str++;
|
||||
while (str < end && ee < (INT32_MAX / 40)) {
|
||||
int digit = digit_lookup[*str & 0x7F];
|
||||
if (digit >= res.base) goto error;
|
||||
ee = res.base * ee + digit;
|
||||
str++;
|
||||
seenadigit = 1;
|
||||
}
|
||||
if (eneg) res.ex -= ee; else res.ex += ee;
|
||||
}
|
||||
|
||||
if (!seenadigit)
|
||||
goto error;
|
||||
|
||||
return res;
|
||||
|
||||
error:
|
||||
res.error = 1;
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Scan an integer from a string. If the string cannot be converted into
|
||||
* and integer, set *err to 1 and return 0. */
|
||||
int32_t dst_scan_integer(
|
||||
const uint8_t *str,
|
||||
int32_t len,
|
||||
int *err) {
|
||||
struct DstScanRes res = dst_scan_impl(str, len);
|
||||
int64_t i64;
|
||||
if (res.error)
|
||||
goto error;
|
||||
i64 = res.neg ? -res.mant : res.mant;
|
||||
if (i64 > INT32_MAX || i64 < INT32_MIN)
|
||||
goto error;
|
||||
if (NULL != err)
|
||||
*err = 0;
|
||||
return (int32_t) i64;
|
||||
error:
|
||||
if (NULL != err)
|
||||
*err = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Scan a real (double) from a string. If the string cannot be converted into
|
||||
* and integer, set *err to 1 and return 0. */
|
||||
double dst_scan_real(
|
||||
const uint8_t *str,
|
||||
int32_t len,
|
||||
int *err) {
|
||||
struct DstScanRes res = dst_scan_impl(str, len);
|
||||
if (res.error) {
|
||||
if (NULL != err)
|
||||
*err = 1;
|
||||
return 0.0;
|
||||
} else {
|
||||
if (NULL != err)
|
||||
*err = 0;
|
||||
}
|
||||
return convert(res.neg, res.mant, res.base, res.ex);
|
||||
}
|
||||
|
||||
/* Scans a number from a string. Can return either an integer or a real if
|
||||
* the number cannot be represented as an integer. Will return nil in case of
|
||||
* an error. */
|
||||
Dst dst_scan_number(
|
||||
const uint8_t *str,
|
||||
int32_t len) {
|
||||
struct DstScanRes res = dst_scan_impl(str, len);
|
||||
if (res.error)
|
||||
return dst_wrap_nil();
|
||||
if (!res.foundexp && !res.seenpoint) {
|
||||
int64_t i64 = res.neg ? -res.mant : res.mant;
|
||||
if (i64 <= INT32_MAX && i64 >= INT32_MIN) {
|
||||
return dst_wrap_integer((int32_t) i64);
|
||||
}
|
||||
}
|
||||
return dst_wrap_real(convert(res.neg, res.mant, res.base, res.ex));
|
||||
}
|
||||
Reference in New Issue
Block a user