Major refactor and restructure. Add CMake for anticipated windows

support.
2026-04-20 05:41:27 +00:00 · 2018-01-19 16:43:19 -05:00
parent acb706ca3a
commit 30f62ca454
61 changed files with 400 additions and 4638 deletions
--- a/src/parser/ast.c
+++ b/src/parser/ast.c
@@ -0,0 +1,187 @@
+/*
+* Copyright (c) 2017 Calvin Rose
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy
+* of this software and associated documentation files (the "Software"), to
+* deal in the Software without restriction, including without limitation the
+* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+* sell copies of the Software, and to permit persons to whom the Software is
+* furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+*/
+
+#include <dst/dst.h>
+#include <dst/dstparse.h>
+
+/* GC mark callback for AST nodes: marks the value stored in the node.
+ * The size argument is unused and the function always returns 0. */
+static int dst_ast_gcmark(void *p, size_t size) {
+    (void) size;
+    dst_mark(((DstAst *)p)->value);
+    return 0;
+}
+
+/* AST type: abstract type descriptor used for every AST node. The fields
+ * given here are the type name "ast", NULL for the second slot (no callback
+ * supplied), and dst_ast_gcmark as the mark callback. */
+static DstAbstractType dst_ast_type = {
+    "ast",
+    NULL,
+    dst_ast_gcmark
+};
+
+/* Allocate a new AST node holding x, record the given start and end source
+ * positions, set a flag bit derived from the type of x, and return the node
+ * wrapped as an abstract value. */
+Dst dst_ast_wrap(Dst x, int32_t start, int32_t end) {
+    DstAst *node = dst_abstract(&dst_ast_type, sizeof(DstAst));
+    node->flags = 1 << dst_type(x);
+    node->source_end = end;
+    node->source_start = start;
+    node->value = x;
+    return dst_wrap_abstract(node);
+}
+
+/* Return the AST node behind x, or NULL when x is not an abstract value of
+ * the AST type. */
+DstAst *dst_ast_node(Dst x) {
+    void *abst;
+    if (!dst_checktype(x, DST_ABSTRACT))
+        return NULL;
+    abst = dst_unwrap_abstract(x);
+    if (dst_abstract_type(abst) != &dst_ast_type)
+        return NULL;
+    return (DstAst *)abst;
+}
+
+/* Strip a single AST wrapper: if x is an AST node return the value it holds,
+ * otherwise return x unchanged. */
+Dst dst_ast_unwrap1(Dst x) {
+    DstAst *node = dst_ast_node(x);
+    if (node == NULL)
+        return x;
+    return node->value;
+}
+
+Dst dst_ast_unwrap(Dst x);
+
+/* Recursively unwrap the elements of an array. If no element changes when
+ * unwrapped, the original array is returned; otherwise a new array is built
+ * that reuses the unchanged prefix and unwraps everything after it. */
+static Dst astunwrap_array(DstArray *other) {
+    DstArray *copy;
+    Dst changed;
+    int32_t first_diff = 0;
+    int32_t k;
+    /* Find the first element whose unwrapped form differs from the original */
+    while (first_diff < other->count) {
+        changed = dst_ast_unwrap(other->data[first_diff]);
+        if (!dst_equals(changed, other->data[first_diff])) break;
+        first_diff++;
+    }
+    if (first_diff == other->count) return dst_wrap_array(other);
+    copy = dst_array(other->count);
+    for (k = 0; k < other->count; k++) {
+        if (k < first_diff) {
+            copy->data[k] = other->data[k];
+        } else if (k == first_diff) {
+            /* Already unwrapped during the scan above */
+            copy->data[k] = changed;
+        } else {
+            copy->data[k] = dst_ast_unwrap(other->data[k]);
+        }
+    }
+    copy->count = other->count;
+    return dst_wrap_array(copy);
+}
+
+/* Recursively unwrap the elements of a tuple. The original tuple is returned
+ * when nothing changes; otherwise a new tuple is built from the unchanged
+ * prefix followed by the unwrapped remainder. */
+static Dst astunwrap_tuple(const Dst *other) {
+    const int32_t length = dst_tuple_length(other);
+    Dst *copy;
+    Dst changed;
+    int32_t first_diff = 0;
+    int32_t k;
+    /* Find the first element whose unwrapped form differs from the original */
+    while (first_diff < length) {
+        changed = dst_ast_unwrap(other[first_diff]);
+        if (!dst_equals(changed, other[first_diff])) break;
+        first_diff++;
+    }
+    if (first_diff == length) return dst_wrap_tuple(other);
+    copy = dst_tuple_begin(length);
+    for (k = 0; k < length; k++) {
+        if (k < first_diff) {
+            copy[k] = other[k];
+        } else if (k == first_diff) {
+            /* Already unwrapped during the scan above */
+            copy[k] = changed;
+        } else {
+            copy[k] = dst_ast_unwrap(other[k]);
+        }
+    }
+    return dst_wrap_tuple(dst_tuple_end(copy));
+}
+
+/* Recursively unwrap the keys and values of a struct. The original struct is
+ * returned when no entry changes; otherwise a new struct is built that copies
+ * the entries before the first changed one and unwraps the rest. */
+static Dst astunwrap_struct(const DstKV *other) {
+    DstKV *result;
+    const DstKV *first_diff = NULL;
+    const DstKV *kv;
+    Dst newkey, newval;
+    /* Find the first entry whose key or value changes when unwrapped */
+    for (;;) {
+        first_diff = dst_struct_next(other, first_diff);
+        if (!first_diff) return dst_wrap_struct(other);
+        newkey = dst_ast_unwrap(first_diff->key);
+        newval = dst_ast_unwrap(first_diff->value);
+        if (!dst_equals(newkey, first_diff->key)) break;
+        if (!dst_equals(newval, first_diff->value)) break;
+    }
+    result = dst_struct_begin(dst_struct_length(other));
+    /* Entries before the first difference are copied as they are */
+    kv = dst_struct_next(other, NULL);
+    while (kv && kv != first_diff) {
+        dst_struct_put(result, kv->key, kv->value);
+        kv = dst_struct_next(other, kv);
+    }
+    /* The differing entry was already unwrapped by the scan */
+    dst_struct_put(result, newkey, newval);
+    for (kv = dst_struct_next(other, kv); kv; kv = dst_struct_next(other, kv)) {
+        dst_struct_put(result,
+                dst_ast_unwrap(kv->key),
+                dst_ast_unwrap(kv->value));
+    }
+    return dst_wrap_struct(dst_struct_end(result));
+}
+
+/* Recursively unwrap the keys and values of a table. The original table is
+ * returned when no entry changes; otherwise a new table (created with the
+ * original's capacity) receives the unchanged leading entries followed by
+ * the unwrapped remainder. */
+static Dst astunwrap_table(DstTable *other) {
+    DstTable *result;
+    const DstKV *first_diff = NULL;
+    const DstKV *kv;
+    Dst newkey, newval;
+    /* Find the first entry whose key or value changes when unwrapped */
+    for (;;) {
+        first_diff = dst_table_next(other, first_diff);
+        if (!first_diff) return dst_wrap_table(other);
+        newkey = dst_ast_unwrap(first_diff->key);
+        newval = dst_ast_unwrap(first_diff->value);
+        if (!dst_equals(newkey, first_diff->key)) break;
+        if (!dst_equals(newval, first_diff->value)) break;
+    }
+    result = dst_table(other->capacity);
+    /* Entries before the first difference are copied as they are */
+    kv = dst_table_next(other, NULL);
+    while (kv && kv != first_diff) {
+        dst_table_put(result, kv->key, kv->value);
+        kv = dst_table_next(other, kv);
+    }
+    /* The differing entry was already unwrapped by the scan */
+    dst_table_put(result, newkey, newval);
+    for (kv = dst_table_next(other, kv); kv; kv = dst_table_next(other, kv)) {
+        dst_table_put(result,
+                dst_ast_unwrap(kv->key),
+                dst_ast_unwrap(kv->value));
+    }
+    return dst_wrap_table(result);
+}
+
+/* Recursively remove AST wrappers from a value. One wrapper level is
+ * stripped from x itself, then arrays, tuples, structs and tables are
+ * processed by the helpers above, which return the original container when
+ * nothing inside it changed so that unnecessary allocation is avoided. Any
+ * other type is returned as is. */
+Dst dst_ast_unwrap(Dst x) {
+    Dst inner = dst_ast_unwrap1(x);
+    switch (dst_type(inner)) {
+        case DST_TABLE:
+            return astunwrap_table(dst_unwrap_table(inner));
+        case DST_STRUCT:
+            return astunwrap_struct(dst_unwrap_struct(inner));
+        case DST_TUPLE:
+            return astunwrap_tuple(dst_unwrap_tuple(inner));
+        case DST_ARRAY:
+            return astunwrap_array(dst_unwrap_array(inner));
+        default:
+            break;
+    }
+    return inner;
+}
+
--- a/src/parser/parse.c
+++ b/src/parser/parse.c
@@ -0,0 +1,551 @@
+/*
+* Copyright (c) 2017 Calvin Rose
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy
+* of this software and associated documentation files (the "Software"), to
+* deal in the Software without restriction, including without limitation the
+* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+* sell copies of the Software, and to permit persons to whom the Software is
+* furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+*/
+
+#include <dst/dst.h>
+#include <dst/dstparse.h>
+#include <headerlibs/vector.h>
+
+/* Build the two element tuple (quote x) around a value */
+static Dst quote(Dst x) {
+    Dst *pair = dst_tuple_begin(2);
+    pair[1] = x;
+    pair[0] = dst_csymbolv("quote");
+    return dst_wrap_tuple(dst_tuple_end(pair));
+}
+
+/* Return 1 if the byte is treated as whitespace by the parser, else 0.
+ * Besides space, tab, newline and carriage return this also covers the NUL
+ * byte, the semicolon and the comma. */
+static int is_whitespace(uint8_t c) {
+    switch (c) {
+        case ' ':
+        case '\t':
+        case '\n':
+        case '\r':
+        case '\0':
+        case ';':
+        case ',':
+            return 1;
+        default:
+            return 0;
+    }
+}
+
+/* Code gen
+
+printf("static uint32_t symchars[8] = {\n\t");
+for (int i = 0; i < 256; i += 32) {
+    uint32_t block = 0;
+    for (int j = 0; j < 32; j++) {
+        block |= is_symbol_char_gen(i + j) << j;
+    }
+    printf("0x%08x%s", block, (i == (256 - 32)) ? "" : ", ");
+}
+printf("\n};\n");
+
+static int is_symbol_char_gen(uint8_t c) {
+    if (c >= 'a' && c <= 'z') return 1;
+    if (c >= 'A' && c <= 'Z') return 1;
+    if (c >= '0' && c <= '9') return 1;
+    return (c == '!' ||
+        c == '$' ||
+        c == '%' ||
+        c == '&' ||
+        c == '*' ||
+        c == '+' ||
+        c == '-' ||
+        c == '.' ||
+        c == '/' ||
+        c == ':' ||
+        c == '<' ||
+        c == '?' ||
+        c == '=' ||
+        c == '>' ||
+        c == '@' ||
+        c == '\\' ||
+        c == '^' ||
+        c == '_' ||
+        c == '~' ||
+        c == '|');
+}
+
+The table contains 256 bits, where each bit is 1
+if the corresponding ascii code is a symbol char, and 0
+if not. The upper characters are also considered symbol
+chars and are then checked for utf-8 compliance. */
+static const uint32_t symchars[8] = {
+	0x00000000, 0xF7ffec72, 0xd7ffffff, 0x57fffffe,
+	0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+};
+
+/* Check if a character is a valid symbol character. Returns 1 if it is and
+ * 0 otherwise. Symbol chars are A-Z, a-z, 0-9, one of
+ * !$%&*+-./:<=>?@\^_~| and every byte with the high bit set (0x80-0xFF).
+ *
+ * The word for the byte is c >> 5 and the bit inside the word is c & 0x1F.
+ * The word is shifted right and masked instead of building a mask with
+ * 1 << bit: for bit 31 (for example '?', '_' or 0xFF) that shift would
+ * overflow a signed int, which is undefined behaviour. */
+static int is_symbol_char(uint8_t c) {
+    return (int) ((symchars[c >> 5] >> (c & 0x1F)) & 1u);
+}
+
+/* Check that a byte range is well formed utf-8. Returns 1 when it is and 0
+ * otherwise. Only the encoding is validated: sequence lengths, continuation
+ * bytes and overlong forms. Code point values themselves are not checked. */
+static int valid_utf8(const uint8_t *str, int32_t len) {
+    int32_t pos = 0;
+    while (pos < len) {
+        const uint8_t lead = str[pos];
+        int32_t width;
+        int32_t k;
+
+        /* The lead byte determines how long the sequence is */
+        if (lead < 0x80) {
+            width = 1;
+        } else if ((lead & 0xE0) == 0xC0) {
+            width = 2;
+        } else if ((lead & 0xF0) == 0xE0) {
+            width = 3;
+        } else if ((lead & 0xF8) == 0xF0) {
+            width = 4;
+        } else {
+            /* Stray continuation byte, or a 5 or 6 byte sequence */
+            return 0;
+        }
+
+        /* The sequence must fit inside the buffer */
+        if (pos + width > len) return 0;
+
+        /* Every trailing byte must look like 10XX XXXX */
+        for (k = 1; k < width; k++) {
+            if ((str[pos + k] & 0xC0) != 0x80) return 0;
+        }
+
+        /* Reject overlong encodings */
+        if (width == 2 && lead < 0xC2) return 0;
+        if (lead == 0xE0 && str[pos + 1] < 0xA0) return 0;
+        if (lead == 0xF0 && str[pos + 1] < 0x90) return 0;
+
+        pos += width;
+    }
+    return 1;
+}
+
+/* Convert one hexadecimal digit character (0-9, a-f, A-F) to its value
+ * 0-15. Returns -1 for any other byte. */
+static int to_hex(uint8_t c) {
+    if (c >= '0' && c <= '9') return c - '0';
+    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+    return -1;
+}
+
+typedef int (*Consumer)(DstParser *p, DstParseState *state, uint8_t c); /* Per-state byte handler.
+ * dst_parser_consume keeps calling the consumer of the top state with the
+ * same byte until a call returns nonzero or the parser error is set. */
+struct DstParseState {
+    int32_t qcount; /* Pending quote count (root, popstate); escapeh reuses it as hex digits left */
+    int32_t argn; /* Values pushed for this container; also scratch for escapeh and tokenchar */
+    int flags; /* PFLAG_* bits */
+    size_t start; /* Parser index at the time the state was pushed */
+    Consumer consumer; /* Handler that receives the bytes for this state */
+};
+
+#define PFLAG_CONTAINER 1 /* State collects finished values (tested by popstate) */
+
+/* Push a fresh parse state with the given consumer and flags. The state
+ * starts with zeroed counters and remembers the current parser index as its
+ * start position. */
+static void pushstate(DstParser *p, Consumer consumer, int flags) {
+    DstParseState fresh;
+    fresh.consumer = consumer;
+    fresh.flags = flags;
+    fresh.start = p->index;
+    fresh.argn = 0;
+    fresh.qcount = 0;
+    dst_v_push(p->states, fresh);
+}
+
+/* Pop the top parse state and hand the finished value to the state below.
+ * Nothing more happens unless that state is a container. For a container
+ * the value is quoted once per pending quote, optionally wrapped in AST
+ * nodes carrying the source range when source mapping is enabled, and then
+ * pushed on the argument stack while the container's count is bumped. */
+static void popstate(DstParser *p, Dst val) {
+    DstParseState finished = dst_v_last(p->states);
+    DstParseState *parent;
+    int32_t remaining;
+    dst_v_pop(p->states);
+    parent = &dst_v_last(p->states);
+    if (!(parent->flags & PFLAG_CONTAINER)) return;
+
+    /* Apply one (quote ...) layer for every pending quote */
+    for (remaining = parent->qcount; remaining > 0; remaining--) {
+        if (p->flags & DST_PARSEFLAG_SOURCEMAP)
+            val = dst_ast_wrap(val, (int32_t) finished.start, (int32_t) p->index);
+        val = quote(val);
+    }
+    parent->qcount = 0;
+
+    /* Ast wrap */
+    if (p->flags & DST_PARSEFLAG_SOURCEMAP)
+        val = dst_ast_wrap(val, (int32_t) finished.start, (int32_t) p->index);
+
+    parent->argn++;
+    dst_v_push(p->argstack, val);
+}
+
+/* Translate the character that follows a backslash inside a string.
+ * Returns the byte the escape stands for (0-255), the placeholder 1 for 'h'
+ * (a hex escape whose value is read by escapeh), or -1 if the character does
+ * not start a valid escape. A separate failure value is needed because the
+ * escapes \0 and \z legitimately produce the byte 0. */
+static int checkescape(uint8_t c) {
+    switch (c) {
+        default: return -1;
+        case 'h': return 1;
+        case 'n': return '\n';
+        case 't': return '\t';
+        case 'r': return '\r';
+        case '0': return '\0';
+        case 'z': return '\0';
+        case 'f': return '\f';
+        case 'e': return 27;
+        case '"': return '"';
+        case '\'': return '\'';
+        case '\\': return '\\';
+    }
+}
+
+/* Forward declare */
+static int stringchar(DstParser *p, DstParseState *state, uint8_t c);
+
+/* Consume one hex digit of a \hXX escape. state->argn accumulates the value
+ * and state->qcount counts the digits still expected; once it reaches zero
+ * the low byte of the value is appended to the buffer and normal string
+ * parsing resumes. An invalid digit sets the parser error. Always reports
+ * the byte as consumed. */
+static int escapeh(DstParser *p, DstParseState *state, uint8_t c) {
+    int digit = to_hex(c);
+    if (digit < 0) {
+        p->error = "invalid hex digit in hex escape";
+        return 1;
+    }
+    state->argn = (state->argn << 4) + digit;
+    state->qcount--;
+    if (!state->qcount) {
+        dst_v_push(p->buf, (state->argn & 0xFF));
+        state->argn = 0;
+        state->consumer = stringchar;
+    }
+    return 1;
+}
+
+/* Consume the character directly after a backslash. 'h' switches to the
+ * two digit hex escape reader; any other valid escape appends its byte to
+ * the buffer and returns to normal string parsing. An unknown escape sets
+ * the parser error. Always reports the byte as consumed. */
+static int escape1(DstParser *p, DstParseState *state, uint8_t c) {
+    int e = checkescape(c);
+    if (e < 0) {
+        p->error = "invalid string escape sequence";
+        return 1;
+    }
+    if (c == 'h') {
+        state->qcount = 2;
+        state->argn = 0;
+        state->consumer = escapeh;
+    } else {
+        dst_v_push(p->buf, (uint8_t) e);
+        state->consumer = stringchar;
+    }
+    return 1;
+}
+
+/* Consume one byte inside a string literal. A backslash switches to escape
+ * handling, a double quote finishes the string and hands it to popstate,
+ * and every other byte is appended to the buffer. Always reports the byte
+ * as consumed. */
+static int stringchar(DstParser *p, DstParseState *state, uint8_t c) {
+    switch (c) {
+        case '\\':
+            /* Enter escape */
+            state->consumer = escape1;
+            break;
+        case '"': {
+            /* String end */
+            Dst str = dst_wrap_string(dst_string(p->buf, dst_v_count(p->buf)));
+            dst_v_empty(p->buf);
+            popstate(p, str);
+            break;
+        }
+        default:
+            /* normal char */
+            dst_v_push(p->buf, c);
+            break;
+    }
+    return 1;
+}
+
+/* Compare a length delimited buffer against a NUL terminated constant.
+ * Returns 0 when the buffer holds exactly the bytes of cstr, a negative
+ * value when the buffer orders before cstr (including when it is a proper
+ * prefix of it) and a positive value when it orders after. */
+static int check_str_const(const char *cstr, const uint8_t *str, int32_t len) {
+    int32_t index;
+    for (index = 0; index < len; index++) {
+        uint8_t c = str[index];
+        uint8_t k = ((const uint8_t *)cstr)[index];
+        if (c < k) return -1;
+        if (c > k) return 1;
+        /* Here c == k. If that byte is the terminator, cstr has ended while
+         * the buffer still holds bytes (an embedded NUL), so the buffer is
+         * the longer string and must not compare equal. */
+        if (k == '\0') return 1;
+    }
+    return (cstr[index] == '\0') ? 0 : -1;
+}
+
+/* Consume one byte of a bare token (number, nil, true, false, keyword or
+ * symbol). Symbol characters are appended to the buffer and reported as
+ * consumed (returns 1). Any other byte ends the token: the buffer is
+ * converted to a value, handed to popstate, and 0 is returned so the byte is
+ * offered again to the state below. On error p->error is set. */
+static int tokenchar(DstParser *p, DstParseState *state, uint8_t c) {
+    Dst numcheck, ret;
+    int32_t blen;
+    if (is_symbol_char(c)) {
+        dst_v_push(p->buf, c);
+        if (c > 127) state->argn = 1; /* Use to indicate non ascii */
+        return 1;
+    }
+    /* Token finished */
+    blen = dst_v_count(p->buf);
+    if (blen == 0) {
+        /* root starts a token for every byte that is neither whitespace nor
+         * a delimiter, including bytes that are not symbol characters (for
+         * example a backtick or a control character). Such a byte leaves the
+         * buffer empty: reading p->buf[0] below would be out of bounds, and
+         * returning an empty symbol without consuming the byte would make
+         * dst_parser_consume spin forever. Reject the byte instead. */
+        p->error = "unexpected character";
+        return 1;
+    }
+    numcheck = dst_scan_number(p->buf, blen);
+    if (!dst_checktype(numcheck, DST_NIL)) {
+        ret = numcheck;
+    } else if (!check_str_const("nil", p->buf, blen)) {
+        ret = dst_wrap_nil();
+    } else if (!check_str_const("false", p->buf, blen)) {
+        ret = dst_wrap_false();
+    } else if (!check_str_const("true", p->buf, blen)) {
+        ret = dst_wrap_true();
+    } else {
+        if (p->buf[0] >= '0' && p->buf[0] <= '9') {
+            p->error = "symbol literal cannot start with a digit";
+            return 0;
+        } else {
+            /* Don't do full utf8 check unless we have seen non ascii characters. */
+            int valid = (!state->argn) || valid_utf8(p->buf, blen);
+            if (!valid) {
+                p->error = "invalid utf-8 in symbol";
+                return 0;
+            }
+            if (p->buf[0] == ':') {
+                /* A leading colon makes a string from the rest of the token */
+                ret = dst_stringv(p->buf + 1, blen - 1);
+            } else {
+                ret = dst_symbolv(p->buf, blen);
+            }
+        }
+    }
+    dst_v_empty(p->buf);
+    popstate(p, ret);
+    return 0;
+}
+
+/* Consume one byte of a comment. Every byte is swallowed; a newline also
+ * pops the comment state. */
+static int comment(DstParser *p, DstParseState *state, uint8_t c) {
+    (void) state;
+    if (c != '\n') return 1;
+    dst_v_pop(p->states);
+    return 1;
+}
+
+/* Forward declaration */
+static int root(DstParser *p, DstParseState *state, uint8_t c);
+
+/* Consumer for the inside of a ( ... ) form. A closing parenthesis collects
+ * the values pushed for this state from the argument stack into a tuple
+ * (keeping their original order) and finishes the state; every other byte
+ * is handled like at the root. */
+static int dotuple(DstParser *p, DstParseState *state, uint8_t c) {
+    Dst *items;
+    int32_t slot;
+    if (c != ')') return root(p, state, c);
+    items = dst_tuple_begin(state->argn);
+    /* The stack yields the values last to first, so fill from the back */
+    slot = state->argn;
+    while (slot-- > 0) {
+        items[slot] = dst_v_last(p->argstack);
+        dst_v_pop(p->argstack);
+    }
+    popstate(p, dst_wrap_tuple(dst_tuple_end(items)));
+    return 1;
+}
+
+/* Consumer for the inside of a [ ... ] form. A closing bracket collects the
+ * values pushed for this state from the argument stack into an array
+ * (keeping their original order) and finishes the state; every other byte
+ * is handled like at the root. */
+static int doarray(DstParser *p, DstParseState *state, uint8_t c) {
+    DstArray *items;
+    int32_t slot;
+    if (c != ']') return root(p, state, c);
+    items = dst_array(state->argn);
+    /* The stack yields the values last to first, so fill from the back */
+    slot = state->argn;
+    while (slot-- > 0) {
+        items->data[slot] = dst_v_last(p->argstack);
+        dst_v_pop(p->argstack);
+    }
+    items->count = state->argn;
+    popstate(p, dst_wrap_array(items));
+    return 1;
+}
+
+/* Consumer for the inside of a { ... } form. A closing brace turns the
+ * values pushed for this state into key/value pairs of a struct and
+ * finishes the state; an odd number of values sets the parser error. Every
+ * other byte is handled like at the root. */
+static int dostruct(DstParser *p, DstParseState *state, uint8_t c) {
+    DstKV *st;
+    int32_t pairs;
+    if (c != '}') return root(p, state, c);
+    if (state->argn & 1) {
+        p->error = "struct literal expects even number of arguments";
+        return 1;
+    }
+    pairs = state->argn >> 1;
+    st = dst_struct_begin(pairs);
+    while (pairs-- > 0) {
+        /* Values sit above their keys on the stack */
+        Dst value = dst_v_last(p->argstack);
+        Dst key;
+        dst_v_pop(p->argstack);
+        key = dst_v_last(p->argstack);
+        dst_v_pop(p->argstack);
+        dst_struct_put(st, key, value);
+    }
+    popstate(p, dst_wrap_struct(dst_struct_end(st)));
+    return 1;
+}
+
+/* Consumer for the inside of a @{ ... } form. A closing brace turns the
+ * values pushed for this state into key/value pairs of a table and finishes
+ * the state; an odd number of values sets the parser error. Every other
+ * byte is handled like at the root. */
+static int dotable(DstParser *p, DstParseState *state, uint8_t c) {
+    DstTable *table;
+    int32_t pairs;
+    if (c != '}') return root(p, state, c);
+    if (state->argn & 1) {
+        p->error = "table literal expects even number of arguments";
+        return 1;
+    }
+    pairs = state->argn >> 1;
+    table = dst_table(pairs);
+    while (pairs-- > 0) {
+        /* Values sit above their keys on the stack */
+        Dst value = dst_v_last(p->argstack);
+        Dst key;
+        dst_v_pop(p->argstack);
+        key = dst_v_last(p->argstack);
+        dst_v_pop(p->argstack);
+        dst_table_put(table, key, value);
+    }
+    popstate(p, dst_wrap_table(table));
+    return 1;
+}
+
+/* Consumer for the byte that follows an '@'. The temporary state is always
+ * popped. An opening brace starts a table literal and is consumed; any other
+ * byte means the '@' began an ordinary token, so a token state is pushed,
+ * the '@' that root swallowed is put back into the buffer, and the byte is
+ * left unconsumed for the token state. */
+static int ampersand(DstParser *p, DstParseState *state, uint8_t c) {
+    (void) state;
+    dst_v_pop(p->states);
+    if (c != '{') {
+        pushstate(p, tokenchar, 0);
+        dst_v_push(p->buf, '@'); /* Restore the leading '@' that was dropped */
+        return 0;
+    }
+    pushstate(p, dotable, PFLAG_CONTAINER);
+    return 1;
+}
+
+/* Consumer for the top level and for the inside of containers. Dispatches
+ * on the byte: opening delimiters, strings, comments and '@' push a new
+ * state, a single quote bumps the pending quote count, and a closing
+ * delimiter that reaches this function sets the parser error. Whitespace is
+ * swallowed. Any other byte starts a token and is left unconsumed (returns
+ * 0) so the token state sees it too. */
+static int root(DstParser *p, DstParseState *state, uint8_t c) {
+    switch (c) {
+        case '(':
+            pushstate(p, dotuple, PFLAG_CONTAINER);
+            return 1;
+        case '[':
+            pushstate(p, doarray, PFLAG_CONTAINER);
+            return 1;
+        case '{':
+            pushstate(p, dostruct, PFLAG_CONTAINER);
+            return 1;
+        case ')':
+        case ']':
+        case '}':
+            p->error = "mismatched delimiter";
+            return 1;
+        case '@':
+            pushstate(p, ampersand, 0);
+            return 1;
+        case '#':
+            pushstate(p, comment, 0);
+            return 1;
+        case '"':
+            pushstate(p, stringchar, 0);
+            return 1;
+        case '\'':
+            state->qcount++;
+            return 1;
+        default:
+            break;
+    }
+    if (is_whitespace(c)) return 1;
+    pushstate(p, tokenchar, 0);
+    return 0;
+}
+
+/* Feed one byte to the parser. Returns 0 without doing anything when an
+ * error is already pending. Otherwise the byte is offered to the consumer
+ * of the top state again and again until a consumer reports it consumed or
+ * an error is set; then the byte is remembered as lookback, the index is
+ * advanced and 1 is returned. */
+int dst_parser_consume(DstParser *parser, uint8_t c) {
+    int done;
+    if (parser->error) return 0;
+    do {
+        DstParseState *top = &dst_v_last(parser->states);
+        done = top->consumer(parser, top, c);
+    } while (!done && !parser->error);
+    parser->index++;
+    parser->lookback = c;
+    return 1;
+}
+
+/* Report the parser state: ERROR when an error is pending, PENDING while
+ * any state besides the root is on the state stack, FULL when the argument
+ * stack holds at least one value, and ROOT otherwise. */
+DstParserStatus dst_parser_status(DstParser *parser) {
+    DstParserStatus status;
+    if (parser->error) {
+        status = DST_PARSE_ERROR;
+    } else if (dst_v_count(parser->states) > 1) {
+        status = DST_PARSE_PENDING;
+    } else if (dst_v_count(parser->argstack)) {
+        status = DST_PARSE_FULL;
+    } else {
+        status = DST_PARSE_ROOT;
+    }
+    return status;
+}
+
+/* Fetch and clear the pending error. Returns NULL when the parser is not in
+ * the error state. Otherwise the parser is reset (argument stack and buffer
+ * emptied, state stack cut back to its first entry, error cleared) and the
+ * error message is returned. */
+const char *dst_parser_error(DstParser *parser) {
+    const char *message;
+    if (dst_parser_status(parser) != DST_PARSE_ERROR) return NULL;
+    message = parser->error;
+    parser->error = NULL;
+    dst_v_empty(parser->buf);
+    dst_v_empty(parser->argstack);
+    dst_v__cnt(parser->states) = 1;
+    return message;
+}
+
+/* Take the most recently pushed value off the argument stack and return it.
+ * Returns nil when the parser status is anything other than FULL. */
+Dst dst_parser_produce(DstParser *parser) {
+    Dst value;
+    if (dst_parser_status(parser) != DST_PARSE_FULL)
+        return dst_wrap_nil();
+    value = dst_v_last(parser->argstack);
+    dst_v_pop(parser->argstack);
+    return value;
+}
+
+/* Initialize a parser: all vectors start empty, no error is pending, the
+ * index is 0, lookback is -1 and the given flags are stored. The root state
+ * is pushed last, once the fields it reads are set. */
+void dst_parser_init(DstParser *parser, int flags) {
+    parser->flags = flags;
+    parser->lookback = -1;
+    parser->index = 0;
+    parser->error = NULL;
+    parser->buf = NULL;
+    parser->states = NULL;
+    parser->argstack = NULL;
+    pushstate(parser, root, PFLAG_CONTAINER);
+}
+
+/* Release the three vectors owned by the parser: the state stack, the token
+ * buffer and the argument stack. */
+void dst_parser_deinit(DstParser *parser) {
+    dst_v_free(parser->states);
+    dst_v_free(parser->buf);
+    dst_v_free(parser->argstack);
+}
+
+/* C functions */
+
+/* GC mark callback for parser objects: marks every value currently held on
+ * the argument stack. The size argument is unused. Always returns 0. */
+static int parsermark(void *p, size_t size) {
+    DstParser *parser = (DstParser *)p;
+    int32_t slot = 0;
+    (void) size;
+    while (slot < dst_v_count(parser->argstack)) {
+        dst_mark(parser->argstack[slot]);
+        slot++;
+    }
+    return 0;
+}
+
+/* GC callback for parser objects: releases the parser's vectors through
+ * dst_parser_deinit. The size argument is unused. Always returns 0. */
+static int parsergc(void *p, size_t size) {
+    (void) size;
+    dst_parser_deinit((DstParser *)p);
+    return 0;
+}
+
+DstAbstractType dst_parse_parsertype = { /* Abstract type descriptor for parser objects */
+    "stl.parser", /* Type name */
+    parsergc, /* Releases the parser's vectors via dst_parser_deinit */
+    parsermark /* Marks every value on the parser's argument stack */
+};
+
+/* C function "parser": create a new parser object.
+ * Takes at most one argument, an integer holding the parser flags (for
+ * example DST_PARSEFLAG_SOURCEMAP); without an argument the flags are 0.
+ * Throws when more than one argument is given or when the argument is not
+ * an integer. Returns the new parser as an abstract value. */
+static int cfun_parser(DstArgs args) {
+    int flags = 0;
+    DstParser *p;
+    if (args.n > 1) return dst_throw(args, "expected 1 argument");
+    if (args.n) {
+        if (!dst_checktype(args.v[0], DST_INTEGER)) return dst_throw(args, "expected integer");
+        flags = dst_unwrap_integer(args.v[0]);
+    }
+    p = dst_abstract(&dst_parse_parsertype, sizeof(DstParser));
+    /* Pass the requested flags on; they used to be parsed and then dropped */
+    dst_parser_init(p, flags);
+    return dst_return(args, dst_wrap_abstract(p));
+}
+
+/* Library entry point: defines the C function "parser" in the environment
+ * obtained from the arguments. Always returns 0. */
+int dst_lib_parse(DstArgs args) {
+    DstTable *environment = dst_env_arg(args);
+    Dst constructor = dst_wrap_cfunction(cfun_parser);
+    dst_env_def(environment, "parser", constructor);
+    return 0;
+}
--- a/src/parser/strtod.c
+++ b/src/parser/strtod.c
@@ -0,0 +1,319 @@
+/*
+* Copyright (c) 2017 Calvin Rose
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy
+* of this software and associated documentation files (the "Software"), to
+* deal in the Software without restriction, including without limitation the
+* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+* sell copies of the Software, and to permit persons to whom the Software is
+* furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+*/
+
+/* Use a custom double parser instead of libc's strtod for better portability
+ * and control. Also, uses a less strict rounding method than ieee to not incur
+ * the cost of 4000 loc and dependence on arbitrary precision arithmetic.  There
+ * is no plan to use arbitrary precision arithmetic for parsing numbers, and a
+ * formal rounding mode has yet to be chosen (round towards 0 seems
+ * reasonable).
+ *
+ * This version has been modified for much greater flexibility in parsing, such
+ * as choosing the radix, supporting integer output, and returning Dsts
+ * directly.
+ *
+ * Numbers are of the form [-+]R[rR]I.F[eE&][-+]X where R is the radix, I is
+ * the integer part, F is the fractional part, and X is the exponent. All
+ * signs, radix, decimal point, fractional part, and exponent can be omitted.
+ * The number will be considered an integer if there is no decimal point
+ * and no exponent. Any number greater than 2^31-1 or less than -(2^31) will be
+ * coerced to a double. If there is an error, the function dst_scan_number will
+ * return a dst nil. The radix is assumed to be 10 if omitted, and the E
+ * separator for the exponent can only be used when the radix is 10. This is
+ * because E is a valid digit in bases 15 or greater. For bases greater than 10,
+ * the letters are used as digits. A through Z correspond to the digits 10
+ * through 35, and the lowercase letters have the same values. The radix number
+ * is always in base 10. For example, a hexadecimal number could be written
+ * '16rdeadbeef'. dst_scan_number also supports some c style syntax for
+ * hexadecimal literals. The previous number could also be written
+ * '0xdeadbeef'. Note that in this case, the number will actually be a double
+ * as it will not fit in the range for a signed 32 bit integer. The string
+ * '0xbeef' would parse to an integer as it is in the range of an int32_t. */
+
+/* TODO take down missle defence */
+
+#include <dst/dst.h>
+#include <math.h>
+
+/* Lookup table for getting values of characters when parsing numbers. Handles
+ * digits 0-9 and a-z (and A-Z). A-Z have values of 10 to 35. Every other
+ * entry is 0xff, which is never smaller than a radix (at most 36) and is
+ * therefore rejected by the digit >= base checks in dst_scan_impl. The table
+ * covers the 128 ASCII codes only. */
+static uint8_t digit_lookup[128] = {
+    0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+    0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+    0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+    0,1,2,3,4,5,6,7,8,9,0xff,0xff,0xff,0xff,0xff,0xff,
+    0xff,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
+    25,26,27,28,29,30,31,32,33,34,35,0xff,0xff,0xff,0xff,0xff,
+    0xff,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
+    25,26,27,28,29,30,31,32,33,34,35,0xff,0xff,0xff,0xff,0xff
+};
+
+/* Combine a mantissa and an exponent of the given base into a double:
+ * the result approximates mantissa * base^exponent, negated when negative
+ * is nonzero. Handles zero, infinities, and values too small to represent
+ * (which become zero or denormals through ldexp). base is expected to be
+ * in the range 2 to 36. Rounding is not strictly IEEE compliant. */
+static double convert(
+        int negative,
+        uint64_t mantissa,
+        int32_t base,
+        int32_t exponent) {
+
+    int32_t exponent2 = 0;
+
+    /* Short circuit zero and huge numbers. INFINITY from <math.h> is used
+     * rather than the constant expression 1.0/0.0, which some compilers
+     * (MSVC) reject as a division by zero. */
+    if (mantissa == 0)
+        return 0.0;
+    if (exponent > 1022)
+        return negative ? -INFINITY : INFINITY;
+
+    /* Short circuit hopelessly small numbers. The mantissa is below 2^64 and
+     * the base is at least 2, so the magnitude is below 2^(64 - 1140), which
+     * is less than half of the smallest denormal and rounds to zero anyway.
+     * This avoids running the loop below for millions of iterations. */
+    if (exponent < -1140)
+        return negative ? -0.0 : 0.0;
+
+    /* TODO add fast paths */
+
+    /* Convert exponent on the base into exponent2, the power of
+     * 2 that will be used. Modify the mantissa as we convert. */
+    if (exponent > 0) {
+        /* Make the mantissa large enough so no precision is lost */
+        while (mantissa <= 0x03ffffffffffffffULL && exponent > 0) {
+            mantissa *= base;
+            exponent--;
+        }
+        while (exponent > 0) {
+            /* Allow 6 bits of room when multiplying. This is because
+             * the largest base is 36, which is 6 bits. The space of 6 should
+             * prevent overflow.*/
+            mantissa >>= 1;
+            exponent2++;
+            if (mantissa <= 0x03ffffffffffffffULL) {
+                mantissa *= base;
+                exponent--;
+            }
+        }
+    } else {
+        while (exponent < 0) {
+            mantissa <<= 1;
+            exponent2--;
+            /* Ensure that the top bit is set for minimum error
+             * before dividing by the base */
+            if (mantissa > 0x7fffffffffffffffULL) {
+                mantissa /= base;
+                exponent++;
+            }
+        }
+    }
+
+    return negative
+        ? -ldexp((double) mantissa, exponent2)
+        : ldexp((double) mantissa, exponent2);
+}
+
+/* Result of scanning a number source string. Will be further processed
+ * depending on the desired resultant type. Filled in by dst_scan_impl. */
+struct DstScanRes {
+    uint64_t mant; /* Accumulated digits, always non-negative */
+    int32_t ex; /* Exponent, counted in powers of base */
+    int error; /* Set to 1 when the input is not a valid number */
+    int base; /* Radix of the digits; 10 unless a prefix changed it */
+    int seenpoint; /* Nonzero when a decimal point was seen */
+    int foundexp; /* Nonzero when an exponent marker (e, E or &) was seen */
+    int neg; /* Nonzero when the number has a leading minus sign */
+};
+
+/* Scan a number from a string into its parts: sign, radix, mantissa and
+ * exponent. The mantissa is stored in a 64 bit unsigned integer (always
+ * positive) and the exponent in a signed 32 bit integer. Also records
+ * whether a decimal point or an exponent marker was seen. On invalid input
+ * the error field of the result is set to 1 and the other fields must not
+ * be relied upon.
+ *
+ * Input is rejected when it has no digits at all (for example "." or "-"),
+ * when a radix or hex prefix is not followed by a digit ("0x", "16r"),
+ * when the hex prefix x is not preceded by a zero ("xff"), when a digit is
+ * not valid for the radix, or when any byte is outside the ASCII range. */
+static struct DstScanRes dst_scan_impl(
+        const uint8_t *str,
+        int32_t len) {
+
+    struct DstScanRes res;
+    const uint8_t *end = str + len;
+
+    /* Initialize flags */
+    int seenadigit = 0;
+    int radixset = 0;
+
+    /* Initialize result */
+    res.mant = 0;
+    res.ex = 0;
+    res.error = 0;
+    res.base = 10;
+    res.seenpoint = 0;
+    res.foundexp = 0;
+    res.neg = 0;
+
+    /* Prevent some kinds of overflow bugs relating to the exponent
+     * overflowing.  For example, if a string was passed 2GB worth of 0s after
+     * the decimal point, exponent could wrap around and become positive. It's
+     * easier to reject ridiculously large inputs than to check for overflows.
+     * */
+    if (len > INT32_MAX / 40) goto error;
+
+    /* Get sign */
+    if (str >= end) goto error;
+    if (*str == '-') {
+        res.neg = 1;
+        str++;
+    } else if (*str == '+') {
+        str++;
+    }
+
+    /* Skip leading zeros. Only an actual 0 counts as a digit; a lone
+     * decimal point must not turn "." into the number zero. */
+    while (str < end && (*str == '0' || *str == '.')) {
+        if (res.seenpoint) res.ex--;
+        if (*str == '.') {
+            if (res.seenpoint) goto error;
+            res.seenpoint = 1;
+        } else {
+            seenadigit = 1;
+        }
+        str++;
+    }
+
+    /* Parse significant digits */
+    while (str < end) {
+        if (*str == '.') {
+            if (res.seenpoint) goto error;
+            res.seenpoint = 1;
+        } else if (*str == '&') {
+            res.foundexp = 1;
+            break;
+        } else if (res.base == 10 && (*str == 'E' || *str == 'e')) {
+            res.foundexp = 1;
+            break;
+        } else if (!radixset && (*str == 'x' || *str == 'X')) {
+            /* C style hex prefix: only valid directly after leading zeros.
+             * Once a radix is chosen, x is an ordinary digit (bases > 33). */
+            if (res.seenpoint || res.mant > 0 || !seenadigit) goto error;
+            res.base = 16;
+            res.mant = 0;
+            radixset = 1;
+            /* The digits of the number itself are still to come */
+            seenadigit = 0;
+        } else if (!radixset && (*str == 'r' || *str == 'R'))  {
+            /* Radix prefix: the digits read so far are the base. Once a
+             * radix is chosen, r is an ordinary digit (bases > 27). */
+            if (res.seenpoint) goto error;
+            if (res.mant < 2 || res.mant > 36) goto error;
+            res.base = (int) res.mant;
+            res.mant = 0;
+            radixset = 1;
+            /* The digits of the number itself are still to come */
+            seenadigit = 0;
+        } else if (*str == '_')  {
+            ;
+            /* underscores are ignored - can be used for separator */
+        } else {
+            /* Bytes outside ASCII are never digits; masking them into the
+             * table would alias them to unrelated ASCII characters. */
+            int digit = (*str < 0x80) ? digit_lookup[*str] : 0xff;
+            if (digit >= res.base) goto error;
+            if (res.seenpoint) res.ex--;
+            if (res.mant > 0x00ffffffffffffff)
+                /* No room left: drop the digit and scale by the base */
+                res.ex++;
+            else
+                res.mant = res.base * res.mant + digit;
+            seenadigit = 1;
+        }
+        str++;
+    }
+
+    if (!seenadigit)
+        goto error;
+
+    /* Read exponent */
+    if (str < end && res.foundexp) {
+        int eneg = 0;
+        int ee = 0;
+        seenadigit = 0;
+        str++;
+        if (str >= end) goto error;
+        if (*str == '-') {
+            eneg = 1;
+            str++;
+        } else if (*str == '+') {
+            str++;
+        }
+        /* Skip leading 0s in exponent. They are digits too, so that an
+         * exponent of 0 (as in "1e0") is accepted. */
+        while (str < end && *str == '0') {
+            seenadigit = 1;
+            str++;
+        }
+        while (str < end) {
+            int digit = (*str < 0x80) ? digit_lookup[*str] : 0xff;
+            if (digit >= res.base) goto error;
+            /* Stop accumulating once the exponent is astronomically large,
+             * but keep validating the remaining characters. */
+            if (ee < (INT32_MAX / 40))
+                ee = res.base * ee + digit;
+            str++;
+            seenadigit = 1;
+        }
+        if (eneg) res.ex -= ee; else res.ex += ee;
+    }
+
+    if (!seenadigit)
+        goto error;
+
+    return res;
+
+    error:
+    res.error = 1;
+    return res;
+}
+
+/* Scan an integer from a string. If the string cannot be converted into
+ * an integer, set *err to 1 and return 0; otherwise set *err to 0 and return
+ * the value. err may be NULL. Input with a decimal point or an exponent, or
+ * whose value does not fit in an int32_t, is not an integer. */
+int32_t dst_scan_integer(
+        const uint8_t *str,
+        int32_t len,
+        int *err) {
+    struct DstScanRes res = dst_scan_impl(str, len);
+    int64_t i64;
+    if (res.error)
+        goto error;
+    /* The mantissa alone is only the value when nothing scales it: no
+     * decimal point, no exponent and no digits dropped for lack of room
+     * (which shows up as a nonzero ex). Otherwise "1.5" would read as 15. */
+    if (res.seenpoint || res.foundexp || res.ex != 0)
+        goto error;
+    /* The mantissa stays far below 2^63, so negate in the signed domain */
+    i64 = res.neg ? -(int64_t) res.mant : (int64_t) res.mant;
+    if (i64 > INT32_MAX || i64 < INT32_MIN)
+        goto error;
+    if (NULL != err)
+        *err = 0;
+    return (int32_t) i64;
+    error:
+    if (NULL != err)
+        *err = 1;
+    return 0;
+}
+
+/* Scan a real (double) from a string. If the string is not a valid number,
+ * set *err to 1 and return 0.0; otherwise set *err to 0 and return the
+ * converted value. err may be NULL. */
+double dst_scan_real(
+        const uint8_t *str,
+        int32_t len,
+        int *err) {
+    struct DstScanRes res = dst_scan_impl(str, len);
+    if (NULL != err)
+        *err = res.error ? 1 : 0;
+    if (res.error)
+        return 0.0;
+    return convert(res.neg, res.mant, res.base, res.ex);
+}
+
+/* Scans a number from a string. Returns an integer when the text has no
+ * decimal point and no exponent and the value fits in an int32_t, and a
+ * real otherwise. Returns nil in case of an error. */
+Dst dst_scan_number(
+        const uint8_t *str,
+        int32_t len) {
+    struct DstScanRes res = dst_scan_impl(str, len);
+    if (res.error)
+        return dst_wrap_nil();
+    if (!res.foundexp && !res.seenpoint) {
+        /* Negate in the signed domain: negating the unsigned mantissa and
+         * converting afterwards relies on implementation-defined behaviour.
+         * The mantissa stays far below 2^63, so the cast is safe. */
+        int64_t i64 = res.neg ? -(int64_t) res.mant : (int64_t) res.mant;
+        if (i64 <= INT32_MAX && i64 >= INT32_MIN) {
+            return dst_wrap_integer((int32_t) i64);
+        }
+    }
+    return dst_wrap_real(convert(res.neg, res.mant, res.base, res.ex));
+}