/* * Copyright (c) 2017 Calvin Rose * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include #include static const char UNEXPECTED_CLOSING_DELIM[] = "Unexpected closing delimiter"; /* The type of a ParseState */ typedef enum ParseType { PTYPE_ROOT, PTYPE_FORM, PTYPE_STRING, PTYPE_TOKEN } ParseType; /* Contain a parse state that goes on the parse stack */ struct GstParseState { ParseType type; uint32_t quoteCount; union { struct { uint8_t endDelimiter; GstArray *array; } form; struct { GstBuffer *buffer; uint32_t count; uint32_t accum; enum { STRING_STATE_BASE, STRING_STATE_ESCAPE, STRING_STATE_ESCAPE_UNICODE, STRING_STATE_ESCAPE_HEX } state; } string; } buf; }; /* Handle error in parsing */ #define p_error(p, e) ((p)->error = (e), (p)->status = GST_PARSER_ERROR) /* Get the top ParseState in the parse stack */ static GstParseState *parser_peek(GstParser *p) { if (!p->count) { p_error(p, "parser stack underflow"); return NULL; } return p->data + p->count - 1; } /* Remove the top state from the ParseStack */ static GstParseState *parser_pop(GstParser * p) { if (!p->count) { p_error(p, "parser stack underflow"); return NULL; } return p->data + --p->count; } /* Quote a value */ static GstValue quote(GstParser *p, GstValue x) { /* Load a quote form to get the string literal */ GstValue *tuple; tuple = gst_tuple_begin(p->vm, 2); tuple[0] = gst_string_cv(p->vm, "quote"); tuple[1] = x; return gst_wrap_tuple(gst_tuple_end(p->vm, tuple)); } /* Add a new, empty ParseState to the ParseStack. */ static void parser_push(GstParser *p, ParseType type, uint8_t character) { GstParseState *top; if (p->count >= p->cap) { uint32_t newCap = 2 * p->count; GstParseState *data = gst_alloc(p->vm, newCap); p->data = data; p->cap = newCap; } if (p->count) { top = parser_peek(p); top->quoteCount = p->quoteCount; p->quoteCount = 0; } ++p->count; top = parser_peek(p); if (!top) return; top->type = type; switch (type) { case PTYPE_ROOT: break; case PTYPE_STRING: top->buf.string.state = STRING_STATE_BASE; case PTYPE_TOKEN: top->buf.string.buffer = gst_buffer(p->vm, 10); break; case PTYPE_FORM: top->buf.form.array = gst_array(p->vm, 10); if (character == '(') top->buf.form.endDelimiter = ')'; if (character == '[') top->buf.form.endDelimiter = ']'; if (character == '{') top->buf.form.endDelimiter = '}'; } } /* Append a value to the top-most state in the Parser's stack. */ static void parser_append(GstParser *p, GstValue x) { GstParseState *top = parser_peek(p); if (!top) return; while (top->quoteCount--) x = quote(p, x); switch (top->type) { case PTYPE_ROOT: p->value = x; p->status = GST_PARSER_FULL; break; case PTYPE_FORM: gst_array_push(p->vm, top->buf.form.array, x); break; default: p_error(p, "expected container type"); break; } } /* Check if a character is whitespace */ static int is_whitespace(uint8_t c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\0' || c == ','; } /* Check if a character is a valid symbol character */ static int is_symbol_char(uint8_t c) { if (c >= 'a' && c <= 'z') return 1; if (c >= 'A' && c <= 'Z') return 1; if (c >= '0' && c <= ':') return 1; if (c >= '<' && c <= '@') return 1; if (c >= '*' && c <= '/') return 1; if (c >= '#' && c <= '&') return 1; if (c == '_') return 1; if (c == '^') return 1; if (c == '!') return 1; return 0; } /* Get an integer power of 10 */ static double exp10(int power) { if (power == 0) return 1; if (power > 0) { double result = 10; int currentPower = 1; while (currentPower * 2 <= power) { result = result * result; currentPower *= 2; } return result * exp10(power - currentPower); } else { return 1 / exp10(-power); } } /* Read a real from a string. Returns if successfuly * parsed a real from the enitre input string. * If returned 1, output is int ret.*/ static int read_real(const uint8_t *string, const uint8_t *end, double *ret, int forceInt) { int sign = 1, x = 0; double accum = 0, exp = 1, place = 1; /* Check the sign */ if (*string == '-') { sign = -1; ++string; } else if (*string == '+') { ++string; } if (string >= end) return 0; while (string < end) { if (*string == '.' && !forceInt) { place = 0.1; } else if (!forceInt && (*string == 'e' || *string == 'E')) { /* Read the exponent */ ++string; if (string >= end) return 0; if (!read_real(string, end, &exp, 1)) return 0; exp = exp10(exp); break; } else { x = *string; if (x < '0' || x > '9') return 0; x -= '0'; if (place < 1) { accum += x * place; place *= 0.1; } else { accum *= 10; accum += x; } } ++string; } *ret = accum * sign * exp; return 1; } static int read_integer(const uint8_t *string, const uint8_t *end, int64_t *ret) { int sign = 1, x = 0; int64_t accum = 0; if (*string == '-') { sign = -1; ++string; } else if (*string == '+') { ++string; } if (string >= end) return 0; while (string < end) { x = *string; if (x < '0' || x > '9') return 0; x -= '0'; accum = accum * 10 + x; ++string; } *ret = accum * sign; return 1; } /* Checks if a string slice is equal to a string constant */ static int check_str_const(const char *ref, const uint8_t *start, const uint8_t *end) { while (*ref && start < end) { if (*ref != *(char *)start) return 0; ++ref; ++start; } return !*ref && start == end; } /* Build from the token buffer */ static GstValue build_token(GstParser *p, GstBuffer *buf) { GstValue x; GstReal real; GstInteger integer; uint8_t *data = buf->data; uint8_t *back = data + buf->count; if (read_integer(data, back, &integer)) { x.type = GST_INTEGER; x.data.integer = integer; } else if (read_real(data, back, &real, 0)) { x.type = GST_REAL; x.data.real = real; } else if (check_str_const("nil", data, back)) { x.type = GST_NIL; x.data.boolean = 0; } else if (check_str_const("false", data, back)) { x.type = GST_BOOLEAN; x.data.boolean = 0; } else if (check_str_const("true", data, back)) { x.type = GST_BOOLEAN; x.data.boolean = 1; } else { if (buf->data[0] >= '0' && buf->data[0] <= '9') { p_error(p, "symbols cannot start with digits"); x.type = GST_NIL; } else { x.type = GST_STRING; x.data.string = gst_buffer_to_string(p->vm, buf); } } return x; } /* Handle parsing a token */ static int token_state(GstParser *p, uint8_t c) { GstParseState *top = parser_peek(p); GstBuffer *buf = top->buf.string.buffer; if (is_whitespace(c) || c == ')' || c == ']' || c == '}') { parser_pop(p); parser_append(p, build_token(p, buf)); return !(c == ')' || c == ']' || c == '}'); } else if (is_symbol_char(c)) { gst_buffer_push(p->vm, buf, c); return 1; } else { p_error(p, "expected symbol character"); return 1; } } /* Get hex digit from a letter */ static int to_hex(uint8_t c) { if (c >= '0' && c <= '9') { return c - '0'; } else if (c >= 'a' && c <= 'f') { return 10 + c - 'a'; } else if (c >= 'A' && c <= 'F') { return 10 + c - 'A'; } else { return -1; } } /* Handle parsing a string literal */ static int string_state(GstParser *p, uint8_t c) { int digit; GstParseState *top = parser_peek(p); switch (top->buf.string.state) { case STRING_STATE_BASE: if (c == '\\') { top->buf.string.state = STRING_STATE_ESCAPE; } else if (c == '"') { GstValue x; x.type = GST_STRING; x.data.string = gst_buffer_to_string(p->vm, top->buf.string.buffer); parser_pop(p); parser_append(p, quote(p, x)); } else { gst_buffer_push(p->vm, top->buf.string.buffer, c); } break; case STRING_STATE_ESCAPE: { uint8_t next; switch (c) { case 'n': next = '\n'; break; case 'r': next = '\r'; break; case 't': next = '\t'; break; case 'f': next = '\f'; break; case '0': next = '\0'; break; case '"': next = '"'; break; case '\'': next = '\''; break; case 'z': next = '\0'; break; case 'h': top->buf.string.state = STRING_STATE_ESCAPE_HEX; top->buf.string.count = 0; top->buf.string.accum = 0; return 1; default: p_error(p, "unknown string escape sequence"); return 1; } gst_buffer_push(p->vm, top->buf.string.buffer, next); top->buf.string.state = STRING_STATE_BASE; } break; case STRING_STATE_ESCAPE_HEX: digit = to_hex(c); if (digit < 0) { p_error(p, "invalid hexidecimal digit"); return 1; } else { top->buf.string.accum *= 16; top->buf.string.accum += digit; } top->buf.string.accum += digit; if (++top->buf.string.count == 2) { gst_buffer_push(p->vm, top->buf.string.buffer, top->buf.string.accum); top->buf.string.state = STRING_STATE_BASE; } break; case STRING_STATE_ESCAPE_UNICODE: break; } return 1; } /* Root state of the parser */ static int root_state(GstParser *p, uint8_t c) { if (is_whitespace(c)) return 1; p->status = GST_PARSER_PENDING; if (c == ']' || c == ')' || c == '}') { p_error(p, UNEXPECTED_CLOSING_DELIM); return 1; } if (c == '(' || c == '[' || c == '{') { parser_push(p, PTYPE_FORM, c); return 1; } if (c == '"') { parser_push(p, PTYPE_STRING, c); return 1; } if (c == '\'') { p->quoteCount++; return 1; } if (is_symbol_char(c)) { parser_push(p, PTYPE_TOKEN, c); return 0; } p_error(p, "unexpected character"); return 1; } /* Handle parsing a form */ static int form_state(GstParser *p, uint8_t c) { GstParseState *top = parser_peek(p); if (c == top->buf.form.endDelimiter) { GstArray *array = top->buf.form.array; GstValue x; if (c == ']') { x.type = GST_ARRAY; x.data.array = array; } else if (c == ')') { GstValue *tup; tup = gst_tuple_begin(p->vm, array->count); gst_memcpy(tup, array->data, array->count * sizeof(GstValue)); x.type = GST_TUPLE; x.data.tuple = gst_tuple_end(p->vm, tup); } else { /* c == '{' */ uint32_t i; if (array->count % 2 != 0) { p_error(p, "table literal must have even number of elements"); return 1; } x.type = GST_TABLE; x.data.table = gst_table(p->vm, array->count); for (i = 0; i < array->count; i += 2) { gst_table_put(p->vm, x.data.table, array->data[i], array->data[i + 1]); } } parser_pop(p); parser_append(p, x); return 1; } return root_state(p, c); } /* Handle a character */ static void dispatch_char(GstParser *p, uint8_t c) { int done = 0; ++p->index; /* Dispatch character to state */ while (!done) { GstParseState *top = parser_peek(p); switch (top->type) { case PTYPE_ROOT: done = root_state(p, c); break; case PTYPE_TOKEN: done = token_state(p, c); break; case PTYPE_FORM: done = form_state(p, c); break; case PTYPE_STRING: done = string_state(p, c); break; } } } /* Parse a C style string. The first value encountered when parsed is put * in p->value. The string variable is then updated to the next char that * was not read. Returns 1 if any values were read, otherwise returns 0. * Returns the number of bytes read. */ int gst_parse_cstring(GstParser *p, const char *string) { int bytesRead = 0; while ((p->status == GST_PARSER_PENDING || p->status == GST_PARSER_ROOT) && (string[bytesRead] != '\0')) { dispatch_char(p, string[bytesRead++]); } return bytesRead; } /* Parse a gst string */ int gst_parse_string(GstParser *p, const uint8_t *string) { uint32_t i; for (i = 0; i < gst_string_length(string); ++i) { if (p->status != GST_PARSER_PENDING && p->status != GST_PARSER_ROOT) break; dispatch_char(p, string[i]); } return i; } /* Parse a single byte */ void gst_parse_byte(GstParser *p, uint8_t byte) { dispatch_char(p, byte); } /* Check if a parser has a value that needs to be handled. If * so, the parser will not parse any more input until that value * is consumed. */ int gst_parse_hasvalue(GstParser *p) { return p->status == GST_PARSER_FULL; } /* Gets a value from the parser */ GstValue gst_parse_consume(GstParser *p) { p->status = GST_PARSER_ROOT; return p->value; } /* Parser initialization (memory allocation) */ void gst_parser(GstParser *p, Gst *vm) { p->vm = vm; GstParseState *data = gst_alloc(vm, sizeof(GstParseState) * 10); p->cap = 10; p->data = data; p->count = 0; p->index = 0; p->quoteCount = 0; p->error = NULL; p->status = GST_PARSER_ROOT; p->value.type = GST_NIL; parser_push(p, PTYPE_ROOT, ' '); } /* GC mark a parser */ static void gst_stl_parser_mark(Gst *vm, void *data, uint32_t len) { uint32_t i; GstParser *p = (GstParser *) data; if (len != sizeof(GstParser)) return; gst_mark_mem(vm, p->data); gst_mark_value(vm, p->value); for (i = 0; i < p->count; ++i) { GstParseState *ps = p->data + i; switch (ps->type) { case PTYPE_ROOT: break; case PTYPE_FORM: gst_mark_value(vm, gst_wrap_array(ps->buf.form.array)); break; case PTYPE_STRING: case PTYPE_TOKEN: gst_mark_value(vm, gst_wrap_buffer(ps->buf.string.buffer)); break; } } } /***/ /* Stl functions */ /***/ /* Parse filetype */ static const GstUserType gst_stl_parsetype = { "std.parser", NULL, NULL, NULL, &gst_stl_parser_mark }; /* Create a parser */ static int gst_stl_parser(Gst *vm) { GstParser *p = gst_userdata(vm, sizeof(GstParser), &gst_stl_parsetype); gst_parser(p, vm); gst_c_return(vm, gst_wrap_userdata(p)); } /* Consume a value from the parser */ static int gst_stl_parser_consume(Gst *vm) { GstParser *p = gst_check_userdata(vm, 0, &gst_stl_parsetype); if (p == NULL) gst_c_throwc(vm, "expected parser"); if (!gst_parse_hasvalue(p)) gst_c_throwc(vm, "parser has no pending value"); gst_c_return(vm, gst_parse_consume(p)); } /* Check if the parser has a value to consume */ static int gst_stl_parser_hasvalue(Gst *vm) { GstParser *p = gst_check_userdata(vm, 0, &gst_stl_parsetype); if (p == NULL) gst_c_throwc(vm, "expected parser"); gst_c_return(vm, gst_wrap_boolean(gst_parse_hasvalue(p))); } /* Parse a single byte. Returns if the byte was successfully parsed. */ static int gst_stl_parser_byte(Gst *vm) { GstInteger b; GstParser *p = gst_check_userdata(vm, 0, &gst_stl_parsetype); if (p == NULL) gst_c_throwc(vm, "expected parser"); if (!gst_check_integer(vm, 1, &b)) gst_c_throwc(vm, "expected integer"); if (p->status == GST_PARSER_PENDING || p->status == GST_PARSER_ROOT) { dispatch_char(p, b); gst_c_return(vm, gst_wrap_boolean(1)); } else { gst_c_return(vm, gst_wrap_boolean(0)); } } /* Parse a string or buffer. Returns nil if the entire char array is parsed, * otherwise returns the remainder of what could not be parsed. */ static int gst_stl_parser_charseq(Gst *vm) { uint32_t i; uint32_t len; const uint8_t *data; GstParser *p = gst_check_userdata(vm, 0, &gst_stl_parsetype); if (p == NULL) gst_c_throwc(vm, "expected parser"); if (!gst_chararray_view(gst_arg(vm, 1), &data, &len)) gst_c_throwc(vm, "expected string/buffer"); for (i = 0; i < len; ++i) { if (p->status != GST_PARSER_PENDING && p->status != GST_PARSER_ROOT) break; dispatch_char(p, data[i]); } if (i == len) { /* No remainder */ gst_c_return(vm, gst_wrap_nil()); } else { /* We have remaining characters */ gst_c_return(vm, gst_wrap_string(gst_string_b(vm, data + i, len - i))); } } /* Get status of parser */ static int gst_stl_parser_status(Gst *vm) { GstParser *p = gst_check_userdata(vm, 0, &gst_stl_parsetype); const char *cstr; if (p == NULL) gst_c_throwc(vm, "expected parser"); switch (p->status) { case GST_PARSER_ERROR: cstr = "error"; break; case GST_PARSER_FULL: cstr = "full"; break; case GST_PARSER_PENDING: cstr = "pending"; break; case GST_PARSER_ROOT: cstr = "root"; break; default: cstr = "unknown"; break; } gst_c_return(vm, gst_string_cv(vm, cstr)); } /* The module */ static const GstModuleItem gst_parser_module[] = { {"parser", gst_stl_parser}, {"parse-byte", gst_stl_parser_byte}, {"parse-consume", gst_stl_parser_consume}, {"parse-hasvalue", gst_stl_parser_hasvalue}, {"parse-charseq", gst_stl_parser_charseq}, {"parse-status", gst_stl_parser_status}, {NULL, NULL} }; /* Load the module */ void gst_parse_load(Gst *vm) { gst_module_put(vm, "std.parse", gst_cmodule_struct(vm, gst_parser_module)); }