2017-09-09 18:39:51 +00:00
|
|
|
/*
|
2019-01-06 08:23:03 +00:00
|
|
|
* Copyright (c) 2019 Calvin Rose
|
2017-09-09 18:39:51 +00:00
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
* of this software and associated documentation files (the "Software"), to
|
|
|
|
* deal in the Software without restriction, including without limitation the
|
|
|
|
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
|
|
* sell copies of the Software, and to permit persons to whom the Software is
|
|
|
|
* furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
2019-01-24 05:15:58 +00:00
|
|
|
#ifndef JANET_AMALG
|
2019-02-19 01:13:35 +00:00
|
|
|
#include <janet.h>
|
2019-01-06 06:49:56 +00:00
|
|
|
#include "util.h"
|
2019-01-24 05:15:58 +00:00
|
|
|
#endif
|
2018-06-29 03:36:31 +00:00
|
|
|
|
2017-09-09 18:39:51 +00:00
|
|
|
/* Return 1 when c is a byte the parser skips as whitespace, 0 otherwise.
 * Besides space, tab, newline, carriage return, vertical tab and form feed,
 * the NUL byte is also treated as whitespace. */
static int is_whitespace(uint8_t c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\0':
        case '\v':
        case '\f':
            return 1;
        default:
            return 0;
    }
}
|
|
|
|
|
2018-11-16 21:24:10 +00:00
|
|
|
/* Code generated by tools/symcharsgen.c.
|
2018-05-06 17:28:09 +00:00
|
|
|
* The table contains 256 bits, where each bit is 1
|
|
|
|
* if the corresponding ASCII code is a symbol char, and 0
|
|
|
|
* if not. The upper characters are also considered symbol
|
|
|
|
* chars and are then checked for utf-8 compliance. */
|
2018-05-01 15:06:31 +00:00
|
|
|
/* 256-bit membership table: bit (c & 0x1F) of word (c >> 5) is set when
 * byte c may appear in a symbol. */
static const uint32_t symchars[8] = {
    0x00000000, 0xf7ffec72, 0xc7ffffff, 0x17fffffe,
    0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
};

/* Check if a byte is a valid symbol character by testing its bit in
 * symchars. According to the table, the accepted ASCII bytes are A-Z, a-z,
 * 0-9 and the punctuation !$%&*+-./:<=>?@^_| ; every byte >= 0x80 is
 * accepted as well. The result is non-zero (not necessarily 1) for a symbol
 * character and 0 otherwise. */
static int is_symbol_char(uint8_t c) {
    uint32_t word = symchars[c >> 5];
    uint32_t mask = (uint32_t)1 << (c & 0x1F);
    return word & mask;
}
|
|
|
|
|
|
|
|
/* Check that str[0..len) is a sequence of well-formed UTF-8 encodings.
 * Only the encoding is inspected: lead byte class, presence and form of the
 * continuation bytes, and a few overlong forms. Code point validity is not
 * checked. Returns 1 if the bytes are well formed, 0 otherwise. */
static int valid_utf8(const uint8_t *str, int32_t len) {
    int32_t pos = 0;
    while (pos < len) {
        uint8_t lead = str[pos];
        int32_t width;
        int32_t k;

        /* Derive the sequence length from the lead byte. Continuation bytes
         * in lead position and 5/6 byte forms are rejected. */
        if (lead < 0x80) width = 1;
        else if ((lead & 0xE0) == 0xC0) width = 2;
        else if ((lead & 0xF0) == 0xE0) width = 3;
        else if ((lead & 0xF8) == 0xF0) width = 4;
        else return 0;

        /* The whole sequence must fit in the input */
        if (pos + width > len) return 0;

        /* Every trailing byte must look like 10XX XXXX */
        for (k = 1; k < width; k++) {
            if ((str[pos + k] & 0xC0) != 0x80) return 0;
        }

        /* Reject overlong encodings */
        if (width == 2 && lead < 0xC2) return 0;
        if (lead == 0xE0 && str[pos + 1] < 0xA0) return 0;
        if (lead == 0xF0 && str[pos + 1] < 0x90) return 0;

        pos += width;
    }
    return 1;
}
|
|
|
|
|
|
|
|
/* Convert a single hexadecimal digit character (0-9, a-f, A-F) to its
 * numeric value 0-15. Returns -1 if c is not a hex digit. */
static int to_hex(uint8_t c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
}
|
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
typedef int (*Consumer)(JanetParser *p, JanetParseState *state, uint8_t c);
|
|
|
|
struct JanetParseState {
|
2018-12-01 03:49:21 +00:00
|
|
|
int32_t counter;
|
2018-01-18 22:25:45 +00:00
|
|
|
int32_t argn;
|
2018-01-17 16:36:10 +00:00
|
|
|
int flags;
|
2018-12-13 23:46:53 +00:00
|
|
|
size_t start;
|
2018-01-18 22:25:45 +00:00
|
|
|
Consumer consumer;
|
|
|
|
};
|
2017-09-09 18:39:51 +00:00
|
|
|
|
2018-05-16 02:03:45 +00:00
|
|
|
/* Define a stack on the main parser struct */
|
|
|
|
#define DEF_PARSER_STACK(NAME, T, STACK, STACKCOUNT, STACKCAP) \
|
2018-09-06 02:18:42 +00:00
|
|
|
static void NAME(JanetParser *p, T x) { \
|
2018-05-16 02:03:45 +00:00
|
|
|
size_t oldcount = p->STACKCOUNT; \
|
|
|
|
size_t newcount = oldcount + 1; \
|
|
|
|
if (newcount > p->STACKCAP) { \
|
|
|
|
T *next; \
|
|
|
|
size_t newcap = 2 * newcount; \
|
|
|
|
next = realloc(p->STACK, sizeof(T) * newcap); \
|
|
|
|
if (NULL == next) { \
|
2018-09-06 02:18:42 +00:00
|
|
|
JANET_OUT_OF_MEMORY; \
|
2018-05-16 02:03:45 +00:00
|
|
|
} \
|
|
|
|
p->STACK = next; \
|
|
|
|
p->STACKCAP = newcap; \
|
|
|
|
} \
|
|
|
|
p->STACK[oldcount] = x; \
|
|
|
|
p->STACKCOUNT = newcount; \
|
|
|
|
}
|
|
|
|
|
|
|
|
DEF_PARSER_STACK(push_buf, uint8_t, buf, bufcount, bufcap)
|
2018-09-06 02:18:42 +00:00
|
|
|
DEF_PARSER_STACK(push_arg, Janet, args, argcount, argcap)
|
|
|
|
DEF_PARSER_STACK(_pushstate, JanetParseState, states, statecount, statecap)
|
2018-05-16 02:03:45 +00:00
|
|
|
|
|
|
|
#undef DEF_PARSER_STACK
|
|
|
|
|
2018-12-01 03:49:21 +00:00
|
|
|
/* Flags stored in JanetParseState.flags. The low 8 bits are left free: a
 * reader macro state stores its macro character there. */
#define PFLAG_CONTAINER 0x100       /* state collects finished child values */
#define PFLAG_BUFFER 0x200          /* string state yields a buffer, not a string */
#define PFLAG_PARENS 0x400          /* container opened with '(' */
#define PFLAG_SQRBRACKETS 0x800     /* container opened with '[' */
#define PFLAG_CURLYBRACKETS 0x1000  /* container opened with '{' */
#define PFLAG_STRING 0x2000         /* state reads a "..." string */
#define PFLAG_LONGSTRING 0x4000     /* state reads a backtick delimited string */
#define PFLAG_READERMAC 0x8000      /* state wraps the next value in a reader macro form */
#define PFLAG_ATSYM 0x10000         /* container was prefixed with '@' */
|
2018-01-17 04:18:45 +00:00
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
/* Push a new state onto the parser's state stack. The state uses `consumer`
 * to handle input, carries the given PFLAG_* flags, starts with zeroed
 * counter and argn, and records the current parser offset as its start. */
static void pushstate(JanetParser *p, Consumer consumer, int flags) {
    JanetParseState fresh;
    fresh.consumer = consumer;
    fresh.flags = flags;
    fresh.start = p->offset;
    fresh.counter = 0;
    fresh.argn = 0;
    _pushstate(p, fresh);
}
|
2017-09-09 18:39:51 +00:00
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
/* Pop the top state and hand the finished value `val` to the state below.
 * - If the state below is a container, val is appended to the argument stack
 *   (tuples first get their source mapping filled in) and the function
 *   returns.
 * - If the state below is a reader macro, val is wrapped in a two element
 *   tuple (macro-symbol val) and the loop pops again with that tuple.
 * - Otherwise nothing more is done. */
static void popstate(JanetParser *p, Janet val) {
    for (;;) {
        JanetParseState popped = p->states[--p->statecount];
        JanetParseState *parent = p->states + p->statecount - 1;

        if (parent->flags & PFLAG_CONTAINER) {
            /* Source mapping info */
            if (janet_checktype(val, JANET_TUPLE)) {
                janet_tuple_sm_start(janet_unwrap_tuple(val)) = (int32_t) popped.start;
                janet_tuple_sm_end(janet_unwrap_tuple(val)) = (int32_t) p->offset;
            }
            parent->argn++;
            /* Only the root state is left: one more complete top level value */
            if (p->statecount == 1) p->pending++;
            push_arg(p, val);
            return;
        }

        if (!(parent->flags & PFLAG_READERMAC)) return;

        /* Build (which val) for the reader macro character kept in the low
         * byte of the flags. */
        Janet *pair = janet_tuple_begin(2);
        const char *which;
        switch (parent->flags & 0xFF) {
            case '\'':
                which = "quote";
                break;
            case ',':
                which = "unquote";
                break;
            case ';':
                which = "splice";
                break;
            case '~':
                which = "quasiquote";
                break;
            default:
                which = "<unknown>";
                break;
        }
        pair[0] = janet_csymbolv(which);
        pair[1] = val;
        /* Quote source mapping info */
        janet_tuple_sm_start(pair) = (int32_t) parent->start;
        janet_tuple_sm_end(pair) = (int32_t) p->offset;
        val = janet_wrap_tuple(janet_tuple_end(pair));
    }
}
|
2017-11-21 02:39:44 +00:00
|
|
|
|
2018-06-12 18:24:45 +00:00
|
|
|
/* Translate the character that follows a backslash in a string literal.
 * Returns the byte value the escape stands for, 1 for 'x' (the start of a
 * hex escape), or -1 if c does not form a valid escape. */
static int checkescape(uint8_t c) {
    switch (c) {
        case 'x':
            return 1;
        case 'n':
            return '\n';
        case 't':
            return '\t';
        case 'r':
            return '\r';
        case 'f':
            return '\f';
        case 'v':
            return '\v';
        case 'e':
            return 27;
        case '"':
            return '"';
        case '\\':
            return '\\';
        case '0':
        case 'z':
            return '\0';
        default:
            return -1;
    }
}
|
2017-09-09 18:39:51 +00:00
|
|
|
|
2018-01-18 22:25:45 +00:00
|
|
|
/* Forward declare */
|
2018-09-06 02:18:42 +00:00
|
|
|
static int stringchar(JanetParser *p, JanetParseState *state, uint8_t c);
|
2018-01-18 22:25:45 +00:00
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
static int escapeh(JanetParser *p, JanetParseState *state, uint8_t c) {
|
2018-01-18 22:25:45 +00:00
|
|
|
int digit = to_hex(c);
|
|
|
|
if (digit < 0) {
|
|
|
|
p->error = "invalid hex digit in hex escape";
|
|
|
|
return 1;
|
2017-12-30 21:46:59 +00:00
|
|
|
}
|
2018-01-18 22:25:45 +00:00
|
|
|
state->argn = (state->argn << 4) + digit;;
|
2018-12-01 03:49:21 +00:00
|
|
|
state->counter--;
|
|
|
|
if (!state->counter) {
|
2018-05-16 02:03:45 +00:00
|
|
|
push_buf(p, (state->argn & 0xFF));
|
2018-01-18 22:25:45 +00:00
|
|
|
state->argn = 0;
|
|
|
|
state->consumer = stringchar;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
/* Consume the character directly after a backslash inside a string.
 * 'x' switches the state to escapeh expecting two hex digits; any other
 * valid escape appends its byte and returns to stringchar. An unknown
 * escape sets a parser error. Always reports the byte as consumed. */
static int escape1(JanetParser *p, JanetParseState *state, uint8_t c) {
    int value = checkescape(c);
    if (value < 0) {
        p->error = "invalid string escape sequence";
        return 1;
    }
    if (c == 'x') {
        state->consumer = escapeh;
        state->argn = 0;
        state->counter = 2;
        return 1;
    }
    push_buf(p, (uint8_t) value);
    state->consumer = stringchar;
    return 1;
}
|
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
/* Finish the string held in the parser buffer. For long strings one leading
 * and one trailing newline are dropped. Produces a buffer when the state has
 * PFLAG_BUFFER and a string otherwise, clears the parser buffer, pops the
 * state with the new value and returns 1. */
static int stringend(JanetParser *p, JanetParseState *state) {
    uint8_t *data = p->buf;
    int32_t n = (int32_t) p->bufcount;
    Janet result;

    if (state->flags & PFLAG_LONGSTRING) {
        /* Strip a leading newline ... */
        if (data[0] == '\n') {
            data++;
            n--;
        }
        /* ... and a trailing one, if any bytes remain */
        if (n > 0 && data[n - 1] == '\n') {
            n--;
        }
    }

    if (state->flags & PFLAG_BUFFER) {
        JanetBuffer *b = janet_buffer(n);
        janet_buffer_push_bytes(b, data, n);
        result = janet_wrap_buffer(b);
    } else {
        result = janet_wrap_string(janet_string(data, n));
    }

    p->bufcount = 0;
    popstate(p, result);
    return 1;
}
|
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
/* Consume one byte inside a double quoted string. A backslash switches to
 * escape1, a double quote finishes the string, a raw newline is dropped and
 * every other byte is appended to the buffer. */
static int stringchar(JanetParser *p, JanetParseState *state, uint8_t c) {
    switch (c) {
        case '\\':
            /* Enter escape */
            state->consumer = escape1;
            return 1;
        case '"':
            /* String end */
            return stringend(p, state);
        case '\n':
            /* Newlines are not stored */
            return 1;
        default:
            push_buf(p, c);
            return 1;
    }
}
|
|
|
|
|
|
|
|
/* Compare the len bytes at str with the NUL terminated constant cstr.
 * Returns 0 when they are equal, 1 when the first differing byte of str is
 * greater than that of cstr, and -1 when it is smaller or when str is a
 * proper prefix of cstr. */
static int check_str_const(const char *cstr, const uint8_t *str, int32_t len) {
    const uint8_t *want = (const uint8_t *)cstr;
    int32_t i = 0;
    while (i < len) {
        uint8_t have = str[i];
        uint8_t expect = want[i];
        if (have != expect) return (have < expect) ? -1 : 1;
        if (expect == '\0') break;
        i++;
    }
    if (cstr[i] != '\0') return -1;
    return 0;
}
|
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
/* Consume one byte of a token (keyword, number, nil/false/true or symbol).
 * Symbol characters are appended to the buffer and reported as consumed.
 * Any other byte ends the token: the buffered text is converted to a value,
 * the buffer is cleared, the state is popped, and 0 is returned so the
 * terminating byte is re-dispatched to the state below. On an invalid token
 * p->error is set and 0 is returned. */
static int tokenchar(JanetParser *p, JanetParseState *state, uint8_t c) {
    Janet ret;
    double numval;
    int32_t blen;
    if (is_symbol_char(c)) {
        push_buf(p, (uint8_t) c);
        if (c > 127) state->argn = 1; /* Use to indicate non ascii */
        return 1;
    }
    /* Token finished */
    blen = (int32_t) p->bufcount;
    /* Reject an empty token before looking at its first byte */
    if (p->buf == NULL || blen <= 0) {
        p->error = "empty symbol invalid";
        return 0;
    }
    int start_dig = p->buf[0] >= '0' && p->buf[0] <= '9';
    int start_num = start_dig || p->buf[0] == '-' || p->buf[0] == '+' || p->buf[0] == '.';
    if (p->buf[0] == ':') {
        /* Keywords get the same encoding check as symbols. The full utf-8
         * check only runs if non ascii characters were seen. */
        int valid = (!state->argn) || valid_utf8(p->buf + 1, blen - 1);
        if (!valid) {
            p->error = "invalid utf-8 in keyword";
            return 0;
        }
        ret = janet_keywordv(p->buf + 1, blen - 1);
    } else if (start_num && !janet_scan_number(p->buf, blen, &numval)) {
        ret = janet_wrap_number(numval);
    } else if (!check_str_const("nil", p->buf, blen)) {
        ret = janet_wrap_nil();
    } else if (!check_str_const("false", p->buf, blen)) {
        ret = janet_wrap_false();
    } else if (!check_str_const("true", p->buf, blen)) {
        ret = janet_wrap_true();
    } else if (start_dig) {
        p->error = "symbol literal cannot start with a digit";
        return 0;
    } else {
        /* Don't do full utf-8 check unless we have seen non ascii characters. */
        int valid = (!state->argn) || valid_utf8(p->buf, blen);
        if (!valid) {
            p->error = "invalid utf-8 in symbol";
            return 0;
        }
        ret = janet_symbolv(p->buf, blen);
    }
    p->bufcount = 0;
    popstate(p, ret);
    return 0;
}
|
2017-09-09 18:39:51 +00:00
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
/* Consume one byte of a comment. Every byte is swallowed; a newline also
 * drops the comment state from the stack. */
static int comment(JanetParser *p, JanetParseState *state, uint8_t c) {
    (void) state;
    if (c == '\n') {
        p->statecount--;
    }
    return 1;
}
|
|
|
|
|
2019-02-09 17:21:11 +00:00
|
|
|
/* Build a tuple from the last state->argn values on the argument stack,
 * removing them from the stack and keeping their order. `flag` is or-ed
 * into the tuple's flags. */
static Janet close_tuple(JanetParser *p, JanetParseState *state, int32_t flag) {
    int32_t slot = state->argn;
    Janet *items = janet_tuple_begin(state->argn);
    janet_tuple_flag(items) |= flag;
    /* Pop from the back so the values end up in source order */
    while (slot > 0) {
        slot--;
        items[slot] = p->args[--p->argcount];
    }
    return janet_wrap_tuple(janet_tuple_end(items));
}
|
2017-11-21 02:39:44 +00:00
|
|
|
|
2019-01-07 02:49:24 +00:00
|
|
|
/* Build an array from the last state->argn values on the argument stack,
 * removing them from the stack and keeping their order. */
static Janet close_array(JanetParser *p, JanetParseState *state) {
    int32_t slot = state->argn;
    JanetArray *arr = janet_array(state->argn);
    /* Pop from the back so the values end up in source order */
    while (slot > 0) {
        slot--;
        arr->data[slot] = p->args[--p->argcount];
    }
    arr->count = state->argn;
    return janet_wrap_array(arr);
}
|
2017-09-09 18:39:51 +00:00
|
|
|
|
2019-01-07 02:49:24 +00:00
|
|
|
/* Build a struct from the last state->argn values on the argument stack,
 * read as key/value pairs. The pairs are popped, and therefore inserted,
 * from last to first. */
static Janet close_struct(JanetParser *p, JanetParseState *state) {
    JanetKV *kvs = janet_struct_begin(state->argn >> 1);
    int32_t left = state->argn;
    while (left > 0) {
        Janet value = p->args[--p->argcount];
        Janet key = p->args[--p->argcount];
        janet_struct_put(kvs, key, value);
        left -= 2;
    }
    return janet_wrap_struct(janet_struct_end(kvs));
}
|
2017-09-09 18:39:51 +00:00
|
|
|
|
2019-01-07 02:49:24 +00:00
|
|
|
/* Build a table from the last state->argn values on the argument stack,
 * read as key/value pairs. The pairs are popped, and therefore inserted,
 * from last to first. */
static Janet close_table(JanetParser *p, JanetParseState *state) {
    JanetTable *tab = janet_table(state->argn >> 1);
    int32_t left = state->argn;
    while (left > 0) {
        Janet value = p->args[--p->argcount];
        Janet key = p->args[--p->argcount];
        janet_table_put(tab, key, value);
        left -= 2;
    }
    return janet_wrap_table(tab);
}
|
2017-12-30 21:46:59 +00:00
|
|
|
|
2018-12-01 03:49:21 +00:00
|
|
|
#define PFLAG_INSTRING 0x100000
|
|
|
|
#define PFLAG_END_CANDIDATE 0x200000
|
2018-09-06 02:18:42 +00:00
|
|
|
static int longstring(JanetParser *p, JanetParseState *state, uint8_t c) {
|
2018-05-06 17:28:09 +00:00
|
|
|
if (state->flags & PFLAG_INSTRING) {
|
|
|
|
/* We are inside the long string */
|
2018-05-09 21:01:58 +00:00
|
|
|
if (c == '`') {
|
2018-05-06 17:28:09 +00:00
|
|
|
state->flags |= PFLAG_END_CANDIDATE;
|
|
|
|
state->flags &= ~PFLAG_INSTRING;
|
2018-12-01 03:49:21 +00:00
|
|
|
state->counter = 1; /* Use counter to keep track of number of '=' seen */
|
2018-05-06 17:28:09 +00:00
|
|
|
return 1;
|
|
|
|
}
|
2018-05-16 02:03:45 +00:00
|
|
|
push_buf(p, c);
|
2018-05-06 17:28:09 +00:00
|
|
|
return 1;
|
|
|
|
} else if (state->flags & PFLAG_END_CANDIDATE) {
|
|
|
|
int i;
|
|
|
|
/* We are checking a potential end of the string */
|
2018-12-01 03:49:21 +00:00
|
|
|
if (state->counter == state->argn) {
|
2018-05-09 21:01:58 +00:00
|
|
|
stringend(p, state);
|
|
|
|
return 0;
|
2018-05-06 17:28:09 +00:00
|
|
|
}
|
2018-12-01 03:49:21 +00:00
|
|
|
if (c == '`' && state->counter < state->argn) {
|
|
|
|
state->counter++;
|
2018-05-06 17:28:09 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
/* Failed end candidate */
|
2018-12-01 03:49:21 +00:00
|
|
|
for (i = 0; i < state->counter; i++) {
|
2018-05-16 02:03:45 +00:00
|
|
|
push_buf(p, '`');
|
2018-05-06 17:28:09 +00:00
|
|
|
}
|
2018-05-16 02:03:45 +00:00
|
|
|
push_buf(p, c);
|
2018-12-01 03:49:21 +00:00
|
|
|
state->counter = 0;
|
2018-05-06 17:28:09 +00:00
|
|
|
state->flags &= ~PFLAG_END_CANDIDATE;
|
|
|
|
state->flags |= PFLAG_INSTRING;
|
|
|
|
return 1;
|
|
|
|
} else {
|
|
|
|
/* We are at beginning of string */
|
2018-05-09 21:01:58 +00:00
|
|
|
state->argn++;
|
|
|
|
if (c != '`') {
|
|
|
|
state->flags |= PFLAG_INSTRING;
|
2018-05-16 02:03:45 +00:00
|
|
|
push_buf(p, c);
|
2018-05-06 17:28:09 +00:00
|
|
|
}
|
2018-05-09 21:01:58 +00:00
|
|
|
return 1;
|
2018-05-06 17:28:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-01-07 02:49:24 +00:00
|
|
|
/* Forward declaration: containers opened here are read by root */
static int root(JanetParser *p, JanetParseState *state, uint8_t c);

/* Consume the byte that follows an '@'. The '@' state is dropped first.
 * An opening bracket starts a container flagged with PFLAG_ATSYM, a quote
 * or backtick starts a string flagged with PFLAG_BUFFER. For any other byte
 * the '@' is taken as the first character of an ordinary token and the byte
 * is left unconsumed. */
static int ampersand(JanetParser *p, JanetParseState *state, uint8_t c) {
    Consumer next;
    int flags;
    (void) state;
    p->statecount--;
    switch (c) {
        case '(':
            next = root;
            flags = PFLAG_CONTAINER | PFLAG_PARENS | PFLAG_ATSYM;
            break;
        case '[':
            next = root;
            flags = PFLAG_CONTAINER | PFLAG_SQRBRACKETS | PFLAG_ATSYM;
            break;
        case '{':
            next = root;
            flags = PFLAG_CONTAINER | PFLAG_CURLYBRACKETS | PFLAG_ATSYM;
            break;
        case '"':
            next = stringchar;
            flags = PFLAG_BUFFER | PFLAG_STRING;
            break;
        case '`':
            next = longstring;
            flags = PFLAG_BUFFER | PFLAG_LONGSTRING;
            break;
        default:
            pushstate(p, tokenchar, 0);
            push_buf(p, '@'); /* Restore the leading '@' that was dropped */
            return 0;
    }
    pushstate(p, next, flags);
    return 1;
}
|
2018-01-17 04:18:45 +00:00
|
|
|
|
2018-05-13 00:31:28 +00:00
|
|
|
/* The root state of the parser */
|
2018-09-06 02:18:42 +00:00
|
|
|
static int root(JanetParser *p, JanetParseState *state, uint8_t c) {
|
2018-01-18 22:25:45 +00:00
|
|
|
switch (c) {
|
|
|
|
default:
|
|
|
|
if (is_whitespace(c)) return 1;
|
2018-02-07 18:19:34 +00:00
|
|
|
if (!is_symbol_char(c)) {
|
|
|
|
p->error = "unexpected character";
|
|
|
|
return 1;
|
|
|
|
}
|
2018-01-18 22:25:45 +00:00
|
|
|
pushstate(p, tokenchar, 0);
|
|
|
|
return 0;
|
|
|
|
case '\'':
|
2018-12-01 03:49:21 +00:00
|
|
|
case ',':
|
|
|
|
case ';':
|
|
|
|
case '~':
|
|
|
|
pushstate(p, root, PFLAG_READERMAC | c);
|
2018-01-18 22:25:45 +00:00
|
|
|
return 1;
|
|
|
|
case '"':
|
2018-05-07 05:04:24 +00:00
|
|
|
pushstate(p, stringchar, PFLAG_STRING);
|
2018-01-18 22:25:45 +00:00
|
|
|
return 1;
|
|
|
|
case '#':
|
|
|
|
pushstate(p, comment, 0);
|
|
|
|
return 1;
|
|
|
|
case '@':
|
|
|
|
pushstate(p, ampersand, 0);
|
|
|
|
return 1;
|
2018-05-09 21:01:58 +00:00
|
|
|
case '`':
|
2018-05-07 05:04:24 +00:00
|
|
|
pushstate(p, longstring, PFLAG_LONGSTRING);
|
2018-05-06 17:28:09 +00:00
|
|
|
return 1;
|
2018-01-18 22:25:45 +00:00
|
|
|
case ')':
|
|
|
|
case ']':
|
2019-02-20 01:51:34 +00:00
|
|
|
case '}': {
|
|
|
|
Janet ds;
|
|
|
|
if (p->statecount == 1) {
|
|
|
|
p->error = "unexpected delimiter";
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
if ((c == ')' && (state->flags & PFLAG_PARENS)) ||
|
|
|
|
(c == ']' && (state->flags & PFLAG_SQRBRACKETS))) {
|
|
|
|
if (state->flags & PFLAG_ATSYM) {
|
|
|
|
ds = close_array(p, state);
|
|
|
|
} else {
|
|
|
|
ds = close_tuple(p, state, c == ']' ? JANET_TUPLE_FLAG_BRACKETCTOR : 0);
|
|
|
|
}
|
|
|
|
} else if (c == '}' && (state->flags & PFLAG_CURLYBRACKETS)) {
|
|
|
|
if (state->argn & 1) {
|
|
|
|
p->error = "struct and table literals expect even number of arguments";
|
2019-01-07 02:49:24 +00:00
|
|
|
return 1;
|
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
if (state->flags & PFLAG_ATSYM) {
|
|
|
|
ds = close_table(p, state);
|
2019-01-07 19:49:38 +00:00
|
|
|
} else {
|
2019-02-20 01:51:34 +00:00
|
|
|
ds = close_struct(p, state);
|
2019-01-07 02:49:24 +00:00
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
} else {
|
|
|
|
p->error = "mismatched delimiter";
|
|
|
|
return 1;
|
2019-01-07 02:49:24 +00:00
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
popstate(p, ds);
|
|
|
|
}
|
|
|
|
return 1;
|
2018-01-18 22:25:45 +00:00
|
|
|
case '(':
|
2019-01-07 02:49:24 +00:00
|
|
|
pushstate(p, root, PFLAG_CONTAINER | PFLAG_PARENS);
|
2018-01-18 22:25:45 +00:00
|
|
|
return 1;
|
|
|
|
case '[':
|
2019-01-07 02:49:24 +00:00
|
|
|
pushstate(p, root, PFLAG_CONTAINER | PFLAG_SQRBRACKETS);
|
2018-01-18 22:25:45 +00:00
|
|
|
return 1;
|
|
|
|
case '{':
|
2019-01-07 02:49:24 +00:00
|
|
|
pushstate(p, root, PFLAG_CONTAINER | PFLAG_CURLYBRACKETS);
|
2018-01-18 22:25:45 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
2017-12-30 21:46:59 +00:00
|
|
|
|
2019-02-27 18:09:19 +00:00
|
|
|
/* Panic if the parser may not take more input: either its flag is set
 * (janet_parser_eof sets it) or it holds an error that was not fetched yet. */
static void janet_parser_checkdead(JanetParser *parser) {
    if (parser->flag) {
        janet_panic("parser is dead, cannot consume");
    }
    if (parser->error) {
        janet_panic("parser has unchecked error, cannot consume");
    }
}
|
|
|
|
|
|
|
|
/* Public API */
|
|
|
|
|
|
|
|
/* Feed one byte to the parser. The offset is advanced, then the byte is
 * offered to the consumer of the top state again and again until one of
 * them reports it as consumed or an error is set. The byte is remembered in
 * parser->lookback. Panics (via janet_parser_checkdead) if the parser is
 * dead or has an unchecked error. */
void janet_parser_consume(JanetParser *parser, uint8_t c) {
    janet_parser_checkdead(parser);
    parser->offset++;
    for (int done = 0; !done && !parser->error;) {
        JanetParseState *top = parser->states + parser->statecount - 1;
        done = top->consumer(parser, top, c);
    }
    parser->lookback = c;
}
|
|
|
|
|
|
|
|
/* Signal the end of input. A final newline is fed to the parser to finish
 * any token or comment in progress; if states other than the root are still
 * open afterwards, the error "unexpected end of source" is set. The offset
 * increment caused by the extra newline is undone and the parser is marked
 * dead. */
void janet_parser_eof(JanetParser *parser) {
    janet_parser_checkdead(parser);
    janet_parser_consume(parser, '\n');
    if (parser->statecount > 1) {
        parser->error = "unexpected end of source";
    }
    parser->flag = 1;
    parser->offset--;
}
|
2017-09-09 18:39:51 +00:00
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
/* Report the parser status, checked in this order: JANET_PARSE_ERROR if an
 * error is set, JANET_PARSE_DEAD if the parser flag is set,
 * JANET_PARSE_PENDING if any state besides the root is open, otherwise
 * JANET_PARSE_ROOT. */
enum JanetParserStatus janet_parser_status(JanetParser *parser) {
    if (parser->error) {
        return JANET_PARSE_ERROR;
    }
    if (parser->flag) {
        return JANET_PARSE_DEAD;
    }
    return (parser->statecount > 1) ? JANET_PARSE_PENDING : JANET_PARSE_ROOT;
}
|
2017-09-09 18:39:51 +00:00
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
/* Discard all parsed values and any partially read form: the state stack is
 * cut back to the root state and the argument stack, byte buffer and
 * pending counter are emptied. Allocated capacity is kept. */
void janet_parser_flush(JanetParser *parser) {
    parser->statecount = 1;
    parser->bufcount = 0;
    parser->argcount = 0;
    parser->pending = 0;
}
|
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
/* Fetch and clear the parser's error. If the parser is in the error status,
 * the message is returned, the error is reset and the parser is flushed.
 * Returns NULL when there is no error. */
const char *janet_parser_error(JanetParser *parser) {
    if (janet_parser_status(parser) != JANET_PARSE_ERROR) {
        return NULL;
    }
    const char *message = parser->error;
    parser->error = NULL;
    janet_parser_flush(parser);
    return message;
}
|
2017-12-30 21:46:59 +00:00
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
/* Take the oldest completed top level value out of the parser. Returns nil
 * when no value is pending. The remaining values are shifted down by one
 * slot and the pending and argument counts are decremented. */
Janet janet_parser_produce(JanetParser *parser) {
    if (parser->pending == 0) {
        return janet_wrap_nil();
    }
    Janet first = parser->args[0];
    /* Close the gap left by the removed value */
    for (size_t dst = 0; dst + 1 < parser->argcount; dst++) {
        parser->args[dst] = parser->args[dst + 1];
    }
    parser->argcount--;
    parser->pending--;
    return first;
}
|
2017-09-09 18:39:51 +00:00
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
/* Initialize a parser: all three stacks start empty and unallocated, there
 * is no error, no lookback byte (-1), offset 0, nothing pending and the
 * dead flag cleared. The root state is then pushed as a container. */
void janet_parser_init(JanetParser *parser) {
    /* Argument stack */
    parser->args = NULL;
    parser->argcount = 0;
    parser->argcap = 0;
    /* State stack */
    parser->states = NULL;
    parser->statecount = 0;
    parser->statecap = 0;
    /* Byte buffer */
    parser->buf = NULL;
    parser->bufcount = 0;
    parser->bufcap = 0;
    /* Bookkeeping */
    parser->error = NULL;
    parser->lookback = -1;
    parser->offset = 0;
    parser->pending = 0;
    parser->flag = 0;

    pushstate(parser, root, PFLAG_CONTAINER);
}
|
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
/* Release the memory of the parser's three stacks. The parser struct itself
 * is not freed. */
void janet_parser_deinit(JanetParser *parser) {
    free(parser->states);
    free(parser->buf);
    free(parser->args);
}
|
2017-11-01 21:53:43 +00:00
|
|
|
|
2019-04-27 19:45:28 +00:00
|
|
|
/* Return 1 if at least one completed top level value is waiting to be
 * produced, 0 otherwise. */
int janet_parser_has_more(JanetParser *parser) {
    return parser->pending != 0;
}
|
|
|
|
|
2018-01-18 22:25:45 +00:00
|
|
|
/* C functions */
|
2017-11-06 03:05:47 +00:00
|
|
|
|
2018-01-18 22:25:45 +00:00
|
|
|
static int parsermark(void *p, size_t size) {
|
2018-05-16 02:03:45 +00:00
|
|
|
size_t i;
|
2018-09-06 02:18:42 +00:00
|
|
|
JanetParser *parser = (JanetParser *)p;
|
2018-01-18 22:25:45 +00:00
|
|
|
(void) size;
|
2018-05-16 02:03:45 +00:00
|
|
|
for (i = 0; i < parser->argcount; i++) {
|
2018-09-06 02:18:42 +00:00
|
|
|
janet_mark(parser->args[i]);
|
2018-01-18 22:25:45 +00:00
|
|
|
}
|
|
|
|
return 0;
|
2017-09-09 18:39:51 +00:00
|
|
|
}
|
|
|
|
|
2018-01-18 22:25:45 +00:00
|
|
|
static int parsergc(void *p, size_t size) {
|
2018-09-06 02:18:42 +00:00
|
|
|
JanetParser *parser = (JanetParser *)p;
|
2018-01-18 22:25:45 +00:00
|
|
|
(void) size;
|
2018-09-06 02:18:42 +00:00
|
|
|
janet_parser_deinit(parser);
|
2018-01-18 22:25:45 +00:00
|
|
|
return 0;
|
2017-09-09 18:39:51 +00:00
|
|
|
}
|
2018-01-16 04:31:39 +00:00
|
|
|
|
2019-02-06 00:43:41 +00:00
|
|
|
static Janet parserget(void *p, Janet key);
|
|
|
|
|
2018-09-06 02:18:42 +00:00
|
|
|
static JanetAbstractType janet_parse_parsertype = {
|
2019-01-03 00:41:07 +00:00
|
|
|
"core/parser",
|
2018-01-18 22:25:45 +00:00
|
|
|
parsergc,
|
2019-02-05 16:14:13 +00:00
|
|
|
parsermark,
|
2019-02-06 00:43:41 +00:00
|
|
|
parserget,
|
2019-02-24 01:02:54 +00:00
|
|
|
NULL,
|
|
|
|
NULL,
|
2019-03-19 17:36:26 +00:00
|
|
|
NULL,
|
2019-02-05 16:14:13 +00:00
|
|
|
NULL
|
2018-01-18 22:25:45 +00:00
|
|
|
};
|
|
|
|
|
2018-01-16 04:31:39 +00:00
|
|
|
/* C Function parser */
|
2019-01-24 05:15:58 +00:00
|
|
|
static Janet cfun_parse_parser(int32_t argc, Janet *argv) {
|
2019-01-06 01:09:03 +00:00
|
|
|
(void) argv;
|
2019-01-06 01:45:24 +00:00
|
|
|
janet_fixarity(argc, 0);
|
2018-09-06 02:18:42 +00:00
|
|
|
JanetParser *p = janet_abstract(&janet_parse_parsertype, sizeof(JanetParser));
|
|
|
|
janet_parser_init(p);
|
2019-01-06 01:09:03 +00:00
|
|
|
return janet_wrap_abstract(p);
|
|
|
|
}
|
|
|
|
|
2019-01-24 05:15:58 +00:00
|
|
|
/* C function: feed bytes to a parser.
 * argv[0] is the parser, argv[1] the bytes, and the optional argv[2] an
 * offset into the bytes at which to start (panics unless 0 <= offset <=
 * length). Bytes are consumed one at a time until the input runs out or the
 * parser leaves the root/pending status. Returns the number of bytes
 * consumed, counted from the start offset. */
static Janet cfun_parse_consume(int32_t argc, Janet *argv) {
    janet_arity(argc, 2, 3);
    JanetParser *p = janet_getabstract(argv, 0, &janet_parse_parsertype);
    JanetByteView view = janet_getbytes(argv, 1);
    if (argc == 3) {
        int32_t offset = janet_getinteger(argv, 2);
        if (offset < 0 || offset > view.len) {
            janet_panicf("invalid offset %d out of range [0,%d]", offset, view.len);
        }
        view.bytes += offset;
        view.len -= offset;
    }
    int32_t used = 0;
    while (used < view.len) {
        janet_parser_consume(p, view.bytes[used]);
        used++;
        enum JanetParserStatus status = janet_parser_status(p);
        /* Stop right after the byte that produced an error or killed the parser */
        if (status != JANET_PARSE_ROOT && status != JANET_PARSE_PENDING) {
            break;
        }
    }
    return janet_wrap_integer(used);
}
|
|
|
|
|
2019-02-27 18:09:19 +00:00
|
|
|
/* C function behind parser/eof: signals end of input to the parser given as
 * argv[0] via janet_parser_eof, then returns that same parser value. */
static Janet cfun_parse_eof(int32_t argc, Janet *argv) {
    JanetParser *parser;
    janet_fixarity(argc, 1);
    parser = janet_getabstract(argv, 0, &janet_parse_parsertype);
    janet_parser_eof(parser);
    return argv[0];
}
|
|
|
|
|
2019-01-31 19:48:28 +00:00
|
|
|
/* C function behind parser/insert.
 *
 * argv[0] is the parser and argv[1] the value to insert. What happens depends
 * on the state on top of the parser's state stack:
 *   - container state: argv[1] is pushed as one more argument of the container
 *     (and counted as a pending value when the stack holds only one state);
 *   - string / long-string state: the string form of argv[1] is appended to
 *     the parser's byte buffer;
 *   - anything else: panics with "cannot insert value into parser".
 *
 * Returns the parser (argv[0]). */
static Janet cfun_parse_insert(int32_t argc, Janet *argv) {
    janet_fixarity(argc, 2);
    JanetParser *p = janet_getabstract(argv, 0, &janet_parse_parsertype);
    JanetParseState *s = p->states + p->statecount - 1;
    if (s->consumer == tokenchar) {
        /* A token is in progress: feed a space to the token consumer
         * (presumably to finish the token), then take the space back out of
         * the offset count since it was never part of the real input. */
        janet_parser_consume(p, ' ');
        p->offset--;
        /* Look up the top state again; consuming a byte may have changed
         * the state stack. */
        s = p->states + p->statecount - 1;
    }
    if (s->flags & PFLAG_CONTAINER) {
        s->argn++;
        if (p->statecount == 1) p->pending++;
        push_arg(p, argv[1]);
    } else if (s->flags & (PFLAG_STRING | PFLAG_LONGSTRING)) {
        const uint8_t *str = janet_to_string(argv[1]);
        int32_t slen = janet_string_length(str);
        size_t newcount = p->bufcount + slen;
        if (p->bufcap < newcount) {
            /* Grow to twice the required size. Reallocate into a temporary so
             * p->buf is never overwritten with NULL on failure. */
            size_t newcap = 2 * newcount;
            void *newbuf = realloc(p->buf, newcap);
            if (newbuf == NULL) {
                JANET_OUT_OF_MEMORY;
            }
            p->buf = newbuf;
            p->bufcap = newcap;
        }
        memcpy(p->buf + p->bufcount, str, slen);
        p->bufcount = newcount;
    } else {
        janet_panic("cannot insert value into parser");
    }
    return argv[0];
}
|
|
|
|
|
2019-01-24 05:15:58 +00:00
|
|
|
/* C function behind parser/has-more: returns the result of
 * janet_parser_has_more for the parser in argv[0], wrapped as a boolean. */
static Janet cfun_parse_has_more(int32_t argc, Janet *argv) {
    JanetParser *parser;
    janet_fixarity(argc, 1);
    parser = janet_getabstract(argv, 0, &janet_parse_parsertype);
    return janet_wrap_boolean(janet_parser_has_more(parser));
}
|
|
|
|
|
2019-01-24 05:15:58 +00:00
|
|
|
/* C function behind parser/byte: feeds a single byte to the parser in
 * argv[0]. The byte is the low 8 bits of the integer in argv[1].
 * Returns the parser (argv[0]). */
static Janet cfun_parse_byte(int32_t argc, Janet *argv) {
    JanetParser *parser;
    int32_t value;
    janet_fixarity(argc, 2);
    parser = janet_getabstract(argv, 0, &janet_parse_parsertype);
    value = janet_getinteger(argv, 1);
    /* Only the lowest byte of the integer is passed on. */
    janet_parser_consume(parser, 0xFF & value);
    return argv[0];
}
|
|
|
|
|
2019-01-24 05:15:58 +00:00
|
|
|
/* C function behind parser/status: maps the status of the parser in argv[0]
 * to one of the keywords :root, :pending, :error or :dead and returns it. */
static Janet cfun_parse_status(int32_t argc, Janet *argv) {
    janet_fixarity(argc, 1);
    JanetParser *parser = janet_getabstract(argv, 0, &janet_parse_parsertype);

    /* Stays NULL only if the status matches none of the cases below. */
    const char *label = NULL;
    switch (janet_parser_status(parser)) {
        case JANET_PARSE_ROOT:
            label = "root";
            break;
        case JANET_PARSE_PENDING:
            label = "pending";
            break;
        case JANET_PARSE_ERROR:
            label = "error";
            break;
        case JANET_PARSE_DEAD:
            label = "dead";
            break;
    }
    return janet_ckeywordv(label);
}
|
|
|
|
|
2019-01-24 05:15:58 +00:00
|
|
|
/* C function behind parser/error: asks janet_parser_error for the error
 * message of the parser in argv[0]. Returns that message as a string value,
 * or nil when no message is reported. */
static Janet cfun_parse_error(int32_t argc, Janet *argv) {
    janet_fixarity(argc, 1);
    JanetParser *parser = janet_getabstract(argv, 0, &janet_parse_parsertype);
    const char *message = janet_parser_error(parser);
    if (message == NULL) {
        return janet_wrap_nil();
    }
    return janet_cstringv(message);
}
|
|
|
|
|
2019-01-24 05:15:58 +00:00
|
|
|
/* C function behind parser/produce: returns whatever janet_parser_produce
 * yields for the parser in argv[0]. */
static Janet cfun_parse_produce(int32_t argc, Janet *argv) {
    JanetParser *parser;
    janet_fixarity(argc, 1);
    parser = janet_getabstract(argv, 0, &janet_parse_parsertype);
    return janet_parser_produce(parser);
}
|
|
|
|
|
2019-01-24 05:15:58 +00:00
|
|
|
/* C function behind parser/flush: calls janet_parser_flush on the parser in
 * argv[0] and returns that same parser value. */
static Janet cfun_parse_flush(int32_t argc, Janet *argv) {
    JanetParser *parser;
    janet_fixarity(argc, 1);
    parser = janet_getabstract(argv, 0, &janet_parse_parsertype);
    janet_parser_flush(parser);
    return argv[0];
}
|
|
|
|
|
2019-01-24 05:15:58 +00:00
|
|
|
/* C function behind parser/where: returns the offset field of the parser in
 * argv[0], wrapped as an integer. */
static Janet cfun_parse_where(int32_t argc, Janet *argv) {
    JanetParser *parser;
    janet_fixarity(argc, 1);
    parser = janet_getabstract(argv, 0, &janet_parse_parsertype);
    return janet_wrap_integer(parser->offset);
}
|
|
|
|
|
2019-01-24 05:15:58 +00:00
|
|
|
/* C function behind parser/state: builds a string with one opening delimiter
 * per entry of the parser's state stack, bottom to top:
 *   '(' , '[' , '{'  for the three bracket kinds,
 *   '"'              for a string,
 *   argn backticks   for a long string.
 * States carrying none of those flags contribute nothing.
 *
 * The parser's own byte buffer is borrowed as scratch space: delimiters are
 * appended after the current contents, copied into a new string, and then
 * the buffer count is put back to its previous value. */
static Janet cfun_parse_state(int32_t argc, Janet *argv) {
    janet_fixarity(argc, 1);
    JanetParser *p = janet_getabstract(argv, 0, &janet_parse_parsertype);

    /* Where the existing buffer contents end and the scratch area begins. */
    const size_t mark = p->bufcount;

    for (size_t depth = 0; depth < p->statecount; depth++) {
        JanetParseState *frame = p->states + depth;
        if (frame->flags & PFLAG_PARENS) {
            push_buf(p, '(');
            continue;
        }
        if (frame->flags & PFLAG_SQRBRACKETS) {
            push_buf(p, '[');
            continue;
        }
        if (frame->flags & PFLAG_CURLYBRACKETS) {
            push_buf(p, '{');
            continue;
        }
        if (frame->flags & PFLAG_STRING) {
            push_buf(p, '"');
            continue;
        }
        if (frame->flags & PFLAG_LONGSTRING) {
            /* One backtick per count stored in argn. */
            for (int32_t tick = 0; tick < frame->argn; tick++) {
                push_buf(p, '`');
            }
        }
    }

    const uint8_t *result = janet_string(p->buf + mark, (int32_t)(p->bufcount - mark));
    /* Drop the scratch bytes again. */
    p->bufcount = mark;
    return janet_wrap_string(result);
}
|
|
|
|
|
2019-02-06 00:43:41 +00:00
|
|
|
static const JanetMethod parser_methods[] = {
|
|
|
|
{"byte", cfun_parse_byte},
|
|
|
|
{"consume", cfun_parse_consume},
|
|
|
|
{"error", cfun_parse_error},
|
|
|
|
{"flush", cfun_parse_flush},
|
|
|
|
{"has-more", cfun_parse_has_more},
|
|
|
|
{"insert", cfun_parse_insert},
|
|
|
|
{"produce", cfun_parse_produce},
|
|
|
|
{"state", cfun_parse_state},
|
|
|
|
{"status", cfun_parse_status},
|
|
|
|
{"where", cfun_parse_where},
|
2019-02-27 18:09:19 +00:00
|
|
|
{"eof", cfun_parse_eof},
|
2019-02-06 00:43:41 +00:00
|
|
|
{NULL, NULL}
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Getter used by the parser abstract type: resolves a keyword key to the
 * matching entry of parser_methods via janet_getmethod. Panics when the key
 * is not a keyword. The parser pointer itself is not consulted. */
static Janet parserget(void *p, Janet key) {
    (void) p;
    if (janet_checktype(key, JANET_KEYWORD)) {
        return janet_getmethod(janet_unwrap_keyword(key), parser_methods);
    }
    janet_panicf("expected keyword method");
    /* Kept so the lookup still happens should the panic call ever return. */
    return janet_getmethod(janet_unwrap_keyword(key), parser_methods);
}
|
|
|
|
|
2019-01-24 05:15:58 +00:00
|
|
|
static const JanetReg parse_cfuns[] = {
|
2019-01-06 06:49:56 +00:00
|
|
|
{
|
2019-01-24 05:15:58 +00:00
|
|
|
"parser/new", cfun_parse_parser,
|
2019-01-06 06:49:56 +00:00
|
|
|
JDOC("(parser/new)\n\n"
|
2019-02-20 01:51:34 +00:00
|
|
|
"Creates and returns a new parser object. Parsers are state machines "
|
|
|
|
"that can receive bytes, and generate a stream of janet values. ")
|
2018-11-16 21:24:10 +00:00
|
|
|
},
|
2019-01-06 06:49:56 +00:00
|
|
|
{
|
2019-01-24 05:15:58 +00:00
|
|
|
"parser/has-more", cfun_parse_has_more,
|
2019-01-06 06:49:56 +00:00
|
|
|
JDOC("(parser/has-more parser)\n\n"
|
2019-02-20 01:51:34 +00:00
|
|
|
"Check if the parser has more values in the value queue.")
|
2019-01-04 01:44:58 +00:00
|
|
|
},
|
2019-01-06 06:49:56 +00:00
|
|
|
{
|
2019-01-24 05:15:58 +00:00
|
|
|
"parser/produce", cfun_parse_produce,
|
2019-01-06 06:49:56 +00:00
|
|
|
JDOC("(parser/produce parser)\n\n"
|
2019-02-20 01:51:34 +00:00
|
|
|
"Dequeue the next value in the parse queue. Will return nil if "
|
|
|
|
"no parsed values are in the queue, otherwise will dequeue the "
|
|
|
|
"next value.")
|
2018-11-16 21:24:10 +00:00
|
|
|
},
|
2019-01-06 06:49:56 +00:00
|
|
|
{
|
2019-01-24 05:15:58 +00:00
|
|
|
"parser/consume", cfun_parse_consume,
|
2019-01-06 06:49:56 +00:00
|
|
|
JDOC("(parser/consume parser bytes [, index])\n\n"
|
2019-02-20 01:51:34 +00:00
|
|
|
"Input bytes into the parser and parse them. Will not throw errors "
|
|
|
|
"if there is a parse error. Starts at the byte index given by index. Returns "
|
|
|
|
"the number of bytes read.")
|
2018-11-16 21:24:10 +00:00
|
|
|
},
|
2019-01-06 06:49:56 +00:00
|
|
|
{
|
2019-01-24 05:15:58 +00:00
|
|
|
"parser/byte", cfun_parse_byte,
|
2019-01-06 06:49:56 +00:00
|
|
|
JDOC("(parser/byte parser b)\n\n"
|
2019-02-20 01:51:34 +00:00
|
|
|
"Input a single byte into the parser byte stream. Returns the parser.")
|
2018-11-16 21:24:10 +00:00
|
|
|
},
|
2019-01-06 06:49:56 +00:00
|
|
|
{
|
2019-01-24 05:15:58 +00:00
|
|
|
"parser/error", cfun_parse_error,
|
2019-01-06 06:49:56 +00:00
|
|
|
JDOC("(parser/error parser)\n\n"
|
2019-02-20 01:51:34 +00:00
|
|
|
"If the parser is in the error state, returns the message associated with "
|
|
|
|
"that error. Otherwise, returns nil. Also flushes the parser state and parser "
|
|
|
|
"queue, so be sure to handle everything in the queue before calling "
|
|
|
|
"parser/error.")
|
2018-11-16 21:24:10 +00:00
|
|
|
},
|
2019-01-06 06:49:56 +00:00
|
|
|
{
|
2019-01-24 05:15:58 +00:00
|
|
|
"parser/status", cfun_parse_status,
|
2019-01-06 06:49:56 +00:00
|
|
|
JDOC("(parser/status parser)\n\n"
|
2019-02-20 01:51:34 +00:00
|
|
|
"Gets the current status of the parser state machine. The status will "
|
|
|
|
"be one of:\n\n"
|
|
|
|
"\t:pending - a value is being parsed.\n"
|
|
|
|
"\t:error - a parsing error was encountered.\n"
|
|
|
|
"\t:root - the parser can either read more values or safely terminate.")
|
2018-11-16 21:24:10 +00:00
|
|
|
},
|
2019-01-06 06:49:56 +00:00
|
|
|
{
|
2019-01-24 05:15:58 +00:00
|
|
|
"parser/flush", cfun_parse_flush,
|
2019-01-06 06:49:56 +00:00
|
|
|
JDOC("(parser/flush parser)\n\n"
|
2019-02-20 01:51:34 +00:00
|
|
|
"Clears the parser state and parse queue. Can be used to reset the parser "
|
|
|
|
"if an error was encountered. Does not reset the line and column counter, so "
|
|
|
|
"to begin parsing in a new context, create a new parser.")
|
2018-11-16 21:24:10 +00:00
|
|
|
},
|
2019-01-06 06:49:56 +00:00
|
|
|
{
|
2019-01-24 05:15:58 +00:00
|
|
|
"parser/state", cfun_parse_state,
|
2019-01-06 06:49:56 +00:00
|
|
|
JDOC("(parser/state parser)\n\n"
|
2019-02-20 01:51:34 +00:00
|
|
|
"Returns a string representation of the internal state of the parser. "
|
|
|
|
"Each byte in the string represents a nested data structure. For example, "
|
|
|
|
"if the parser state is '([\"', then the parser is in the middle of parsing a "
|
|
|
|
"string inside of square brackets inside parentheses. Can be used to augment a REPL prompt.")
|
2018-11-16 21:24:10 +00:00
|
|
|
},
|
2019-01-06 06:49:56 +00:00
|
|
|
{
|
2019-01-24 05:15:58 +00:00
|
|
|
"parser/where", cfun_parse_where,
|
2019-01-06 06:49:56 +00:00
|
|
|
JDOC("(parser/where parser)\n\n"
|
2019-02-20 01:51:34 +00:00
|
|
|
"Returns the current line number and column number of the parser's location "
|
|
|
|
"in the byte stream as a tuple (line, column). Lines and columns are counted from "
|
|
|
|
"1, (the first byte is line 1, column 1) and a newline is considered ASCII 0x0A.")
|
2018-11-16 21:24:10 +00:00
|
|
|
},
|
2019-02-27 18:09:19 +00:00
|
|
|
{
|
|
|
|
"parser/eof", cfun_parse_eof,
|
2019-05-17 08:58:06 +00:00
|
|
|
JDOC("(parser/eof parser)\n\n"
|
2019-02-27 18:09:19 +00:00
|
|
|
"Indicate that the end of file was reached to the parser. This puts the parser in the :dead state.")
|
|
|
|
},
|
2019-01-31 19:48:28 +00:00
|
|
|
{
|
|
|
|
"parser/insert", cfun_parse_insert,
|
|
|
|
JDOC("(parser/insert parser value)\n\n"
|
2019-02-20 01:51:34 +00:00
|
|
|
"Insert a value into the parser. This means that the parser state can be manipulated "
|
|
|
|
"in between chunks of bytes. This would allow a user to add extra elements to arrays "
|
|
|
|
"and tuples, for example. Returns the parser.")
|
2019-01-31 19:48:28 +00:00
|
|
|
},
|
2018-11-15 20:45:41 +00:00
|
|
|
{NULL, NULL, NULL}
|
2018-01-27 20:15:09 +00:00
|
|
|
};
|
|
|
|
|
2018-01-18 22:25:45 +00:00
|
|
|
/* Load the library */
|
2019-01-06 01:09:03 +00:00
|
|
|
void janet_lib_parse(JanetTable *env) {
|
2019-02-08 05:44:30 +00:00
|
|
|
janet_core_cfuns(env, NULL, parse_cfuns);
|
2018-01-16 04:31:39 +00:00
|
|
|
}
|