mirror of
https://github.com/janet-lang/janet
synced 2024-11-29 11:29:54 +00:00
333ae7c4f8
This way we can support fewer build configurations. Also, remove all undefined behavior due to use of memcpy with NULL pointers. GCC was exploiting this to remove NULL checks in some builds.
1330 lines
43 KiB
C
1330 lines
43 KiB
C
/*
* Copyright (c) 2020 Calvin Rose
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
|
|
|
|
#ifndef JANET_AMALG
|
|
#include "features.h"
|
|
#include <janet.h>
|
|
#include <string.h>
|
|
#include "util.h"
|
|
#include "vector.h"
|
|
#include "util.h"
|
|
#endif
|
|
|
|
#ifdef JANET_PEG
|
|
|
|
/*
|
|
* Runtime
|
|
*/
|
|
|
|
/* Opcodes for the peg vm. The interpreter dispatches on the low 5 bits of
 * the first word of a rule; the comment next to each opcode lists the
 * operand words that follow that first word. */
typedef enum {
    RULE_LITERAL,      /* [len, bytes...] */
    RULE_NCHAR,        /* [n] */
    RULE_NOTNCHAR,     /* [n] */
    RULE_RANGE,        /* [lo | hi << 16 (1 word)] */
    RULE_SET,          /* [bitmap (8 words)] */
    RULE_LOOK,         /* [offset, rule] */
    RULE_CHOICE,       /* [len, rules...] */
    RULE_SEQUENCE,     /* [len, rules...] */
    RULE_IF,           /* [rule_a, rule_b (b if a)] */
    RULE_IFNOT,        /* [rule_a, rule_b (b if not a)] */
    RULE_NOT,          /* [rule] */
    RULE_BETWEEN,      /* [lo, hi, rule] */
    RULE_GETTAG,       /* [searchtag, tag] */
    RULE_CAPTURE,      /* [rule, tag] */
    RULE_POSITION,     /* [tag] */
    RULE_ARGUMENT,     /* [argument-index, tag] */
    RULE_CONSTANT,     /* [constant, tag] */
    RULE_ACCUMULATE,   /* [rule, tag] */
    RULE_GROUP,        /* [rule, tag] */
    RULE_REPLACE,      /* [rule, constant, tag] */
    RULE_MATCHTIME,    /* [rule, constant, tag] */
    RULE_ERROR,        /* [rule] */
    RULE_DROP,         /* [rule] */
    RULE_BACKMATCH,    /* [tag] */
} Opcode;
|
|
|
|
/* Hold captured patterns and match state */
|
|
typedef struct {
|
|
const uint8_t *text_start;
|
|
const uint8_t *text_end;
|
|
const uint32_t *bytecode;
|
|
const Janet *constants;
|
|
JanetArray *captures;
|
|
JanetBuffer *scratch;
|
|
JanetBuffer *tags;
|
|
const Janet *extrav;
|
|
int32_t extrac;
|
|
int32_t depth;
|
|
enum {
|
|
PEG_MODE_NORMAL,
|
|
PEG_MODE_ACCUMULATE
|
|
} mode;
|
|
} PegState;
|
|
|
|
/* Snapshot used for backtracking. The capture state is recorded at a
 * branch point so that, when a branch fails, it can be rolled back before
 * the next branch is tried. */
typedef struct {
    int32_t cap;      /* saved length of the capture stack */
    int32_t scratch;  /* saved length of the scratch buffer */
} CapState;
|
|
|
|
/* Save the current capture state */
|
|
static CapState cap_save(PegState *s) {
|
|
CapState cs;
|
|
cs.scratch = s->scratch->count;
|
|
cs.cap = s->captures->count;
|
|
return cs;
|
|
}
|
|
|
|
/* Load a saved capture state in the case of failure */
|
|
static void cap_load(PegState *s, CapState cs) {
|
|
s->scratch->count = cs.scratch;
|
|
s->captures->count = cs.cap;
|
|
s->tags->count = cs.cap;
|
|
}
|
|
|
|
/* Add a capture */
|
|
static void pushcap(PegState *s, Janet capture, uint32_t tag) {
|
|
if (s->mode == PEG_MODE_ACCUMULATE) {
|
|
janet_to_string_b(s->scratch, capture);
|
|
}
|
|
if (tag || s->mode == PEG_MODE_NORMAL) {
|
|
janet_array_push(s->captures, capture);
|
|
janet_buffer_push_u8(s->tags, tag);
|
|
}
|
|
}
|
|
|
|
/* Recursion guard. down1 spends one unit of the depth budget and panics
 * when the budget reaches zero; up1 gives the unit back. */
#define down1(s) do { \
    if (--((s)->depth) == 0) janet_panic("peg/match recursed too deeply"); \
} while (0)
#define up1(s) ((s)->depth++)
|
|
|
|
/* Evaluate a peg rule
|
|
* Pre-conditions: s is in a valid state
|
|
* Post-conditions: If there is a match, returns a pointer to the next text.
|
|
* All captures on the capture stack are valid. If there is no match,
|
|
* returns NULL. Extra captures from successful child expressions can be
|
|
* left on the capture stack.
|
|
*/
|
|
static const uint8_t *peg_rule(
|
|
PegState *s,
|
|
const uint32_t *rule,
|
|
const uint8_t *text) {
|
|
tail:
|
|
switch (*rule & 0x1F) {
|
|
default:
|
|
janet_panic("unexpected opcode");
|
|
return NULL;
|
|
|
|
case RULE_LITERAL: {
|
|
uint32_t len = rule[1];
|
|
if (text + len > s->text_end) return NULL;
|
|
return memcmp(text, rule + 2, len) ? NULL : text + len;
|
|
}
|
|
|
|
case RULE_NCHAR: {
|
|
uint32_t n = rule[1];
|
|
return (text + n > s->text_end) ? NULL : text + n;
|
|
}
|
|
|
|
case RULE_NOTNCHAR: {
|
|
uint32_t n = rule[1];
|
|
return (text + n > s->text_end) ? text : NULL;
|
|
}
|
|
|
|
case RULE_RANGE: {
|
|
uint8_t lo = rule[1] & 0xFF;
|
|
uint8_t hi = (rule[1] >> 16) & 0xFF;
|
|
return (text < s->text_end &&
|
|
text[0] >= lo &&
|
|
text[0] <= hi)
|
|
? text + 1
|
|
: NULL;
|
|
}
|
|
|
|
case RULE_SET: {
|
|
uint32_t word = rule[1 + (text[0] >> 5)];
|
|
uint32_t mask = (uint32_t)1 << (text[0] & 0x1F);
|
|
return (text < s->text_end && (word & mask))
|
|
? text + 1
|
|
: NULL;
|
|
}
|
|
|
|
case RULE_LOOK: {
|
|
text += ((int32_t *)rule)[1];
|
|
if (text < s->text_start || text > s->text_end) return NULL;
|
|
down1(s);
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[2], text);
|
|
up1(s);
|
|
return result ? text : NULL;
|
|
}
|
|
|
|
case RULE_CHOICE: {
|
|
uint32_t len = rule[1];
|
|
const uint32_t *args = rule + 2;
|
|
if (len == 0) return NULL;
|
|
down1(s);
|
|
CapState cs = cap_save(s);
|
|
for (uint32_t i = 0; i < len - 1; i++) {
|
|
const uint8_t *result = peg_rule(s, s->bytecode + args[i], text);
|
|
if (result) {
|
|
up1(s);
|
|
return result;
|
|
}
|
|
cap_load(s, cs);
|
|
}
|
|
up1(s);
|
|
rule = s->bytecode + args[len - 1];
|
|
goto tail;
|
|
}
|
|
|
|
case RULE_SEQUENCE: {
|
|
uint32_t len = rule[1];
|
|
const uint32_t *args = rule + 2;
|
|
if (len == 0) return text;
|
|
down1(s);
|
|
for (uint32_t i = 0; text && i < len - 1; i++)
|
|
text = peg_rule(s, s->bytecode + args[i], text);
|
|
up1(s);
|
|
if (!text) return NULL;
|
|
rule = s->bytecode + args[len - 1];
|
|
goto tail;
|
|
}
|
|
|
|
case RULE_IF:
|
|
case RULE_IFNOT: {
|
|
const uint32_t *rule_a = s->bytecode + rule[1];
|
|
const uint32_t *rule_b = s->bytecode + rule[2];
|
|
down1(s);
|
|
const uint8_t *result = peg_rule(s, rule_a, text);
|
|
up1(s);
|
|
if (rule[0] == RULE_IF ? !result : !!result) return NULL;
|
|
rule = rule_b;
|
|
goto tail;
|
|
}
|
|
|
|
case RULE_NOT: {
|
|
const uint32_t *rule_a = s->bytecode + rule[1];
|
|
down1(s);
|
|
const uint8_t *result = peg_rule(s, rule_a, text);
|
|
up1(s);
|
|
return (result) ? NULL : text;
|
|
}
|
|
|
|
case RULE_BETWEEN: {
|
|
uint32_t lo = rule[1];
|
|
uint32_t hi = rule[2];
|
|
const uint32_t *rule_a = s->bytecode + rule[3];
|
|
uint32_t captured = 0;
|
|
const uint8_t *next_text;
|
|
CapState cs = cap_save(s);
|
|
down1(s);
|
|
while (captured < hi) {
|
|
CapState cs2 = cap_save(s);
|
|
next_text = peg_rule(s, rule_a, text);
|
|
if (!next_text || next_text == text) {
|
|
cap_load(s, cs2);
|
|
break;
|
|
}
|
|
captured++;
|
|
text = next_text;
|
|
}
|
|
up1(s);
|
|
if (captured < lo) {
|
|
cap_load(s, cs);
|
|
return NULL;
|
|
}
|
|
return text;
|
|
}
|
|
|
|
/* Capturing rules */
|
|
|
|
case RULE_GETTAG: {
|
|
uint32_t search = rule[1];
|
|
uint32_t tag = rule[2];
|
|
for (int32_t i = s->tags->count - 1; i >= 0; i--) {
|
|
if (s->tags->data[i] == search) {
|
|
pushcap(s, s->captures->data[i], tag);
|
|
return text;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
case RULE_POSITION: {
|
|
pushcap(s, janet_wrap_number((double)(text - s->text_start)), rule[1]);
|
|
return text;
|
|
}
|
|
|
|
case RULE_ARGUMENT: {
|
|
int32_t index = ((int32_t *)rule)[1];
|
|
Janet capture = (index >= s->extrac) ? janet_wrap_nil() : s->extrav[index];
|
|
pushcap(s, capture, rule[2]);
|
|
return text;
|
|
}
|
|
|
|
case RULE_CONSTANT: {
|
|
pushcap(s, s->constants[rule[1]], rule[2]);
|
|
return text;
|
|
}
|
|
|
|
case RULE_CAPTURE: {
|
|
uint32_t tag = rule[2];
|
|
down1(s);
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
|
up1(s);
|
|
if (!result) return NULL;
|
|
/* Specialized pushcap - avoid intermediate string creation */
|
|
if (!tag && s->mode == PEG_MODE_ACCUMULATE) {
|
|
janet_buffer_push_bytes(s->scratch, text, (int32_t)(result - text));
|
|
} else {
|
|
pushcap(s, janet_stringv(text, (int32_t)(result - text)), tag);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
case RULE_ACCUMULATE: {
|
|
uint32_t tag = rule[2];
|
|
int oldmode = s->mode;
|
|
if (!tag && oldmode == PEG_MODE_ACCUMULATE) {
|
|
rule = s->bytecode + rule[1];
|
|
goto tail;
|
|
}
|
|
CapState cs = cap_save(s);
|
|
s->mode = PEG_MODE_ACCUMULATE;
|
|
down1(s);
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
|
up1(s);
|
|
s->mode = oldmode;
|
|
if (!result) return NULL;
|
|
Janet cap = janet_stringv(s->scratch->data + cs.scratch,
|
|
s->scratch->count - cs.scratch);
|
|
cap_load(s, cs);
|
|
pushcap(s, cap, tag);
|
|
return result;
|
|
}
|
|
|
|
case RULE_DROP: {
|
|
CapState cs = cap_save(s);
|
|
down1(s);
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
|
up1(s);
|
|
if (!result) return NULL;
|
|
cap_load(s, cs);
|
|
return result;
|
|
}
|
|
|
|
case RULE_GROUP: {
|
|
uint32_t tag = rule[2];
|
|
int oldmode = s->mode;
|
|
CapState cs = cap_save(s);
|
|
s->mode = PEG_MODE_NORMAL;
|
|
down1(s);
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
|
up1(s);
|
|
s->mode = oldmode;
|
|
if (!result) return NULL;
|
|
int32_t num_sub_captures = s->captures->count - cs.cap;
|
|
JanetArray *sub_captures = janet_array(num_sub_captures);
|
|
safe_memcpy(sub_captures->data,
|
|
s->captures->data + cs.cap,
|
|
sizeof(Janet) * num_sub_captures);
|
|
sub_captures->count = num_sub_captures;
|
|
cap_load(s, cs);
|
|
pushcap(s, janet_wrap_array(sub_captures), tag);
|
|
return result;
|
|
}
|
|
|
|
case RULE_REPLACE:
|
|
case RULE_MATCHTIME: {
|
|
uint32_t tag = rule[3];
|
|
int oldmode = s->mode;
|
|
CapState cs = cap_save(s);
|
|
s->mode = PEG_MODE_NORMAL;
|
|
down1(s);
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
|
up1(s);
|
|
s->mode = oldmode;
|
|
if (!result) return NULL;
|
|
|
|
Janet cap;
|
|
Janet constant = s->constants[rule[2]];
|
|
switch (janet_type(constant)) {
|
|
default:
|
|
cap = constant;
|
|
break;
|
|
case JANET_STRUCT:
|
|
cap = janet_struct_get(janet_unwrap_struct(constant),
|
|
s->captures->data[s->captures->count - 1]);
|
|
break;
|
|
case JANET_TABLE:
|
|
cap = janet_table_get(janet_unwrap_table(constant),
|
|
s->captures->data[s->captures->count - 1]);
|
|
break;
|
|
case JANET_CFUNCTION:
|
|
cap = janet_unwrap_cfunction(constant)(s->captures->count - cs.cap,
|
|
s->captures->data + cs.cap);
|
|
break;
|
|
case JANET_FUNCTION:
|
|
cap = janet_call(janet_unwrap_function(constant),
|
|
s->captures->count - cs.cap,
|
|
s->captures->data + cs.cap);
|
|
break;
|
|
}
|
|
cap_load(s, cs);
|
|
if (rule[0] == RULE_MATCHTIME && !janet_truthy(cap)) return NULL;
|
|
pushcap(s, cap, tag);
|
|
return result;
|
|
}
|
|
|
|
case RULE_ERROR: {
|
|
int oldmode = s->mode;
|
|
s->mode = PEG_MODE_NORMAL;
|
|
int32_t old_cap = s->captures->count;
|
|
down1(s);
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
|
up1(s);
|
|
s->mode = oldmode;
|
|
if (!result) return NULL;
|
|
if (s->captures->count > old_cap) {
|
|
/* Throw last capture */
|
|
janet_panicv(s->captures->data[s->captures->count - 1]);
|
|
} else {
|
|
/* Throw generic error */
|
|
int32_t start = (int32_t)(text - s->text_start);
|
|
int32_t end = (int32_t)(result - s->text_start);
|
|
janet_panicf("match error in range (%d:%d)", start, end);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
case RULE_BACKMATCH: {
|
|
uint32_t search = rule[1];
|
|
for (int32_t i = s->tags->count - 1; i >= 0; i--) {
|
|
if (s->tags->data[i] == search) {
|
|
Janet capture = s->captures->data[i];
|
|
if (!janet_checktype(capture, JANET_STRING))
|
|
return NULL;
|
|
const uint8_t *bytes = janet_unwrap_string(capture);
|
|
int32_t len = janet_string_length(bytes);
|
|
if (text + len > s->text_end)
|
|
return NULL;
|
|
return memcmp(text, bytes, len) ? NULL : text + len;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Compilation
|
|
*/
|
|
|
|
typedef struct {
|
|
JanetTable *grammar;
|
|
JanetTable *default_grammar;
|
|
JanetTable *tags;
|
|
Janet *constants;
|
|
uint32_t *bytecode;
|
|
Janet form;
|
|
int depth;
|
|
uint32_t nexttag;
|
|
} Builder;
|
|
|
|
/* Forward declaration to allow recursion */
|
|
static uint32_t peg_compile1(Builder *b, Janet peg);
|
|
|
|
/*
|
|
* Errors
|
|
*/
|
|
|
|
/* Release the two vectors owned by the builder. */
static void builder_cleanup(Builder *b) {
    janet_v_free(b->bytecode);
    janet_v_free(b->constants);
}
|
|
|
|
/* Abort compilation: free the builder's vectors, then raise a janet panic
 * that names the form being compiled followed by msg. Does not return. */
JANET_NO_RETURN static void peg_panic(Builder *b, const char *msg) {
    Janet offending_form = b->form;
    builder_cleanup(b);
    janet_panicf("grammar error in %p, %s", offending_form, msg);
}

/* Variant of peg_panic whose message is built with janet_formatc */
#define peg_panicf(b,...) peg_panic((b), (const char *) janet_formatc(__VA_ARGS__))
|
|
|
|
/* Require exactly `arity` arguments for a special form.
 * b     - builder, used for cleanup and error reporting
 * argc  - number of arguments actually supplied
 * arity - number of arguments required
 * Panics through peg_panicf on a mismatch. */
static void peg_fixarity(Builder *b, int32_t argc, int32_t arity) {
    if (argc != arity) {
        /* The format previously ended in a dangling '%', which is not a
         * valid conversion specifier. */
        peg_panicf(b, "expected %d argument%s, got %d",
                   arity,
                   arity == 1 ? "" : "s",
                   argc);
    }
}
|
|
|
|
/* Check that arity lies within [min, max]. A negative bound disables the
 * corresponding check. Panics through peg_panicf when a bound is violated. */
static void peg_arity(Builder *b, int32_t arity, int32_t min, int32_t max) {
    if (min >= 0) {
        if (arity < min) {
            peg_panicf(b, "arity mismatch, expected at least %d, got %d", min, arity);
        }
    }
    if (max >= 0) {
        if (arity > max) {
            peg_panicf(b, "arity mismatch, expected at most %d, got %d", max, arity);
        }
    }
}
|
|
|
|
/* Return the bytes of x, which must be a string listing the members of a
 * character set; panics otherwise. */
static const uint8_t *peg_getset(Builder *b, Janet x) {
    if (!janet_checktype(x, JANET_STRING)) {
        peg_panic(b, "expected string for character set");
    }
    return janet_unwrap_string(x);
}
|
|
|
|
/* Return the two bytes of a character range. x must be a string of length
 * 2 whose second byte is not smaller than its first; panics otherwise. */
static const uint8_t *peg_getrange(Builder *b, Janet x) {
    if (!janet_checktype(x, JANET_STRING)) {
        peg_panic(b, "expected string for character range");
    }
    const uint8_t *bounds = janet_unwrap_string(x);
    if (janet_string_length(bounds) != 2) {
        peg_panicf(b, "expected string to have length 2, got %v", x);
    }
    if (bounds[1] < bounds[0]) {
        peg_panicf(b, "range %v is empty", x);
    }
    return bounds;
}
|
|
|
|
/* Unwrap x as a 32 bit integer; panics when janet_checkint rejects it. */
static int32_t peg_getinteger(Builder *b, Janet x) {
    if (!janet_checkint(x)) {
        peg_panicf(b, "expected integer, got %v", x);
    }
    return janet_unwrap_integer(x);
}
|
|
|
|
/* Unwrap x as a non-negative 32 bit integer; panics otherwise. */
static int32_t peg_getnat(Builder *b, Janet x) {
    int32_t value = peg_getinteger(b, x);
    if (value < 0) {
        peg_panicf(b, "expected non-negative integer, got %v", x);
    }
    return value;
}
|
|
|
|
/*
|
|
* Emission
|
|
*/
|
|
|
|
/* Append c to the constant vector and return the index it was stored at. */
static uint32_t emit_constant(Builder *b, Janet c) {
    uint32_t slot = (uint32_t) janet_v_count(b->constants);
    janet_v_push(b->constants, c);
    return slot;
}
|
|
|
|
/* Map a capture-tag keyword to its tag number. A keyword already present
 * in b->tags keeps its number; otherwise the next number is allocated and
 * recorded. Panics if t is not a keyword or the new number exceeds 255. */
static uint32_t emit_tag(Builder *b, Janet t) {
    if (!janet_checktype(t, JANET_KEYWORD)) {
        peg_panicf(b, "expected keyword for capture tag, got %v", t);
    }
    Janet existing = janet_table_get(b->tags, t);
    if (!janet_checktype(existing, JANET_NIL)) {
        return (uint32_t) janet_unwrap_number(existing);
    }
    uint32_t fresh = b->nexttag++;
    if (fresh > 255) {
        peg_panic(b, "too many tags - up to 255 tags are supported per peg");
    }
    janet_table_put(b->tags, t, janet_wrap_number(fresh));
    return fresh;
}
|
|
|
|
/* Reserve space in bytecode for a rule. When a special emits a rule,
|
|
* it must place that rule immediately on the bytecode stack. This lets
|
|
* the compiler know where the rule is going to be before it is complete,
|
|
* allowing recursive rules. */
|
|
typedef struct {
|
|
Builder *builder;
|
|
uint32_t index;
|
|
int32_t size;
|
|
} Reserve;
|
|
|
|
/* Claim `size` zeroed words at the end of the bytecode and return a
 * descriptor for them. */
static Reserve reserve(Builder *b, int32_t size) {
    Reserve slot;
    slot.builder = b;
    slot.size = size;
    /* Position must be taken before the placeholder words are pushed */
    slot.index = janet_v_count(b->bytecode);
    for (int32_t remaining = size; remaining > 0; remaining--) {
        janet_v_push(b->bytecode, 0);
    }
    return slot;
}
|
|
|
|
/* Emit a rule in the builder. Returns the index of the new rule */
|
|
static void emit_rule(Reserve r, int32_t op, int32_t n, const uint32_t *body) {
|
|
janet_assert(r.size == n + 1, "bad reserve");
|
|
r.builder->bytecode[r.index] = op;
|
|
memcpy(r.builder->bytecode + r.index + 1, body, n * sizeof(uint32_t));
|
|
}
|
|
|
|
/* For RULE_LITERAL */
|
|
static void emit_bytes(Builder *b, uint32_t op, int32_t len, const uint8_t *bytes) {
|
|
uint32_t next_rule = janet_v_count(b->bytecode);
|
|
janet_v_push(b->bytecode, op);
|
|
janet_v_push(b->bytecode, len);
|
|
int32_t words = ((len + 3) >> 2);
|
|
for (int32_t i = 0; i < words; i++)
|
|
janet_v_push(b->bytecode, 0);
|
|
memcpy(b->bytecode + next_rule + 2, bytes, len);
|
|
}
|
|
|
|
/* For fixed arity rules of arities 1, 2, and 3 */
|
|
static void emit_1(Reserve r, uint32_t op, uint32_t arg) {
|
|
emit_rule(r, op, 1, &arg);
|
|
}
|
|
static void emit_2(Reserve r, uint32_t op, uint32_t arg1, uint32_t arg2) {
|
|
uint32_t arr[2] = {arg1, arg2};
|
|
emit_rule(r, op, 2, arr);
|
|
}
|
|
static void emit_3(Reserve r, uint32_t op, uint32_t arg1, uint32_t arg2, uint32_t arg3) {
|
|
uint32_t arr[3] = {arg1, arg2, arg3};
|
|
emit_rule(r, op, 3, arr);
|
|
}
|
|
|
|
/*
|
|
* Specials
|
|
*/
|
|
|
|
/* Set the bit for byte c in a 256 bit bitmap stored as eight 32 bit
 * words: word c / 32, bit c % 32. */
static void bitmap_set(uint32_t *bitmap, uint8_t c) {
    uint32_t bit = ((uint32_t)1) << (c % 32);
    bitmap[c / 32] |= bit;
}
|
|
|
|
/* (range "az" ...): a single range compiles to RULE_RANGE with both bounds
 * packed into one word; several ranges are merged into a RULE_SET bitmap. */
static void spec_range(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 1, -1);
    if (argc == 1) {
        Reserve r = reserve(b, 2);
        const uint8_t *bounds = peg_getrange(b, argv[0]);
        uint32_t packed = bounds[0] | (bounds[1] << 16);
        emit_1(r, RULE_RANGE, packed);
        return;
    }
    /* Compile as a set */
    Reserve r = reserve(b, 9);
    uint32_t bitmap[8] = {0};
    for (int32_t k = 0; k < argc; k++) {
        const uint8_t *bounds = peg_getrange(b, argv[k]);
        for (uint32_t ch = bounds[0]; ch <= bounds[1]; ch++) {
            bitmap_set(bitmap, ch);
        }
    }
    emit_rule(r, RULE_SET, 8, bitmap);
}
|
|
|
|
/* (set "abc"): compile a string of member bytes into a RULE_SET bitmap. */
static void spec_set(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 1);
    Reserve r = reserve(b, 9);
    const uint8_t *members = peg_getset(b, argv[0]);
    int32_t member_count = janet_string_length(members);
    uint32_t bitmap[8] = {0};
    for (int32_t k = 0; k < member_count; k++) {
        bitmap_set(bitmap, members[k]);
    }
    emit_rule(r, RULE_SET, 8, bitmap);
}
|
|
|
|
/* (look patt) or (look offset patt): emit RULE_LOOK. The offset is 0 when
 * only the pattern is given. */
static void spec_look(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 1, 2);
    Reserve r = reserve(b, 3);
    int32_t offset = 0;
    const Janet *pattern = argv;
    if (argc == 2) {
        offset = peg_getinteger(b, argv[0]);
        pattern = argv + 1;
    }
    uint32_t subrule = peg_compile1(b, *pattern);
    emit_2(r, RULE_LOOK, (uint32_t) offset, subrule);
}
|
|
|
|
/* Rule of the form [len, rules...] */
|
|
static void spec_variadic(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
|
|
uint32_t rule = janet_v_count(b->bytecode);
|
|
janet_v_push(b->bytecode, op);
|
|
janet_v_push(b->bytecode, argc);
|
|
for (int32_t i = 0; i < argc; i++)
|
|
janet_v_push(b->bytecode, 0);
|
|
for (int32_t i = 0; i < argc; i++) {
|
|
uint32_t rulei = peg_compile1(b, argv[i]);
|
|
b->bytecode[rule + 2 + i] = rulei;
|
|
}
|
|
}
|
|
|
|
/* (choice a b ...) / (+ a b ...) */
static void spec_choice(Builder *b, int32_t argc, const Janet *argv) {
    spec_variadic(b, argc, argv, RULE_CHOICE);
}

/* (sequence a b ...) / (* a b ...) */
static void spec_sequence(Builder *b, int32_t argc, const Janet *argv) {
    spec_variadic(b, argc, argv, RULE_SEQUENCE);
}
|
|
|
|
/* For (if a b) and (if-not a b) */
|
|
static void spec_branch(Builder *b, int32_t argc, const Janet *argv, uint32_t rule) {
|
|
peg_fixarity(b, argc, 2);
|
|
Reserve r = reserve(b, 3);
|
|
uint32_t rule_a = peg_compile1(b, argv[0]);
|
|
uint32_t rule_b = peg_compile1(b, argv[1]);
|
|
emit_2(r, rule, rule_a, rule_b);
|
|
}
|
|
|
|
/* (if a b) */
static void spec_if(Builder *b, int32_t argc, const Janet *argv) {
    spec_branch(b, argc, argv, RULE_IF);
}

/* (if-not a b) */
static void spec_ifnot(Builder *b, int32_t argc, const Janet *argv) {
    spec_branch(b, argc, argv, RULE_IFNOT);
}
|
|
|
|
/* (between lo hi patt): emit RULE_BETWEEN with both bounds taken from the
 * arguments, which must be non-negative integers. */
static void spec_between(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 3);
    Reserve r = reserve(b, 4);
    int32_t at_least = peg_getnat(b, argv[0]);
    int32_t at_most = peg_getnat(b, argv[1]);
    uint32_t subrule = peg_compile1(b, argv[2]);
    emit_3(r, RULE_BETWEEN, at_least, at_most, subrule);
}
|
|
|
|
/* Shared compiler for open-ended repetition: RULE_BETWEEN with a lower
 * bound of min and an upper bound of UINT32_MAX. */
static void spec_repeater(Builder *b, int32_t argc, const Janet *argv, int32_t min) {
    peg_fixarity(b, argc, 1);
    Reserve r = reserve(b, 4);
    uint32_t repeated = peg_compile1(b, argv[0]);
    emit_3(r, RULE_BETWEEN, min, UINT32_MAX, repeated);
}
|
|
|
|
/* (some patt): one or more repetitions */
static void spec_some(Builder *b, int32_t argc, const Janet *argv) {
    spec_repeater(b, argc, argv, 1);
}

/* (any patt): zero or more repetitions */
static void spec_any(Builder *b, int32_t argc, const Janet *argv) {
    spec_repeater(b, argc, argv, 0);
}
|
|
|
|
/* (at-least n patt): RULE_BETWEEN from n up to UINT32_MAX. */
static void spec_atleast(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 2);
    Reserve r = reserve(b, 4);
    int32_t minimum = peg_getnat(b, argv[0]);
    uint32_t repeated = peg_compile1(b, argv[1]);
    emit_3(r, RULE_BETWEEN, minimum, UINT32_MAX, repeated);
}
|
|
|
|
/* (at-most n patt): RULE_BETWEEN from 0 up to n. */
static void spec_atmost(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 2);
    Reserve r = reserve(b, 4);
    int32_t maximum = peg_getnat(b, argv[0]);
    uint32_t repeated = peg_compile1(b, argv[1]);
    emit_3(r, RULE_BETWEEN, 0, maximum, repeated);
}
|
|
|
|
/* (opt patt) / (? patt): RULE_BETWEEN with bounds 0 and 1. */
static void spec_opt(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 1);
    Reserve r = reserve(b, 4);
    uint32_t optional = peg_compile1(b, argv[0]);
    emit_3(r, RULE_BETWEEN, 0, 1, optional);
}
|
|
|
|
/* (repeat n patt): RULE_BETWEEN with both bounds equal to n. */
static void spec_repeat(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 2);
    Reserve r = reserve(b, 4);
    int32_t times = peg_getnat(b, argv[0]);
    uint32_t repeated = peg_compile1(b, argv[1]);
    emit_3(r, RULE_BETWEEN, times, times, repeated);
}
|
|
|
|
/* Rule of the form [rule] */
|
|
static void spec_onerule(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
|
|
peg_fixarity(b, argc, 1);
|
|
Reserve r = reserve(b, 2);
|
|
uint32_t rule = peg_compile1(b, argv[0]);
|
|
emit_1(r, op, rule);
|
|
}
|
|
|
|
/* (not patt) / (! patt) */
static void spec_not(Builder *b, int32_t argc, const Janet *argv) {
    spec_onerule(b, argc, argv, RULE_NOT);
}

/* (error patt) */
static void spec_error(Builder *b, int32_t argc, const Janet *argv) {
    spec_onerule(b, argc, argv, RULE_ERROR);
}

/* (drop patt) */
static void spec_drop(Builder *b, int32_t argc, const Janet *argv) {
    spec_onerule(b, argc, argv, RULE_DROP);
}
|
|
|
|
/* Rule of the form [rule, tag] */
|
|
static void spec_cap1(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
|
|
peg_arity(b, argc, 1, 2);
|
|
Reserve r = reserve(b, 3);
|
|
uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
|
|
uint32_t rule = peg_compile1(b, argv[0]);
|
|
emit_2(r, op, rule, tag);
|
|
}
|
|
|
|
/* (capture patt ?tag) */
static void spec_capture(Builder *b, int32_t argc, const Janet *argv) {
    spec_cap1(b, argc, argv, RULE_CAPTURE);
}

/* (accumulate patt ?tag) */
static void spec_accumulate(Builder *b, int32_t argc, const Janet *argv) {
    spec_cap1(b, argc, argv, RULE_ACCUMULATE);
}

/* (group patt ?tag) */
static void spec_group(Builder *b, int32_t argc, const Janet *argv) {
    spec_cap1(b, argc, argv, RULE_GROUP);
}
|
|
|
|
/* (backref search ?tag): emit RULE_GETTAG, which re-pushes the capture
 * carrying the searched tag under an optional new tag. */
static void spec_reference(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 1, 2);
    Reserve r = reserve(b, 3);
    uint32_t search = emit_tag(b, argv[0]);
    uint32_t tag = 0;
    if (argc == 2) {
        tag = emit_tag(b, argv[1]);
    }
    emit_2(r, RULE_GETTAG, search, tag);
}
|
|
|
|
/* Emit a rule of the form [op, tag], where the tag argument is optional
 * and defaults to 0. */
static void spec_tag1(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
    peg_arity(b, argc, 0, 1);
    Reserve r = reserve(b, 2);
    uint32_t tag = 0;
    if (argc) {
        tag = emit_tag(b, argv[0]);
    }
    emit_1(r, op, tag);
}
|
|
|
|
/* (position ?tag) / ($ ?tag) */
static void spec_position(Builder *b, int32_t argc, const Janet *argv) {
    spec_tag1(b, argc, argv, RULE_POSITION);
}

/* (backmatch ?tag) */
static void spec_backmatch(Builder *b, int32_t argc, const Janet *argv) {
    spec_tag1(b, argc, argv, RULE_BACKMATCH);
}
|
|
|
|
/* (argument n ?tag): emit RULE_ARGUMENT. The tag is resolved before the
 * index is validated as a non-negative integer. */
static void spec_argument(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 1, 2);
    Reserve r = reserve(b, 3);
    uint32_t tag = 0;
    if (argc == 2) {
        tag = emit_tag(b, argv[1]);
    }
    int32_t arg_index = peg_getnat(b, argv[0]);
    emit_2(r, RULE_ARGUMENT, arg_index, tag);
}
|
|
|
|
/* (constant k ?tag): emit RULE_CONSTANT, which pushes the value k as a
 * capture without consuming input.
 * b    - builder receiving the rule
 * argc - number of arguments, 1 or 2
 * argv - the constant, optionally followed by a tag keyword
 * The arity check goes through peg_arity, like every other special, so a
 * wrong argument count is reported as a grammar error and the builder's
 * vectors are freed before the panic. */
static void spec_constant(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 1, 2);
    Reserve r = reserve(b, 3);
    uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
    emit_2(r, RULE_CONSTANT, emit_constant(b, argv[0]), tag);
}
|
|
|
|
/* (replace patt subst ?tag): emit RULE_REPLACE. The pattern is compiled
 * first, then the substitute is stored as a constant, then the optional
 * tag is resolved. */
static void spec_replace(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 2, 3);
    Reserve r = reserve(b, 4);
    uint32_t subrule = peg_compile1(b, argv[0]);
    uint32_t substitute = emit_constant(b, argv[1]);
    uint32_t tag = 0;
    if (argc == 3) {
        tag = emit_tag(b, argv[2]);
    }
    emit_3(r, RULE_REPLACE, subrule, substitute, tag);
}
|
|
|
|
/* (cmt patt fun ?tag): emit RULE_MATCHTIME. The second argument must be a
 * function or cfunction; it is stored in the constant pool. */
static void spec_matchtime(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 2, 3);
    Reserve r = reserve(b, 4);
    uint32_t subrule = peg_compile1(b, argv[0]);
    Janet callback = argv[1];
    int callable = janet_checktype(callback, JANET_FUNCTION) ||
                   janet_checktype(callback, JANET_CFUNCTION);
    if (!callable) {
        peg_panicf(b, "expected function|cfunction, got %v", callback);
    }
    uint32_t tag = 0;
    if (argc == 3) {
        tag = emit_tag(b, argv[2]);
    }
    uint32_t callback_index = emit_constant(b, callback);
    emit_3(r, RULE_MATCHTIME, subrule, callback_index, tag);
}
|
|
|
|
/* Special compiler form */
|
|
typedef void (*Special)(Builder *b, int32_t argc, const Janet *argv);
|
|
typedef struct {
|
|
const char *name;
|
|
Special special;
|
|
} SpecialPair;
|
|
|
|
/* Keep in lexical order (vim :sort works well) */
|
|
static const SpecialPair peg_specials[] = {
|
|
{"!", spec_not},
|
|
{"$", spec_position},
|
|
{"%", spec_accumulate},
|
|
{"*", spec_sequence},
|
|
{"+", spec_choice},
|
|
{"->", spec_reference},
|
|
{"/", spec_replace},
|
|
{"<-", spec_capture},
|
|
{">", spec_look},
|
|
{"?", spec_opt},
|
|
{"accumulate", spec_accumulate},
|
|
{"any", spec_any},
|
|
{"argument", spec_argument},
|
|
{"at-least", spec_atleast},
|
|
{"at-most", spec_atmost},
|
|
{"backmatch", spec_backmatch},
|
|
{"backref", spec_reference},
|
|
{"between", spec_between},
|
|
{"capture", spec_capture},
|
|
{"choice", spec_choice},
|
|
{"cmt", spec_matchtime},
|
|
{"constant", spec_constant},
|
|
{"drop", spec_drop},
|
|
{"error", spec_error},
|
|
{"group", spec_group},
|
|
{"if", spec_if},
|
|
{"if-not", spec_ifnot},
|
|
{"look", spec_look},
|
|
{"not", spec_not},
|
|
{"opt", spec_opt},
|
|
{"position", spec_position},
|
|
{"quote", spec_capture},
|
|
{"range", spec_range},
|
|
{"repeat", spec_repeat},
|
|
{"replace", spec_replace},
|
|
{"sequence", spec_sequence},
|
|
{"set", spec_set},
|
|
{"some", spec_some},
|
|
};
|
|
|
|
/* Compile a janet value into a rule and return the rule index. */
|
|
static uint32_t peg_compile1(Builder *b, Janet peg) {
|
|
|
|
/* Keep track of the form being compiled for error purposes */
|
|
Janet old_form = b->form;
|
|
JanetTable *old_grammar = b->grammar;
|
|
b->form = peg;
|
|
|
|
/* Resolve keyword references */
|
|
int i = JANET_RECURSION_GUARD;
|
|
JanetTable *grammar = old_grammar;
|
|
for (; i > 0 && janet_checktype(peg, JANET_KEYWORD); --i) {
|
|
Janet nextPeg = janet_table_get_ex(grammar, peg, &grammar);
|
|
if (!grammar || janet_checktype(nextPeg, JANET_NIL)) {
|
|
nextPeg = janet_table_get(b->default_grammar, peg);
|
|
if (janet_checktype(nextPeg, JANET_NIL)) {
|
|
peg_panic(b, "unknown rule");
|
|
}
|
|
}
|
|
peg = nextPeg;
|
|
b->form = peg;
|
|
b->grammar = grammar;
|
|
}
|
|
if (i == 0)
|
|
peg_panic(b, "reference chain too deep");
|
|
|
|
/* Check cache - for tuples we check only the local cache, as
|
|
* in a different grammar, the same tuple can compile to a different
|
|
* rule - for example, (+ :a :b) depends on whatever :a and :b are bound to. */
|
|
Janet check = janet_checktype(peg, JANET_TUPLE)
|
|
? janet_table_rawget(grammar, peg)
|
|
: janet_table_get(grammar, peg);
|
|
if (!janet_checktype(check, JANET_NIL)) {
|
|
b->form = old_form;
|
|
b->grammar = old_grammar;
|
|
return (uint32_t) janet_unwrap_number(check);
|
|
}
|
|
|
|
/* Check depth */
|
|
if (b->depth-- == 0)
|
|
peg_panic(b, "peg grammar recursed too deeply");
|
|
|
|
/* The final rule to return */
|
|
uint32_t rule = janet_v_count(b->bytecode);
|
|
|
|
/* Add to cache. Do not cache structs, as we don't yet know
|
|
* what rule they will return! We can just as effectively cache
|
|
* the structs main rule. */
|
|
if (!janet_checktype(peg, JANET_STRUCT)) {
|
|
JanetTable *which_grammar = grammar;
|
|
/* If we are a primitive pattern, add to the global cache (root grammar table) */
|
|
if (!janet_checktype(peg, JANET_TUPLE)) {
|
|
while (which_grammar->proto)
|
|
which_grammar = which_grammar->proto;
|
|
}
|
|
janet_table_put(which_grammar, peg, janet_wrap_number(rule));
|
|
}
|
|
|
|
switch (janet_type(peg)) {
|
|
default:
|
|
peg_panic(b, "unexpected peg source");
|
|
return 0;
|
|
case JANET_NUMBER: {
|
|
int32_t n = peg_getinteger(b, peg);
|
|
Reserve r = reserve(b, 2);
|
|
if (n < 0) {
|
|
emit_1(r, RULE_NOTNCHAR, -n);
|
|
} else {
|
|
emit_1(r, RULE_NCHAR, n);
|
|
}
|
|
break;
|
|
}
|
|
case JANET_STRING: {
|
|
const uint8_t *str = janet_unwrap_string(peg);
|
|
int32_t len = janet_string_length(str);
|
|
emit_bytes(b, RULE_LITERAL, len, str);
|
|
break;
|
|
}
|
|
case JANET_STRUCT: {
|
|
/* Build grammar table */
|
|
const JanetKV *st = janet_unwrap_struct(peg);
|
|
JanetTable *new_grammar = janet_table(2 * janet_struct_capacity(st));
|
|
for (int32_t i = 0; i < janet_struct_capacity(st); i++) {
|
|
if (janet_checktype(st[i].key, JANET_KEYWORD)) {
|
|
janet_table_put(new_grammar, st[i].key, st[i].value);
|
|
}
|
|
}
|
|
new_grammar->proto = grammar;
|
|
b->grammar = grammar = new_grammar;
|
|
/* Run the main rule */
|
|
Janet main_rule = janet_table_rawget(grammar, janet_ckeywordv("main"));
|
|
if (janet_checktype(main_rule, JANET_NIL))
|
|
peg_panic(b, "grammar requires :main rule");
|
|
rule = peg_compile1(b, main_rule);
|
|
break;
|
|
}
|
|
case JANET_TUPLE: {
|
|
const Janet *tup = janet_unwrap_tuple(peg);
|
|
int32_t len = janet_tuple_length(tup);
|
|
if (len == 0) peg_panic(b, "tuple in grammar must have non-zero length");
|
|
if (!janet_checktype(tup[0], JANET_SYMBOL))
|
|
peg_panicf(b, "expected grammar command, found %v", tup[0]);
|
|
const uint8_t *sym = janet_unwrap_symbol(tup[0]);
|
|
const SpecialPair *sp = janet_strbinsearch(
|
|
&peg_specials,
|
|
sizeof(peg_specials) / sizeof(SpecialPair),
|
|
sizeof(SpecialPair),
|
|
sym);
|
|
if (sp) {
|
|
sp->special(b, len - 1, tup + 1);
|
|
} else {
|
|
peg_panicf(b, "unknown special %S", sym);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Increase depth again */
|
|
b->depth++;
|
|
b->form = old_form;
|
|
b->grammar = old_grammar;
|
|
return rule;
|
|
}
|
|
|
|
/*
|
|
* Post-Compilation
|
|
*/
|
|
|
|
typedef struct {
|
|
uint32_t *bytecode;
|
|
Janet *constants;
|
|
size_t bytecode_len;
|
|
uint32_t num_constants;
|
|
} Peg;
|
|
|
|
static int peg_mark(void *p, size_t size) {
|
|
(void) size;
|
|
Peg *peg = (Peg *)p;
|
|
if (NULL != peg->constants)
|
|
for (uint32_t i = 0; i < peg->num_constants; i++)
|
|
janet_mark(peg->constants[i]);
|
|
return 0;
|
|
}
|
|
|
|
/* Marshal hook for pegs. Emits, in this order: the bytecode length, the
 * number of constants, a janet_marshal_abstract call for the peg itself,
 * every bytecode word and finally every constant. peg_unmarshal consumes
 * the values in the same order. */
static void peg_marshal(void *p, JanetMarshalContext *ctx) {
    Peg *self = (Peg *) p;

    /* Header: both counts first so the reader can size its allocation. */
    janet_marshal_size(ctx, self->bytecode_len);
    janet_marshal_int(ctx, (int32_t) self->num_constants);
    janet_marshal_abstract(ctx, p);

    /* Payload: bytecode words, then constants. */
    size_t word = 0;
    while (word < self->bytecode_len) {
        janet_marshal_int(ctx, (int32_t) self->bytecode[word]);
        word++;
    }
    uint32_t slot = 0;
    while (slot < self->num_constants) {
        janet_marshal_janet(ctx, self->constants[slot]);
        slot++;
    }
}
|
|
|
|
/* Round offset up to the nearest multiple of size (offset itself when it is
 * already a multiple). Used when several arrays share one memory chunk so
 * that each array starts at a correctly aligned position. size must not be
 * zero. */
static size_t size_padded(size_t offset, size_t size) {
    const size_t bumped = offset + (size - 1);
    const size_t excess = bumped % size;
    return bumped - excess;
}
|
|
|
|
/* Unmarshal hook for pegs. Reads the values written by peg_marshal, rebuilds
 * the peg inside a single abstract allocation laid out exactly like the one
 * make_peg produces, and verifies the bytecode before the peg's pointers are
 * published. Panics with "invalid peg bytecode" when the input is malformed.
 *
 * The input is untrusted, so every length is range checked before it is used
 * for arithmetic, and every instruction is checked to fit inside the
 * bytecode before any of its operands are read. */
static void *peg_unmarshal(JanetMarshalContext *ctx) {
    size_t bytecode_len = janet_unmarshal_size(ctx);
    uint32_t num_constants = (uint32_t) janet_unmarshal_int(ctx);

    /* Matching starts executing at bytecode[0], so an empty program can never
     * be run. The verifier below walks the bytecode with 32 bit indices and
     * peg_marshal writes the constant count through int32_t, so larger counts
     * cannot describe a real peg. Nothing has been allocated yet, so it is
     * safe to panic here. */
    if (bytecode_len == 0 ||
            bytecode_len > (size_t) INT32_MAX ||
            num_constants > (uint32_t) INT32_MAX)
        janet_panic("invalid peg bytecode");

    /* Calculate offsets. Should match those in make_peg. Each step is checked
     * so the total cannot wrap around size_t (possible where size_t is 32
     * bits wide) and lead to an allocation smaller than the data copied in. */
    size_t bytecode_start = size_padded(sizeof(Peg), sizeof(uint32_t));
    if (bytecode_len > (SIZE_MAX - bytecode_start - sizeof(Janet)) / sizeof(uint32_t))
        janet_panic("invalid peg bytecode");
    size_t bytecode_size = bytecode_len * sizeof(uint32_t);
    size_t constants_start = size_padded(bytecode_start + bytecode_size, sizeof(Janet));
    if ((size_t) num_constants > (SIZE_MAX - constants_start) / sizeof(Janet))
        janet_panic("invalid peg bytecode");
    size_t total_size = constants_start + sizeof(Janet) * (size_t) num_constants;

    /* DOS prevention? I.E. we could read bytecode and constants before
     * hand so we don't allocate a ton of memory on bad, short input */

    /* Allocate PEG */
    char *mem = janet_unmarshal_abstract(ctx, total_size);
    Peg *peg = (Peg *)mem;
    uint32_t *bytecode = (uint32_t *)(mem + bytecode_start);
    Janet *constants = (Janet *)(mem + constants_start);
    /* Keep both pointers NULL until verification succeeds; peg_mark skips a
     * peg whose constants pointer is NULL. */
    peg->bytecode = NULL;
    peg->constants = NULL;
    peg->bytecode_len = bytecode_len;
    peg->num_constants = num_constants;

    for (size_t i = 0; i < peg->bytecode_len; i++)
        bytecode[i] = (uint32_t) janet_unmarshal_int(ctx);
    for (uint32_t j = 0; j < peg->num_constants; j++)
        constants[j] = janet_unmarshal_janet(ctx);

    /* After here, no panics except for the bad: label. */

    /* Keep track at each index if an instruction was
     * referenced (0x01) or is in a main bytecode position
     * (0x02). This lets us do a linear scan and not
     * need to do a depth first traversal. It is stricter
     * than a dfs by not allowing certain kinds of unused
     * bytecode. */
    uint32_t blen = (uint32_t) peg->bytecode_len; /* lossless: capped above */
    uint32_t clen = peg->num_constants;
    uint8_t *op_flags = calloc(1, blen);
    if (NULL == op_flags) {
        JANET_OUT_OF_MEMORY;
    }

    /* verify peg bytecode */
    uint32_t i = 0;
    while (i < blen) {
        uint32_t instr = bytecode[i];
        uint32_t *rule = bytecode + i;
        /* Words left in the bytecode, counting the opcode word itself. Every
         * case checks this before touching rule[k] so a truncated trailing
         * instruction is rejected instead of being read out of bounds. */
        uint32_t avail = blen - i;
        op_flags[i] |= 0x02;
        switch (instr & 0x1F) {
            case RULE_LITERAL:
                /* [len, bytes...] */
                {
                    if (avail < 2) goto bad;
                    uint32_t nbytes = rule[1];
                    /* Bytes are packed four per word. Computed without
                     * adding to nbytes so a huge length cannot wrap around
                     * and pass as a short literal. */
                    uint32_t nwords = (nbytes >> 2) + ((nbytes & 3u) ? 1u : 0u);
                    if (nwords > avail - 2) goto bad;
                    i += 2 + nwords;
                }
                break;
            case RULE_NCHAR:
            case RULE_NOTNCHAR:
            case RULE_RANGE:
            case RULE_POSITION:
            case RULE_BACKMATCH:
                /* [1 word] */
                if (avail < 2) goto bad;
                i += 2;
                break;
            case RULE_SET:
                /* [8 words] */
                if (avail < 9) goto bad;
                i += 9;
                break;
            case RULE_LOOK:
                /* [offset, rule] */
                if (avail < 3) goto bad;
                if (rule[2] >= blen) goto bad;
                op_flags[rule[2]] |= 0x1;
                i += 3;
                break;
            case RULE_CHOICE:
            case RULE_SEQUENCE:
                /* [len, rules...] */
                {
                    if (avail < 2) goto bad;
                    uint32_t len = rule[1];
                    /* All sub-rule slots must lie inside the bytecode. */
                    if (len > avail - 2) goto bad;
                    for (uint32_t j = 0; j < len; j++) {
                        if (rule[2 + j] >= blen) goto bad;
                        op_flags[rule[2 + j]] |= 0x1;
                    }
                    i += 2 + len;
                }
                break;
            case RULE_IF:
            case RULE_IFNOT:
                /* [rule_a, rule_b (b if not a)] */
                if (avail < 3) goto bad;
                if (rule[1] >= blen) goto bad;
                if (rule[2] >= blen) goto bad;
                op_flags[rule[1]] |= 0x01;
                op_flags[rule[2]] |= 0x01;
                i += 3;
                break;
            case RULE_BETWEEN:
                /* [lo, hi, rule] */
                if (avail < 4) goto bad;
                if (rule[3] >= blen) goto bad;
                op_flags[rule[3]] |= 0x01;
                i += 4;
                break;
            case RULE_ARGUMENT:
            case RULE_GETTAG:
                /* [searchtag, tag] */
                if (avail < 3) goto bad;
                i += 3;
                break;
            case RULE_CONSTANT:
                /* [constant, tag] */
                if (avail < 3) goto bad;
                if (rule[1] >= clen) goto bad;
                i += 3;
                break;
            case RULE_ACCUMULATE:
            case RULE_GROUP:
            case RULE_CAPTURE:
                /* [rule, tag] */
                if (avail < 3) goto bad;
                if (rule[1] >= blen) goto bad;
                op_flags[rule[1]] |= 0x01;
                i += 3;
                break;
            case RULE_REPLACE:
            case RULE_MATCHTIME:
                /* [rule, constant, tag] */
                if (avail < 4) goto bad;
                if (rule[1] >= blen) goto bad;
                if (rule[2] >= clen) goto bad;
                op_flags[rule[1]] |= 0x01;
                i += 4;
                break;
            case RULE_ERROR:
            case RULE_DROP:
            case RULE_NOT:
                /* [rule] */
                if (avail < 2) goto bad;
                if (rule[1] >= blen) goto bad;
                op_flags[rule[1]] |= 0x01;
                i += 2;
                break;
            default:
                goto bad;
        }
    }

    /* last instruction cannot overflow */
    if (i != blen) goto bad;

    /* Make sure all referenced instructions are actually
     * in instruction positions. */
    for (i = 0; i < blen; i++)
        if (op_flags[i] == 0x01) goto bad;

    /* Good return */
    peg->bytecode = bytecode;
    peg->constants = constants;
    free(op_flags);
    return peg;

bad:
    free(op_flags);
    janet_panic("invalid peg bytecode");
}
|
|
|
|
static int cfun_peg_getter(JanetAbstract a, Janet key, Janet *out);
|
|
|
|
static const JanetAbstractType peg_type = {
|
|
"core/peg",
|
|
NULL,
|
|
peg_mark,
|
|
cfun_peg_getter,
|
|
NULL,
|
|
peg_marshal,
|
|
peg_unmarshal,
|
|
JANET_ATEND_UNMARSHAL
|
|
};
|
|
|
|
/* Convert Builder to Peg (Janet Abstract Value) */
|
|
static Peg *make_peg(Builder *b) {
|
|
size_t bytecode_start = size_padded(sizeof(Peg), sizeof(uint32_t));
|
|
size_t bytecode_size = janet_v_count(b->bytecode) * sizeof(uint32_t);
|
|
size_t constants_start = size_padded(bytecode_start + bytecode_size, sizeof(Janet));
|
|
size_t constants_size = janet_v_count(b->constants) * sizeof(Janet);
|
|
size_t total_size = constants_start + constants_size;
|
|
char *mem = janet_abstract(&peg_type, total_size);
|
|
Peg *peg = (Peg *)mem;
|
|
peg->bytecode = (uint32_t *)(mem + bytecode_start);
|
|
peg->constants = (Janet *)(mem + constants_start);
|
|
peg->num_constants = janet_v_count(b->constants);
|
|
safe_memcpy(peg->bytecode, b->bytecode, bytecode_size);
|
|
safe_memcpy(peg->constants, b->constants, constants_size);
|
|
peg->bytecode_len = janet_v_count(b->bytecode);
|
|
return peg;
|
|
}
|
|
|
|
/* Compiler entry point */
|
|
static Peg *compile_peg(Janet x) {
|
|
Builder builder;
|
|
builder.grammar = janet_table(0);
|
|
builder.default_grammar = janet_get_core_table("default-peg-grammar");
|
|
builder.tags = janet_table(0);
|
|
builder.constants = NULL;
|
|
builder.bytecode = NULL;
|
|
builder.nexttag = 1;
|
|
builder.form = x;
|
|
builder.depth = JANET_RECURSION_GUARD;
|
|
peg_compile1(&builder, x);
|
|
Peg *peg = make_peg(&builder);
|
|
builder_cleanup(&builder);
|
|
return peg;
|
|
}
|
|
|
|
/*
|
|
* C Functions
|
|
*/
|
|
|
|
/* (peg/compile peg): takes exactly one argument, compiles it and returns the
 * resulting peg wrapped as an abstract value. */
static Janet cfun_peg_compile(int32_t argc, Janet *argv) {
    janet_fixarity(argc, 1);
    return janet_wrap_abstract(compile_peg(argv[0]));
}
|
|
|
|
/* (peg/match peg text &opt start & args): run a peg against a byte sequence.
 * argv[0] is either an already compiled peg or peg source that is compiled
 * on the fly; argv[1] is the text; argv[2], when present, is the start
 * offset and any further arguments are passed to the match as extra values.
 * Returns the array of captures on success and nil when the peg fails. */
static Janet cfun_peg_match(int32_t argc, Janet *argv) {
    janet_arity(argc, 2, -1);

    /* Reuse a compiled peg when given one, otherwise compile the source. */
    Peg *peg;
    int precompiled = janet_checktype(argv[0], JANET_ABSTRACT) &&
                      janet_abstract_type(janet_unwrap_abstract(argv[0])) == &peg_type;
    if (precompiled)
        peg = janet_unwrap_abstract(argv[0]);
    else
        peg = compile_peg(argv[0]);

    JanetByteView bytes = janet_getbytes(argv, 1);

    /* Defaults for the two-argument form; overridden when a start offset
     * (and possibly extra arguments) were supplied. */
    PegState s;
    int32_t start = 0;
    s.extrac = 0;
    s.extrav = NULL;
    if (argc > 2) {
        start = janet_gethalfrange(argv, 2, bytes.len, "offset");
        s.extrac = argc - 3;
        s.extrav = janet_tuple_n(argv + 3, argc - 3);
    }

    s.mode = PEG_MODE_NORMAL;
    s.text_start = bytes.bytes;
    s.text_end = bytes.bytes + bytes.len;
    s.depth = JANET_RECURSION_GUARD;
    s.captures = janet_array(0);
    s.scratch = janet_buffer(10);
    s.tags = janet_buffer(10);
    s.constants = peg->constants;
    s.bytecode = peg->bytecode;

    /* Execution begins at the first bytecode word. */
    if (NULL == peg_rule(&s, s.bytecode, bytes.bytes + start))
        return janet_wrap_nil();
    return janet_wrap_array(s.captures);
}
|
|
|
|
/* Getter hook for pegs. The only key understood is :match, which yields the
 * peg/match C function. Returns 1 when *out was set and 0 for any other key
 * (*out is then left untouched). The abstract itself is not consulted. */
static int cfun_peg_getter(JanetAbstract a, Janet key, Janet *out) {
    (void) a;
    if (!janet_keyeq(key, "match")) return 0;
    *out = janet_wrap_cfunction(cfun_peg_match);
    return 1;
}
|
|
|
|
static const JanetReg peg_cfuns[] = {
|
|
{
|
|
"peg/compile", cfun_peg_compile,
|
|
JDOC("(peg/compile peg)\n\n"
|
|
"Compiles a peg source data structure into a <core/peg>. This will speed up matching "
|
|
"if the same peg will be used multiple times.")
|
|
},
|
|
{
|
|
"peg/match", cfun_peg_match,
|
|
JDOC("(peg/match peg text &opt start & args)\n\n"
|
|
"Match a Parsing Expression Grammar to a byte string and return an array of captured values. "
|
|
"Returns nil if text does not match the language defined by peg. The syntax of PEGs are very "
|
|
"similar to those defined by LPeg, and have similar capabilities.")
|
|
},
|
|
{NULL, NULL, NULL}
|
|
};
|
|
|
|
/* Load the peg module */
|
|
void janet_lib_peg(JanetTable *env) {
|
|
janet_core_cfuns(env, NULL, peg_cfuns);
|
|
janet_register_abstract_type(&peg_type);
|
|
}
|
|
|
|
#endif /* ifdef JANET_PEG */
|