/*
* Copyright (c) 2023 Calvin Rose
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
|
|
|
|
|
2019-01-24 05:15:58 +00:00
|
|
|
#ifndef JANET_AMALG
|
2019-12-31 00:06:15 +00:00
|
|
|
#include "features.h"
|
2019-02-19 01:13:35 +00:00
|
|
|
#include <janet.h>
|
2019-01-12 00:22:24 +00:00
|
|
|
#include <string.h>
|
|
|
|
#include "util.h"
|
2019-01-14 04:47:11 +00:00
|
|
|
#include "vector.h"
|
2019-01-18 04:43:46 +00:00
|
|
|
#include "util.h"
|
2019-01-24 05:15:58 +00:00
|
|
|
#endif
|
2019-01-12 00:22:24 +00:00
|
|
|
|
2019-02-18 02:25:30 +00:00
|
|
|
#ifdef JANET_PEG
|
|
|
|
|
2019-01-14 04:47:11 +00:00
|
|
|
/*
 * Runtime
 */
|
|
|
|
|
2019-01-12 00:22:24 +00:00
|
|
|
/* Hold captured patterns and match state */
|
|
|
|
typedef struct {
|
|
|
|
const uint8_t *text_start;
|
|
|
|
const uint8_t *text_end;
|
2019-01-14 04:47:11 +00:00
|
|
|
const uint32_t *bytecode;
|
|
|
|
const Janet *constants;
|
2019-01-12 02:09:49 +00:00
|
|
|
JanetArray *captures;
|
2019-01-12 15:16:25 +00:00
|
|
|
JanetBuffer *scratch;
|
2019-01-17 03:38:11 +00:00
|
|
|
JanetBuffer *tags;
|
2021-01-06 01:51:00 +00:00
|
|
|
JanetArray *tagged_captures;
|
2019-01-12 16:04:47 +00:00
|
|
|
const Janet *extrav;
|
2020-11-27 00:32:56 +00:00
|
|
|
int32_t *linemap;
|
2019-01-12 16:04:47 +00:00
|
|
|
int32_t extrac;
|
2019-01-12 22:31:15 +00:00
|
|
|
int32_t depth;
|
2020-11-27 00:32:56 +00:00
|
|
|
int32_t linemaplen;
|
2021-01-06 01:51:00 +00:00
|
|
|
int32_t has_backref;
|
2019-01-12 22:31:15 +00:00
|
|
|
enum {
|
2019-01-14 04:47:11 +00:00
|
|
|
PEG_MODE_NORMAL,
|
2019-04-06 15:38:00 +00:00
|
|
|
PEG_MODE_ACCUMULATE
|
2019-01-12 22:31:15 +00:00
|
|
|
} mode;
|
2019-01-14 04:47:11 +00:00
|
|
|
} PegState;
|
2019-01-12 00:22:24 +00:00
|
|
|
|
2019-01-14 04:47:11 +00:00
|
|
|
/* Allow backtrack with captures. We need
 * to save state at branches, and then reload
 * if one branch fails and try a new branch.
 *
 * Each field is a saved element count, not a pointer, so restoring a
 * CapState simply truncates the corresponding container. */
typedef struct {
    int32_t cap;     /* saved count of PegState.captures */
    int32_t tcap;    /* saved count of PegState.tagged_captures (and tags) */
    int32_t scratch; /* saved count of PegState.scratch */
} CapState;
|
|
|
|
|
|
|
|
/* Save the current capture state */
|
|
|
|
static CapState cap_save(PegState *s) {
|
|
|
|
CapState cs;
|
|
|
|
cs.scratch = s->scratch->count;
|
|
|
|
cs.cap = s->captures->count;
|
2021-01-06 01:51:00 +00:00
|
|
|
cs.tcap = s->tagged_captures->count;
|
2019-01-14 04:47:11 +00:00
|
|
|
return cs;
|
|
|
|
}
|
2019-01-12 00:22:24 +00:00
|
|
|
|
2019-01-14 04:47:11 +00:00
|
|
|
/* Load a saved capture state in the case of failure */
|
|
|
|
static void cap_load(PegState *s, CapState cs) {
|
|
|
|
s->scratch->count = cs.scratch;
|
|
|
|
s->captures->count = cs.cap;
|
2021-01-06 01:51:00 +00:00
|
|
|
s->tags->count = cs.tcap;
|
|
|
|
s->tagged_captures->count = cs.tcap;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Load a saved capture state in the case of success. Keeps
|
|
|
|
* tagged captures around for backref. */
|
|
|
|
static void cap_load_keept(PegState *s, CapState cs) {
|
|
|
|
s->scratch->count = cs.scratch;
|
|
|
|
s->captures->count = cs.cap;
|
2019-01-14 04:47:11 +00:00
|
|
|
}
|
2019-01-12 15:16:25 +00:00
|
|
|
|
2019-01-14 04:47:11 +00:00
|
|
|
/* Add a capture */
|
2019-01-17 03:38:11 +00:00
|
|
|
static void pushcap(PegState *s, Janet capture, uint32_t tag) {
|
2019-04-06 15:38:00 +00:00
|
|
|
if (s->mode == PEG_MODE_ACCUMULATE) {
|
2019-01-14 04:47:11 +00:00
|
|
|
janet_to_string_b(s->scratch, capture);
|
2019-04-06 15:38:00 +00:00
|
|
|
}
|
2021-01-06 01:51:00 +00:00
|
|
|
if (s->mode == PEG_MODE_NORMAL) {
|
2019-01-14 04:47:11 +00:00
|
|
|
janet_array_push(s->captures, capture);
|
2021-01-06 01:51:00 +00:00
|
|
|
}
|
|
|
|
if (s->has_backref) {
|
|
|
|
janet_array_push(s->tagged_captures, capture);
|
2019-01-17 03:38:11 +00:00
|
|
|
janet_buffer_push_u8(s->tags, tag);
|
|
|
|
}
|
2019-01-14 04:47:11 +00:00
|
|
|
}
|
2019-01-12 00:22:24 +00:00
|
|
|
|
2020-11-27 00:32:56 +00:00
|
|
|
/* Lazily generate line map to get line and column information for PegState.
|
|
|
|
* line and column are 1-indexed. */
|
|
|
|
typedef struct {
|
|
|
|
int32_t line;
|
|
|
|
int32_t col;
|
|
|
|
} LineCol;
|
|
|
|
static LineCol get_linecol_from_position(PegState *s, int32_t position) {
|
|
|
|
/* Generate if not made yet */
|
|
|
|
if (s->linemaplen < 0) {
|
|
|
|
int32_t newline_count = 0;
|
|
|
|
for (const uint8_t *c = s->text_start; c < s->text_end; c++) {
|
|
|
|
if (*c == '\n') newline_count++;
|
|
|
|
}
|
2020-11-27 18:21:23 +00:00
|
|
|
int32_t *mem = janet_smalloc(sizeof(int32_t) * newline_count);
|
2020-11-27 00:32:56 +00:00
|
|
|
size_t index = 0;
|
|
|
|
for (const uint8_t *c = s->text_start; c < s->text_end; c++) {
|
|
|
|
if (*c == '\n') mem[index++] = (int32_t)(c - s->text_start);
|
|
|
|
}
|
|
|
|
s->linemaplen = newline_count;
|
|
|
|
s->linemap = mem;
|
|
|
|
}
|
|
|
|
/* Do binary search for line. Slightly modified from classic binary search:
|
|
|
|
* - if we find that our current character is a line break, just return immediately.
|
|
|
|
* a newline character is consider to be on the same line as the character before
|
|
|
|
* (\n is line terminator, not line separator).
|
|
|
|
* - in the not-found case, we still want to find the greatest-indexed newline that
|
|
|
|
* is before position. we use that to calcuate the line and column.
|
|
|
|
* - in the case that lo = 0 and s->linemap[0] is still greater than position, we
|
|
|
|
* are on the first line and our column is position + 1. */
|
|
|
|
int32_t hi = s->linemaplen; /* hi is greater than the actual line */
|
|
|
|
int32_t lo = 0; /* lo is less than or equal to the actual line */
|
|
|
|
LineCol ret;
|
|
|
|
while (lo + 1 < hi) {
|
|
|
|
int32_t mid = lo + (hi - lo) / 2;
|
|
|
|
if (s->linemap[mid] >= position) {
|
|
|
|
hi = mid;
|
|
|
|
} else {
|
|
|
|
lo = mid;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* first line case */
|
|
|
|
if (s->linemaplen == 0 || (lo == 0 && s->linemap[0] >= position)) {
|
|
|
|
ret.line = 1;
|
|
|
|
ret.col = position + 1;
|
|
|
|
} else {
|
|
|
|
ret.line = lo + 2;
|
|
|
|
ret.col = position - s->linemap[lo];
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2020-09-27 17:18:12 +00:00
|
|
|
/* Convert a uint64_t to a int64_t by wrapping to a maximum number of bytes.
 *
 * Interprets the low `width` bytes of `from` as a two's complement signed
 * integer and sign-extends it to 64 bits. Bits above the low `width`
 * bytes are ignored.
 *
 * width <= 0 yields 0 (no bytes), width >= 8 reinterprets the whole
 * value. The extension is done with unsigned mask/xor arithmetic so it
 * does not rely on shifting by the full type width or on right-shifting
 * a negative signed value. */
static int64_t peg_convert_u64_s64(uint64_t from, int width) {
    if (width <= 0) return 0;
    if (width >= 8) return (int64_t) from;
    int bits = 8 * width;
    uint64_t mask = (((uint64_t) 1) << bits) - 1;
    uint64_t sign = ((uint64_t) 1) << (bits - 1);
    uint64_t low = from & mask;
    /* Flipping the sign bit and subtracting its weight maps
     * [0, 2^bits) onto [-2^(bits-1), 2^(bits-1)). */
    return (int64_t)(low ^ sign) - (int64_t) sign;
}
|
|
|
|
|
2019-01-14 04:47:11 +00:00
|
|
|
/* Prevent stack overflow.
 * down1 consumes one unit of the recursion budget in s->depth and panics
 * when the budget reaches zero; up1 gives the unit back. Every recursive
 * call to peg_rule is bracketed by a down1/up1 pair. */
#define down1(s) do { \
    if (0 == --((s)->depth)) janet_panic("peg/match recursed too deeply"); \
} while (0)
#define up1(s) ((s)->depth++)
|
|
|
|
|
2019-01-17 02:11:55 +00:00
|
|
|
/* Evaluate a peg rule
|
|
|
|
* Pre-conditions: s is in a valid state
|
|
|
|
* Post-conditions: If there is a match, returns a pointer to the next text.
|
|
|
|
* All captures on the capture stack are valid. If there is no match,
|
|
|
|
* returns NULL. Extra captures from successful child expressions can be
|
2019-04-06 15:38:00 +00:00
|
|
|
* left on the capture stack.
|
2019-01-17 02:11:55 +00:00
|
|
|
*/
|
2019-01-14 04:47:11 +00:00
|
|
|
static const uint8_t *peg_rule(
|
2019-02-20 01:51:34 +00:00
|
|
|
PegState *s,
|
|
|
|
const uint32_t *rule,
|
|
|
|
const uint8_t *text) {
|
2019-01-14 04:47:11 +00:00
|
|
|
tail:
|
2019-02-20 01:51:34 +00:00
|
|
|
switch (*rule & 0x1F) {
|
2019-01-14 04:47:11 +00:00
|
|
|
default:
|
|
|
|
janet_panic("unexpected opcode");
|
|
|
|
return NULL;
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_LITERAL: {
|
|
|
|
uint32_t len = rule[1];
|
|
|
|
if (text + len > s->text_end) return NULL;
|
|
|
|
return memcmp(text, rule + 2, len) ? NULL : text + len;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_NCHAR: {
|
|
|
|
uint32_t n = rule[1];
|
|
|
|
return (text + n > s->text_end) ? NULL : text + n;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_NOTNCHAR: {
|
|
|
|
uint32_t n = rule[1];
|
|
|
|
return (text + n > s->text_end) ? text : NULL;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_RANGE: {
|
|
|
|
uint8_t lo = rule[1] & 0xFF;
|
|
|
|
uint8_t hi = (rule[1] >> 16) & 0xFF;
|
|
|
|
return (text < s->text_end &&
|
|
|
|
text[0] >= lo &&
|
|
|
|
text[0] <= hi)
|
|
|
|
? text + 1
|
|
|
|
: NULL;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_SET: {
|
2022-09-05 19:11:03 +00:00
|
|
|
if (text >= s->text_end) return NULL;
|
2019-02-20 01:51:34 +00:00
|
|
|
uint32_t word = rule[1 + (text[0] >> 5)];
|
|
|
|
uint32_t mask = (uint32_t)1 << (text[0] & 0x1F);
|
2022-09-06 01:13:15 +00:00
|
|
|
return (word & mask)
|
2019-02-20 01:51:34 +00:00
|
|
|
? text + 1
|
|
|
|
: NULL;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_LOOK: {
|
|
|
|
text += ((int32_t *)rule)[1];
|
|
|
|
if (text < s->text_start || text > s->text_end) return NULL;
|
|
|
|
down1(s);
|
|
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[2], text);
|
|
|
|
up1(s);
|
2020-05-21 06:22:08 +00:00
|
|
|
text -= ((int32_t *)rule)[1];
|
2019-02-20 01:51:34 +00:00
|
|
|
return result ? text : NULL;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_CHOICE: {
|
|
|
|
uint32_t len = rule[1];
|
|
|
|
const uint32_t *args = rule + 2;
|
|
|
|
if (len == 0) return NULL;
|
|
|
|
down1(s);
|
|
|
|
CapState cs = cap_save(s);
|
|
|
|
for (uint32_t i = 0; i < len - 1; i++) {
|
|
|
|
const uint8_t *result = peg_rule(s, s->bytecode + args[i], text);
|
|
|
|
if (result) {
|
|
|
|
up1(s);
|
|
|
|
return result;
|
2019-01-14 04:47:11 +00:00
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
cap_load(s, cs);
|
2019-01-14 04:47:11 +00:00
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
up1(s);
|
|
|
|
rule = s->bytecode + args[len - 1];
|
|
|
|
goto tail;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_SEQUENCE: {
|
|
|
|
uint32_t len = rule[1];
|
|
|
|
const uint32_t *args = rule + 2;
|
|
|
|
if (len == 0) return text;
|
|
|
|
down1(s);
|
|
|
|
for (uint32_t i = 0; text && i < len - 1; i++)
|
|
|
|
text = peg_rule(s, s->bytecode + args[i], text);
|
|
|
|
up1(s);
|
|
|
|
if (!text) return NULL;
|
|
|
|
rule = s->bytecode + args[len - 1];
|
|
|
|
goto tail;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2022-09-12 22:05:31 +00:00
|
|
|
case RULE_IF: {
|
2019-02-20 01:51:34 +00:00
|
|
|
const uint32_t *rule_a = s->bytecode + rule[1];
|
|
|
|
const uint32_t *rule_b = s->bytecode + rule[2];
|
|
|
|
down1(s);
|
|
|
|
const uint8_t *result = peg_rule(s, rule_a, text);
|
|
|
|
up1(s);
|
2022-09-12 22:05:31 +00:00
|
|
|
if (!result) return NULL;
|
2019-02-20 01:51:34 +00:00
|
|
|
rule = rule_b;
|
|
|
|
goto tail;
|
|
|
|
}
|
2022-09-12 22:05:31 +00:00
|
|
|
case RULE_IFNOT: {
|
|
|
|
const uint32_t *rule_a = s->bytecode + rule[1];
|
|
|
|
const uint32_t *rule_b = s->bytecode + rule[2];
|
|
|
|
down1(s);
|
|
|
|
CapState cs = cap_save(s);
|
|
|
|
const uint8_t *result = peg_rule(s, rule_a, text);
|
|
|
|
if (!!result) {
|
|
|
|
up1(s);
|
|
|
|
return NULL;
|
|
|
|
} else {
|
|
|
|
cap_load(s, cs);
|
|
|
|
up1(s);
|
|
|
|
rule = rule_b;
|
|
|
|
goto tail;
|
|
|
|
}
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_NOT: {
|
|
|
|
const uint32_t *rule_a = s->bytecode + rule[1];
|
|
|
|
down1(s);
|
2022-09-12 22:05:31 +00:00
|
|
|
CapState cs = cap_save(s);
|
2019-02-20 01:51:34 +00:00
|
|
|
const uint8_t *result = peg_rule(s, rule_a, text);
|
2022-09-12 22:05:31 +00:00
|
|
|
if (result) {
|
|
|
|
up1(s);
|
|
|
|
return NULL;
|
|
|
|
} else {
|
|
|
|
cap_load(s, cs);
|
|
|
|
up1(s);
|
|
|
|
return text;
|
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2020-06-11 02:18:50 +00:00
|
|
|
case RULE_THRU:
|
|
|
|
case RULE_TO: {
|
|
|
|
const uint32_t *rule_a = s->bytecode + rule[1];
|
2022-11-05 21:38:52 +00:00
|
|
|
const uint8_t *next_text = NULL;
|
2020-06-11 02:18:50 +00:00
|
|
|
CapState cs = cap_save(s);
|
|
|
|
down1(s);
|
2021-02-19 22:10:03 +00:00
|
|
|
while (text <= s->text_end) {
|
2020-06-11 02:18:50 +00:00
|
|
|
CapState cs2 = cap_save(s);
|
|
|
|
next_text = peg_rule(s, rule_a, text);
|
2022-04-30 00:21:10 +00:00
|
|
|
if (next_text) {
|
|
|
|
if (rule[0] == RULE_TO) cap_load(s, cs2);
|
|
|
|
break;
|
|
|
|
}
|
2022-04-30 00:15:56 +00:00
|
|
|
cap_load(s, cs2);
|
2020-06-11 02:18:50 +00:00
|
|
|
text++;
|
|
|
|
}
|
|
|
|
up1(s);
|
2021-02-19 22:10:03 +00:00
|
|
|
if (text > s->text_end) {
|
2020-06-11 02:18:50 +00:00
|
|
|
cap_load(s, cs);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return rule[0] == RULE_TO ? text : next_text;
|
|
|
|
}
|
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_BETWEEN: {
|
|
|
|
uint32_t lo = rule[1];
|
|
|
|
uint32_t hi = rule[2];
|
|
|
|
const uint32_t *rule_a = s->bytecode + rule[3];
|
|
|
|
uint32_t captured = 0;
|
|
|
|
const uint8_t *next_text;
|
|
|
|
CapState cs = cap_save(s);
|
|
|
|
down1(s);
|
|
|
|
while (captured < hi) {
|
|
|
|
CapState cs2 = cap_save(s);
|
|
|
|
next_text = peg_rule(s, rule_a, text);
|
|
|
|
if (!next_text || next_text == text) {
|
|
|
|
cap_load(s, cs2);
|
|
|
|
break;
|
2019-01-14 04:47:11 +00:00
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
captured++;
|
|
|
|
text = next_text;
|
2019-01-14 04:47:11 +00:00
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
up1(s);
|
|
|
|
if (captured < lo) {
|
|
|
|
cap_load(s, cs);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return text;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2019-01-17 03:38:11 +00:00
|
|
|
/* Capturing rules */
|
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_GETTAG: {
|
|
|
|
uint32_t search = rule[1];
|
|
|
|
uint32_t tag = rule[2];
|
|
|
|
for (int32_t i = s->tags->count - 1; i >= 0; i--) {
|
|
|
|
if (s->tags->data[i] == search) {
|
2021-01-06 01:51:00 +00:00
|
|
|
pushcap(s, s->tagged_captures->data[i], tag);
|
2019-02-20 01:51:34 +00:00
|
|
|
return text;
|
2019-01-17 03:38:11 +00:00
|
|
|
}
|
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
2019-01-17 03:38:11 +00:00
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_POSITION: {
|
|
|
|
pushcap(s, janet_wrap_number((double)(text - s->text_start)), rule[1]);
|
|
|
|
return text;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2020-11-27 00:32:56 +00:00
|
|
|
case RULE_LINE: {
|
2020-11-27 18:21:23 +00:00
|
|
|
LineCol lc = get_linecol_from_position(s, (int32_t)(text - s->text_start));
|
2020-11-27 00:32:56 +00:00
|
|
|
pushcap(s, janet_wrap_number((double)(lc.line)), rule[1]);
|
|
|
|
return text;
|
|
|
|
}
|
|
|
|
|
|
|
|
case RULE_COLUMN: {
|
2020-11-27 18:21:23 +00:00
|
|
|
LineCol lc = get_linecol_from_position(s, (int32_t)(text - s->text_start));
|
2020-11-27 00:32:56 +00:00
|
|
|
pushcap(s, janet_wrap_number((double)(lc.col)), rule[1]);
|
|
|
|
return text;
|
|
|
|
}
|
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_ARGUMENT: {
|
|
|
|
int32_t index = ((int32_t *)rule)[1];
|
|
|
|
Janet capture = (index >= s->extrac) ? janet_wrap_nil() : s->extrav[index];
|
|
|
|
pushcap(s, capture, rule[2]);
|
|
|
|
return text;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_CONSTANT: {
|
|
|
|
pushcap(s, s->constants[rule[1]], rule[2]);
|
|
|
|
return text;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_CAPTURE: {
|
|
|
|
down1(s);
|
|
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
|
|
|
up1(s);
|
|
|
|
if (!result) return NULL;
|
|
|
|
/* Specialized pushcap - avoid intermediate string creation */
|
2021-01-06 01:51:00 +00:00
|
|
|
if (!s->has_backref && s->mode == PEG_MODE_ACCUMULATE) {
|
2019-02-20 01:51:34 +00:00
|
|
|
janet_buffer_push_bytes(s->scratch, text, (int32_t)(result - text));
|
|
|
|
} else {
|
2021-01-06 01:51:00 +00:00
|
|
|
uint32_t tag = rule[2];
|
2019-02-20 01:51:34 +00:00
|
|
|
pushcap(s, janet_stringv(text, (int32_t)(result - text)), tag);
|
2019-01-14 04:47:11 +00:00
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
return result;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2021-09-19 18:02:16 +00:00
|
|
|
case RULE_CAPTURE_NUM: {
|
|
|
|
down1(s);
|
|
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
|
|
|
up1(s);
|
|
|
|
if (!result) return NULL;
|
|
|
|
/* check number parsing */
|
|
|
|
double x = 0.0;
|
2021-09-21 23:02:42 +00:00
|
|
|
int32_t base = (int32_t) rule[2];
|
|
|
|
if (janet_scan_number_base(text, (int32_t)(result - text), base, &x)) return NULL;
|
2021-09-19 18:02:16 +00:00
|
|
|
/* Specialized pushcap - avoid intermediate string creation */
|
|
|
|
if (!s->has_backref && s->mode == PEG_MODE_ACCUMULATE) {
|
|
|
|
janet_buffer_push_bytes(s->scratch, text, (int32_t)(result - text));
|
|
|
|
} else {
|
2021-09-21 23:02:42 +00:00
|
|
|
uint32_t tag = rule[3];
|
2021-09-19 18:02:16 +00:00
|
|
|
pushcap(s, janet_wrap_number(x), tag);
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_ACCUMULATE: {
|
|
|
|
uint32_t tag = rule[2];
|
|
|
|
int oldmode = s->mode;
|
2019-04-06 15:38:00 +00:00
|
|
|
if (!tag && oldmode == PEG_MODE_ACCUMULATE) {
|
2019-02-20 01:51:34 +00:00
|
|
|
rule = s->bytecode + rule[1];
|
|
|
|
goto tail;
|
2019-01-17 23:10:04 +00:00
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
CapState cs = cap_save(s);
|
|
|
|
s->mode = PEG_MODE_ACCUMULATE;
|
|
|
|
down1(s);
|
|
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
|
|
|
up1(s);
|
|
|
|
s->mode = oldmode;
|
|
|
|
if (!result) return NULL;
|
2019-04-06 15:38:00 +00:00
|
|
|
Janet cap = janet_stringv(s->scratch->data + cs.scratch,
|
2019-04-13 23:32:31 +00:00
|
|
|
s->scratch->count - cs.scratch);
|
2021-01-06 01:51:00 +00:00
|
|
|
cap_load_keept(s, cs);
|
2019-02-20 01:51:34 +00:00
|
|
|
pushcap(s, cap, tag);
|
|
|
|
return result;
|
|
|
|
}
|
2019-01-17 23:10:04 +00:00
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_DROP: {
|
|
|
|
CapState cs = cap_save(s);
|
|
|
|
down1(s);
|
|
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
|
|
|
up1(s);
|
|
|
|
if (!result) return NULL;
|
|
|
|
cap_load(s, cs);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
case RULE_GROUP: {
|
|
|
|
uint32_t tag = rule[2];
|
|
|
|
int oldmode = s->mode;
|
|
|
|
CapState cs = cap_save(s);
|
|
|
|
s->mode = PEG_MODE_NORMAL;
|
|
|
|
down1(s);
|
|
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
|
|
|
up1(s);
|
|
|
|
s->mode = oldmode;
|
|
|
|
if (!result) return NULL;
|
|
|
|
int32_t num_sub_captures = s->captures->count - cs.cap;
|
|
|
|
JanetArray *sub_captures = janet_array(num_sub_captures);
|
2020-01-29 05:38:52 +00:00
|
|
|
safe_memcpy(sub_captures->data,
|
|
|
|
s->captures->data + cs.cap,
|
|
|
|
sizeof(Janet) * num_sub_captures);
|
2019-02-20 01:51:34 +00:00
|
|
|
sub_captures->count = num_sub_captures;
|
2021-01-06 01:51:00 +00:00
|
|
|
cap_load_keept(s, cs);
|
2019-02-20 01:51:34 +00:00
|
|
|
pushcap(s, janet_wrap_array(sub_captures), tag);
|
|
|
|
return result;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
|
|
|
case RULE_REPLACE:
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_MATCHTIME: {
|
|
|
|
uint32_t tag = rule[3];
|
|
|
|
int oldmode = s->mode;
|
|
|
|
CapState cs = cap_save(s);
|
|
|
|
s->mode = PEG_MODE_NORMAL;
|
|
|
|
down1(s);
|
|
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
|
|
|
up1(s);
|
|
|
|
s->mode = oldmode;
|
|
|
|
if (!result) return NULL;
|
|
|
|
|
2020-03-06 16:01:04 +00:00
|
|
|
Janet cap = janet_wrap_nil();
|
2019-02-20 01:51:34 +00:00
|
|
|
Janet constant = s->constants[rule[2]];
|
|
|
|
switch (janet_type(constant)) {
|
|
|
|
default:
|
|
|
|
cap = constant;
|
|
|
|
break;
|
|
|
|
case JANET_STRUCT:
|
2020-03-06 16:01:04 +00:00
|
|
|
if (s->captures->count) {
|
|
|
|
cap = janet_struct_get(janet_unwrap_struct(constant),
|
|
|
|
s->captures->data[s->captures->count - 1]);
|
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
break;
|
|
|
|
case JANET_TABLE:
|
2020-03-06 16:01:04 +00:00
|
|
|
if (s->captures->count) {
|
|
|
|
cap = janet_table_get(janet_unwrap_table(constant),
|
|
|
|
s->captures->data[s->captures->count - 1]);
|
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
break;
|
|
|
|
case JANET_CFUNCTION:
|
|
|
|
cap = janet_unwrap_cfunction(constant)(s->captures->count - cs.cap,
|
|
|
|
s->captures->data + cs.cap);
|
|
|
|
break;
|
|
|
|
case JANET_FUNCTION:
|
|
|
|
cap = janet_call(janet_unwrap_function(constant),
|
|
|
|
s->captures->count - cs.cap,
|
|
|
|
s->captures->data + cs.cap);
|
|
|
|
break;
|
2019-01-14 04:47:11 +00:00
|
|
|
}
|
2021-01-06 01:51:00 +00:00
|
|
|
cap_load_keept(s, cs);
|
2019-02-20 01:51:34 +00:00
|
|
|
if (rule[0] == RULE_MATCHTIME && !janet_truthy(cap)) return NULL;
|
|
|
|
pushcap(s, cap, tag);
|
|
|
|
return result;
|
|
|
|
}
|
2019-01-17 02:11:55 +00:00
|
|
|
|
2019-02-20 01:51:34 +00:00
|
|
|
case RULE_ERROR: {
|
|
|
|
int oldmode = s->mode;
|
|
|
|
s->mode = PEG_MODE_NORMAL;
|
|
|
|
int32_t old_cap = s->captures->count;
|
|
|
|
down1(s);
|
|
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
|
|
|
up1(s);
|
|
|
|
s->mode = oldmode;
|
|
|
|
if (!result) return NULL;
|
|
|
|
if (s->captures->count > old_cap) {
|
|
|
|
/* Throw last capture */
|
|
|
|
janet_panicv(s->captures->data[s->captures->count - 1]);
|
|
|
|
} else {
|
|
|
|
/* Throw generic error */
|
|
|
|
int32_t start = (int32_t)(text - s->text_start);
|
2020-11-27 00:57:24 +00:00
|
|
|
LineCol lc = get_linecol_from_position(s, start);
|
|
|
|
janet_panicf("match error at line %d, column %d", lc.line, lc.col);
|
2019-01-15 21:04:47 +00:00
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
2019-08-24 22:57:01 +00:00
|
|
|
|
|
|
|
case RULE_BACKMATCH: {
|
|
|
|
uint32_t search = rule[1];
|
|
|
|
for (int32_t i = s->tags->count - 1; i >= 0; i--) {
|
|
|
|
if (s->tags->data[i] == search) {
|
2021-01-06 01:51:00 +00:00
|
|
|
Janet capture = s->tagged_captures->data[i];
|
2019-08-24 22:57:01 +00:00
|
|
|
if (!janet_checktype(capture, JANET_STRING))
|
|
|
|
return NULL;
|
2019-08-30 00:09:43 +00:00
|
|
|
const uint8_t *bytes = janet_unwrap_string(capture);
|
2019-08-24 22:57:01 +00:00
|
|
|
int32_t len = janet_string_length(bytes);
|
|
|
|
if (text + len > s->text_end)
|
|
|
|
return NULL;
|
|
|
|
return memcmp(text, bytes, len) ? NULL : text + len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2020-05-02 15:37:39 +00:00
|
|
|
case RULE_LENPREFIX: {
|
|
|
|
int oldmode = s->mode;
|
|
|
|
s->mode = PEG_MODE_NORMAL;
|
|
|
|
const uint8_t *next_text;
|
|
|
|
CapState cs = cap_save(s);
|
|
|
|
down1(s);
|
|
|
|
next_text = peg_rule(s, s->bytecode + rule[1], text);
|
|
|
|
up1(s);
|
|
|
|
if (NULL == next_text) return NULL;
|
|
|
|
s->mode = oldmode;
|
|
|
|
int32_t num_sub_captures = s->captures->count - cs.cap;
|
|
|
|
Janet lencap;
|
|
|
|
if (num_sub_captures <= 0 ||
|
|
|
|
(lencap = s->captures->data[cs.cap], !janet_checkint(lencap))) {
|
|
|
|
cap_load(s, cs);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
int32_t nrep = janet_unwrap_integer(lencap);
|
|
|
|
/* drop captures from len pattern */
|
|
|
|
cap_load(s, cs);
|
|
|
|
for (int32_t i = 0; i < nrep; i++) {
|
|
|
|
down1(s);
|
|
|
|
next_text = peg_rule(s, s->bytecode + rule[2], next_text);
|
|
|
|
up1(s);
|
|
|
|
if (NULL == next_text) {
|
|
|
|
cap_load(s, cs);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return next_text;
|
|
|
|
}
|
|
|
|
|
2020-09-27 17:18:12 +00:00
|
|
|
case RULE_READINT: {
|
|
|
|
uint32_t tag = rule[2];
|
|
|
|
uint32_t signedness = rule[1] & 0x10;
|
|
|
|
uint32_t endianess = rule[1] & 0x20;
|
|
|
|
int width = (int)(rule[1] & 0xF);
|
|
|
|
if (text + width > s->text_end) return NULL;
|
|
|
|
uint64_t accum = 0;
|
|
|
|
if (endianess) {
|
|
|
|
/* BE */
|
|
|
|
for (int i = 0; i < width; i++) accum = (accum << 8) | text[i];
|
|
|
|
} else {
|
|
|
|
/* LE */
|
|
|
|
for (int i = width - 1; i >= 0; i--) accum = (accum << 8) | text[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
Janet capture_value;
|
|
|
|
/* We can only parse integeres of greater than 6 bytes reliable if int-types are enabled.
|
|
|
|
* Otherwise, we may lose precision, so 6 is the maximum size when int-types are disabled. */
|
|
|
|
#ifdef JANET_INT_TYPES
|
|
|
|
if (width > 6) {
|
|
|
|
if (signedness) {
|
|
|
|
capture_value = janet_wrap_s64(peg_convert_u64_s64(accum, width));
|
|
|
|
} else {
|
|
|
|
capture_value = janet_wrap_u64(accum);
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
double double_value;
|
|
|
|
if (signedness) {
|
|
|
|
double_value = (double)(peg_convert_u64_s64(accum, width));
|
|
|
|
} else {
|
|
|
|
double_value = (double)accum;
|
|
|
|
}
|
|
|
|
capture_value = janet_wrap_number(double_value);
|
|
|
|
}
|
|
|
|
|
|
|
|
pushcap(s, capture_value, tag);
|
|
|
|
return text + width;
|
|
|
|
}
|
|
|
|
|
2021-02-26 23:25:09 +00:00
|
|
|
case RULE_UNREF: {
|
|
|
|
int32_t tcap = s->tags->count;
|
|
|
|
down1(s);
|
|
|
|
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
|
|
|
|
up1(s);
|
|
|
|
if (!result) return NULL;
|
|
|
|
int32_t final_tcap = s->tags->count;
|
|
|
|
/* Truncate tagged captures to not include items of the given tag */
|
|
|
|
int32_t w = tcap;
|
|
|
|
/* If no tag is given, drop ALL tagged captures */
|
|
|
|
if (rule[2]) {
|
|
|
|
for (int32_t i = tcap; i < final_tcap; i++) {
|
|
|
|
if (s->tags->data[i] != (0xFF & rule[2])) {
|
|
|
|
s->tags->data[w] = s->tags->data[i];
|
|
|
|
s->tagged_captures->data[w] = s->tagged_captures->data[i];
|
|
|
|
w++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
s->tags->count = w;
|
|
|
|
s->tagged_captures->count = w;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2019-01-12 00:22:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-01-14 04:47:11 +00:00
|
|
|
/*
 * Compilation
 */
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
JanetTable *grammar;
|
2019-12-15 02:39:14 +00:00
|
|
|
JanetTable *default_grammar;
|
2019-01-17 03:38:11 +00:00
|
|
|
JanetTable *tags;
|
2019-01-14 04:47:11 +00:00
|
|
|
Janet *constants;
|
|
|
|
uint32_t *bytecode;
|
|
|
|
Janet form;
|
|
|
|
int depth;
|
2019-01-17 03:38:11 +00:00
|
|
|
uint32_t nexttag;
|
2021-01-06 01:51:00 +00:00
|
|
|
int has_backref;
|
2019-01-14 04:47:11 +00:00
|
|
|
} Builder;
|
|
|
|
|
|
|
|
/* Forward declaration to allow recursion */
|
2019-02-17 04:33:24 +00:00
|
|
|
static uint32_t peg_compile1(Builder *b, Janet peg);
|
2019-01-12 00:22:24 +00:00
|
|
|
|
2019-01-12 02:09:49 +00:00
|
|
|
/*
 * Errors
 */
|
|
|
|
|
2019-01-14 04:47:11 +00:00
|
|
|
/* Release the builder's growable constant and bytecode vectors. */
static void builder_cleanup(Builder *b) {
    janet_v_free(b->constants);
    janet_v_free(b->bytecode);
}
|
|
|
|
|
2019-05-31 14:10:20 +00:00
|
|
|
/* Abort compilation with a grammar error. Frees the builder's vectors
 * first, then panics with a message naming the form being compiled. */
JANET_NO_RETURN static void peg_panic(Builder *b, const char *msg) {
    builder_cleanup(b);
    janet_panicf("grammar error in %p, %s", b->form, msg);
}

/* Like peg_panic, but the message is built from a format string. */
#define peg_panicf(b,...) peg_panic((b), (const char *) janet_formatc(__VA_ARGS__))
|
|
|
|
|
|
|
|
/* Require exactly `arity` arguments; raise a grammar error otherwise. */
static void peg_fixarity(Builder *b, int32_t argc, int32_t arity) {
    if (argc != arity) {
        peg_panicf(b, "expected %d argument%s, got %d",
                   arity,
                   arity == 1 ? "" : "s",
                   argc);
    }
}
|
|
|
|
|
2019-01-14 04:47:11 +00:00
|
|
|
/* Require min <= arity <= max; raise a grammar error otherwise.
 * A negative min or max disables that bound. */
static void peg_arity(Builder *b, int32_t arity, int32_t min, int32_t max) {
    if (min >= 0 && arity < min)
        peg_panicf(b, "arity mismatch, expected at least %d, got %d", min, arity);
    if (max >= 0 && arity > max)
        peg_panicf(b, "arity mismatch, expected at most %d, got %d", max, arity);
}
|
|
|
|
|
2019-01-14 04:47:11 +00:00
|
|
|
/* Return the bytes of x, which must be a string describing a character
 * set; raise a grammar error for any other type. */
static const uint8_t *peg_getset(Builder *b, Janet x) {
    if (!janet_checktype(x, JANET_STRING))
        peg_panic(b, "expected string for character set");
    return janet_unwrap_string(x);
}
|
|
|
|
|
|
|
|
/* Return the bytes of x, which must be a two-byte string "lo hi" with
 * lo <= hi describing a character range; raise a grammar error otherwise. */
static const uint8_t *peg_getrange(Builder *b, Janet x) {
    if (!janet_checktype(x, JANET_STRING))
        peg_panic(b, "expected string for character range");
    const uint8_t *str = janet_unwrap_string(x);
    if (janet_string_length(str) != 2)
        peg_panicf(b, "expected string to have length 2, got %v", x);
    if (str[1] < str[0])
        peg_panicf(b, "range %v is empty", x);
    return str;
}
|
|
|
|
|
|
|
|
/* Return x as a 32-bit integer; raise a grammar error if it is not one. */
static int32_t peg_getinteger(Builder *b, Janet x) {
    if (!janet_checkint(x))
        peg_panicf(b, "expected integer, got %v", x);
    return janet_unwrap_integer(x);
}
|
|
|
|
|
|
|
|
/* Return x as a non-negative 32-bit integer; raise a grammar error if
 * it is not an integer or is negative. */
static int32_t peg_getnat(Builder *b, Janet x) {
    int32_t i = peg_getinteger(b, x);
    if (i < 0)
        peg_panicf(b, "expected non-negative integer, got %v", x);
    return i;
}
|
|
|
|
|
2019-01-17 03:38:11 +00:00
|
|
|
/*
 * Emission
 */
|
|
|
|
|
|
|
|
/* Append c to the constant pool and return its index. */
static uint32_t emit_constant(Builder *b, Janet c) {
    uint32_t cindex = (uint32_t) janet_v_count(b->constants);
    janet_v_push(b->constants, c);
    return cindex;
}
|
|
|
|
|
|
|
|
/* Map a keyword capture tag to its numeric tag.
 * A keyword seen for the first time is assigned the next free number and
 * remembered in b->tags; later uses return the same number. Raises a
 * grammar error if t is not a keyword or if the new number exceeds 255. */
static uint32_t emit_tag(Builder *b, Janet t) {
    if (!janet_checktype(t, JANET_KEYWORD))
        peg_panicf(b, "expected keyword for capture tag, got %v", t);
    Janet check = janet_table_get(b->tags, t);
    if (janet_checktype(check, JANET_NIL)) {
        uint32_t tag = b->nexttag++;
        if (tag > 255) {
            peg_panic(b, "too many tags - up to 255 tags are supported per peg");
        }
        Janet val = janet_wrap_number(tag);
        janet_table_put(b->tags, t, val);
        return tag;
    } else {
        return (uint32_t) janet_unwrap_number(check);
    }
}
|
|
|
|
|
|
|
|
/* Reserve space in bytecode for a rule. When a special emits a rule,
|
|
|
|
* it must place that rule immediately on the bytecode stack. This lets
|
|
|
|
* the compiler know where the rule is going to be before it is complete,
|
|
|
|
* allowing recursive rules. */
|
|
|
|
typedef struct {
|
|
|
|
Builder *builder;
|
|
|
|
uint32_t index;
|
|
|
|
int32_t size;
|
|
|
|
} Reserve;
|
|
|
|
|
|
|
|
static Reserve reserve(Builder *b, int32_t size) {
|
|
|
|
Reserve r;
|
|
|
|
r.index = janet_v_count(b->bytecode);
|
|
|
|
r.builder = b;
|
|
|
|
r.size = size;
|
|
|
|
for (int32_t i = 0; i < size; i++)
|
|
|
|
janet_v_push(b->bytecode, 0);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Emit a rule in the builder. Returns the index of the new rule */
|
|
|
|
static void emit_rule(Reserve r, int32_t op, int32_t n, const uint32_t *body) {
|
|
|
|
janet_assert(r.size == n + 1, "bad reserve");
|
|
|
|
r.builder->bytecode[r.index] = op;
|
|
|
|
memcpy(r.builder->bytecode + r.index + 1, body, n * sizeof(uint32_t));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* For RULE_LITERAL */
|
|
|
|
static void emit_bytes(Builder *b, uint32_t op, int32_t len, const uint8_t *bytes) {
|
|
|
|
uint32_t next_rule = janet_v_count(b->bytecode);
|
|
|
|
janet_v_push(b->bytecode, op);
|
|
|
|
janet_v_push(b->bytecode, len);
|
|
|
|
int32_t words = ((len + 3) >> 2);
|
|
|
|
for (int32_t i = 0; i < words; i++)
|
|
|
|
janet_v_push(b->bytecode, 0);
|
|
|
|
memcpy(b->bytecode + next_rule + 2, bytes, len);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* For fixed arity rules of arities 1, 2, and 3 */
|
|
|
|
static void emit_1(Reserve r, uint32_t op, uint32_t arg) {
|
|
|
|
emit_rule(r, op, 1, &arg);
|
|
|
|
}
|
|
|
|
static void emit_2(Reserve r, uint32_t op, uint32_t arg1, uint32_t arg2) {
|
|
|
|
uint32_t arr[2] = {arg1, arg2};
|
|
|
|
emit_rule(r, op, 2, arr);
|
|
|
|
}
|
|
|
|
static void emit_3(Reserve r, uint32_t op, uint32_t arg1, uint32_t arg2, uint32_t arg3) {
|
|
|
|
uint32_t arr[3] = {arg1, arg2, arg3};
|
|
|
|
emit_rule(r, op, 3, arr);
|
|
|
|
}
|
|
|
|
|
2019-01-12 15:16:25 +00:00
|
|
|
/*
|
2019-01-14 04:47:11 +00:00
|
|
|
* Specials
|
2019-01-12 15:16:25 +00:00
|
|
|
*/
|
|
|
|
|
2019-01-14 04:47:11 +00:00
|
|
|
/* Mark byte value c as present in a 256-bit set stored as eight 32-bit
 * words: word c / 32, bit c % 32. */
static void bitmap_set(uint32_t *bitmap, uint8_t c) {
    const uint32_t word = c / 32u;
    const uint32_t bit = c % 32u;
    bitmap[word] |= (uint32_t) 1 << bit;
}
|
|
|
|
|
2019-01-14 20:06:35 +00:00
|
|
|
/* (range "az" ...): one range compiles to RULE_RANGE with the low byte in
 * bits 0-7 and the high byte shifted left by 16; several ranges are merged
 * into a 256-bit bitmap and compiled as RULE_SET. */
static void spec_range(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 1, -1);

    if (argc == 1) {
        Reserve r = reserve(b, 2);
        const uint8_t *bounds = peg_getrange(b, argv[0]);
        uint32_t packed = bounds[0] | (bounds[1] << 16);
        emit_1(r, RULE_RANGE, packed);
        return;
    }

    /* Compile as a set */
    Reserve r = reserve(b, 9);
    uint32_t bitmap[8] = {0};
    for (int32_t k = 0; k < argc; k++) {
        const uint8_t *bounds = peg_getrange(b, argv[k]);
        for (uint32_t ch = bounds[0]; ch <= bounds[1]; ch++) {
            bitmap_set(bitmap, ch);
        }
    }
    emit_rule(r, RULE_SET, 8, bitmap);
}
|
|
|
|
|
2019-01-14 20:06:35 +00:00
|
|
|
/* (set "abc"): compile a RULE_SET whose bitmap has one bit set for every
 * byte of the argument string. */
static void spec_set(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 1);
    Reserve r = reserve(b, 9);
    const uint8_t *members = peg_getset(b, argv[0]);
    uint32_t bitmap[8] = {0};
    for (int32_t k = 0; k < janet_string_length(members); k++) {
        bitmap_set(bitmap, members[k]);
    }
    emit_rule(r, RULE_SET, 8, bitmap);
}
|
|
|
|
|
2019-01-14 20:06:35 +00:00
|
|
|
/* (look patt) or (look offset patt): compile RULE_LOOK with an integer
 * offset (0 when omitted) and the compiled sub-pattern. */
static void spec_look(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 1, 2);
    Reserve r = reserve(b, 3);

    int32_t pattern_at = 0;
    int32_t offset = 0;
    if (argc == 2) {
        /* Two-argument form: the offset comes first, then the pattern. */
        pattern_at = 1;
        offset = peg_getinteger(b, argv[0]);
    }

    uint32_t subrule = peg_compile1(b, argv[pattern_at]);
    emit_2(r, RULE_LOOK, (uint32_t) offset, subrule);
}
|
|
|
|
|
|
|
|
/* Rule of the form [len, rules...] */
|
2019-01-14 20:06:35 +00:00
|
|
|
static void spec_variadic(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
|
2019-01-14 04:47:11 +00:00
|
|
|
uint32_t rule = janet_v_count(b->bytecode);
|
|
|
|
janet_v_push(b->bytecode, op);
|
|
|
|
janet_v_push(b->bytecode, argc);
|
|
|
|
for (int32_t i = 0; i < argc; i++)
|
|
|
|
janet_v_push(b->bytecode, 0);
|
|
|
|
for (int32_t i = 0; i < argc; i++) {
|
2019-02-17 04:33:24 +00:00
|
|
|
uint32_t rulei = peg_compile1(b, argv[i]);
|
2019-01-14 04:47:11 +00:00
|
|
|
b->bytecode[rule + 2 + i] = rulei;
|
2019-01-12 22:31:15 +00:00
|
|
|
}
|
2019-01-14 04:47:11 +00:00
|
|
|
}
|
2019-01-12 15:16:25 +00:00
|
|
|
|
2019-01-14 20:06:35 +00:00
|
|
|
static void spec_choice(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_variadic(b, argc, argv, RULE_CHOICE);
|
2019-01-14 04:47:11 +00:00
|
|
|
}
|
2019-01-14 20:06:35 +00:00
|
|
|
static void spec_sequence(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_variadic(b, argc, argv, RULE_SEQUENCE);
|
2019-01-12 02:09:49 +00:00
|
|
|
}
|
|
|
|
|
2019-01-15 01:41:32 +00:00
|
|
|
/* For (if a b) and (if-not a b) */
|
|
|
|
static void spec_branch(Builder *b, int32_t argc, const Janet *argv, uint32_t rule) {
|
2019-01-14 04:47:11 +00:00
|
|
|
peg_fixarity(b, argc, 2);
|
2019-01-14 20:06:35 +00:00
|
|
|
Reserve r = reserve(b, 3);
|
2019-02-17 04:33:24 +00:00
|
|
|
uint32_t rule_a = peg_compile1(b, argv[0]);
|
|
|
|
uint32_t rule_b = peg_compile1(b, argv[1]);
|
2019-01-15 01:41:32 +00:00
|
|
|
emit_2(r, rule, rule_a, rule_b);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void spec_if(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_branch(b, argc, argv, RULE_IF);
|
|
|
|
}
|
|
|
|
static void spec_ifnot(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_branch(b, argc, argv, RULE_IFNOT);
|
2019-01-12 02:09:49 +00:00
|
|
|
}
|
2020-05-02 15:37:39 +00:00
|
|
|
static void spec_lenprefix(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_branch(b, argc, argv, RULE_LENPREFIX);
|
|
|
|
}
|
2019-01-12 02:09:49 +00:00
|
|
|
|
2019-01-14 20:06:35 +00:00
|
|
|
/* (between lo hi patt): compile RULE_BETWEEN with two non-negative integer
 * bounds and the compiled sub-pattern. */
static void spec_between(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 3);
    Reserve r = reserve(b, 4);
    int32_t lower = peg_getnat(b, argv[0]);
    int32_t upper = peg_getnat(b, argv[1]);
    uint32_t subrule = peg_compile1(b, argv[2]);
    emit_3(r, RULE_BETWEEN, lower, upper, subrule);
}
|
2019-01-12 22:31:15 +00:00
|
|
|
|
2019-01-14 20:06:35 +00:00
|
|
|
/* Shared compiler for open-ended repetition: RULE_BETWEEN with lower bound
 * `min` and UINT32_MAX as the upper bound. */
static void spec_repeater(Builder *b, int32_t argc, const Janet *argv, int32_t min) {
    peg_fixarity(b, argc, 1);
    Reserve r = reserve(b, 4);
    uint32_t subrule = peg_compile1(b, argv[0]);
    emit_3(r, RULE_BETWEEN, min, UINT32_MAX, subrule);
}

/* (some patt): at least one repetition */
static void spec_some(Builder *b, int32_t argc, const Janet *argv) {
    spec_repeater(b, argc, argv, 1);
}

/* (any patt): zero or more repetitions */
static void spec_any(Builder *b, int32_t argc, const Janet *argv) {
    spec_repeater(b, argc, argv, 0);
}
|
2019-01-12 22:31:15 +00:00
|
|
|
|
2019-01-14 20:06:35 +00:00
|
|
|
/* (at-least n patt): RULE_BETWEEN with lower bound n and UINT32_MAX as the
 * upper bound. */
static void spec_atleast(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 2);
    Reserve r = reserve(b, 4);
    int32_t minimum = peg_getnat(b, argv[0]);
    uint32_t subrule = peg_compile1(b, argv[1]);
    emit_3(r, RULE_BETWEEN, minimum, UINT32_MAX, subrule);
}
|
2019-01-12 22:31:15 +00:00
|
|
|
|
2019-01-14 20:06:35 +00:00
|
|
|
/* (at-most n patt): RULE_BETWEEN with lower bound 0 and upper bound n. */
static void spec_atmost(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 2);
    Reserve r = reserve(b, 4);
    int32_t maximum = peg_getnat(b, argv[0]);
    uint32_t subrule = peg_compile1(b, argv[1]);
    emit_3(r, RULE_BETWEEN, 0, maximum, subrule);
}
|
|
|
|
|
2019-01-15 19:08:03 +00:00
|
|
|
/* (opt patt) / (? patt): RULE_BETWEEN with bounds 0 and 1. */
static void spec_opt(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 1);
    Reserve r = reserve(b, 4);
    uint32_t optional = peg_compile1(b, argv[0]);
    emit_3(r, RULE_BETWEEN, 0, 1, optional);
}
|
|
|
|
|
2020-01-15 01:58:03 +00:00
|
|
|
/* (repeat n patt): RULE_BETWEEN with both bounds equal to n. */
static void spec_repeat(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 2);
    Reserve r = reserve(b, 4);
    int32_t count = peg_getnat(b, argv[0]);
    uint32_t subrule = peg_compile1(b, argv[1]);
    emit_3(r, RULE_BETWEEN, count, count, subrule);
}
|
2019-01-17 03:38:11 +00:00
|
|
|
|
|
|
|
/* Rule of the form [rule] */
|
|
|
|
static void spec_onerule(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
|
|
|
|
peg_fixarity(b, argc, 1);
|
|
|
|
Reserve r = reserve(b, 2);
|
2019-02-17 04:33:24 +00:00
|
|
|
uint32_t rule = peg_compile1(b, argv[0]);
|
2019-01-17 03:38:11 +00:00
|
|
|
emit_1(r, op, rule);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void spec_not(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_onerule(b, argc, argv, RULE_NOT);
|
|
|
|
}
|
|
|
|
static void spec_error(Builder *b, int32_t argc, const Janet *argv) {
|
2020-11-27 00:57:24 +00:00
|
|
|
if (argc == 0) {
|
|
|
|
Reserve r = reserve(b, 2);
|
|
|
|
uint32_t rule = peg_compile1(b, janet_wrap_number(0));
|
|
|
|
emit_1(r, RULE_ERROR, rule);
|
|
|
|
} else {
|
|
|
|
spec_onerule(b, argc, argv, RULE_ERROR);
|
|
|
|
}
|
2019-01-17 03:38:11 +00:00
|
|
|
}
|
2020-06-11 02:18:50 +00:00
|
|
|
static void spec_to(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_onerule(b, argc, argv, RULE_TO);
|
|
|
|
}
|
|
|
|
static void spec_thru(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_onerule(b, argc, argv, RULE_THRU);
|
|
|
|
}
|
2021-02-26 23:25:09 +00:00
|
|
|
static void spec_drop(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_onerule(b, argc, argv, RULE_DROP);
|
|
|
|
}
|
2019-01-17 03:38:11 +00:00
|
|
|
|
|
|
|
/* Rule of the form [rule, tag] */
|
|
|
|
static void spec_cap1(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
|
|
|
|
peg_arity(b, argc, 1, 2);
|
|
|
|
Reserve r = reserve(b, 3);
|
|
|
|
uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
|
2019-02-17 04:33:24 +00:00
|
|
|
uint32_t rule = peg_compile1(b, argv[0]);
|
2019-01-17 03:38:11 +00:00
|
|
|
emit_2(r, op, rule, tag);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void spec_capture(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_cap1(b, argc, argv, RULE_CAPTURE);
|
|
|
|
}
|
|
|
|
static void spec_accumulate(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_cap1(b, argc, argv, RULE_ACCUMULATE);
|
|
|
|
}
|
|
|
|
static void spec_group(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_cap1(b, argc, argv, RULE_GROUP);
|
|
|
|
}
|
2021-02-26 23:25:09 +00:00
|
|
|
static void spec_unref(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_cap1(b, argc, argv, RULE_UNREF);
|
|
|
|
}
|
2019-01-17 03:38:11 +00:00
|
|
|
|
2021-09-21 23:02:42 +00:00
|
|
|
/* (number patt ?base ?tag): compile RULE_CAPTURE_NUM.
 *
 * argv[0] is the pattern to match. argv[1], when present and not nil, must
 * be an integer in [2, 36]; a missing or nil base is emitted as 0. argv[2],
 * when present, is a keyword tag (tag id 0 otherwise).
 *
 * An invalid base raises a compile error that reports argv[1], the value
 * that was actually rejected. Reporting argv[2] would read past the end of
 * the arguments whenever only two were supplied. */
static void spec_capture_number(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 1, 3);
    Reserve r = reserve(b, 4);
    uint32_t base = 0;
    if (argc >= 2) {
        if (!janet_checktype(argv[1], JANET_NIL)) {
            if (!janet_checkint(argv[1])) goto error;
            /* Negative values wrap to large unsigned values and fail the
             * range check below. */
            base = (uint32_t) janet_unwrap_integer(argv[1]);
            if (base < 2 || base > 36) goto error;
        }
    }
    uint32_t tag = (argc == 3) ? emit_tag(b, argv[2]) : 0;
    uint32_t rule = peg_compile1(b, argv[0]);
    emit_3(r, RULE_CAPTURE_NUM, rule, base, tag);
    return;
error:
    /* Only reachable from inside the argc >= 2 branch, so argv[1] exists. */
    peg_panicf(b, "expected integer between 2 and 36, got %v", argv[1]);
}
|
|
|
|
|
2019-01-17 03:38:11 +00:00
|
|
|
/* (backref tag ?newtag) / (-> tag ?newtag): compile RULE_GETTAG with the id
 * of the tag to look up and the id of the optional tag for the result (0
 * when omitted). Also sets has_backref on the builder. */
static void spec_reference(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 1, 2);
    Reserve r = reserve(b, 3);
    uint32_t lookup = emit_tag(b, argv[0]);
    uint32_t result_tag = 0;
    if (argc == 2) {
        result_tag = emit_tag(b, argv[1]);
    }
    b->has_backref = 1;
    emit_2(r, RULE_GETTAG, lookup, result_tag);
}
|
|
|
|
|
2019-08-24 22:57:01 +00:00
|
|
|
/* Rule of the form [tag]: takes at most one argument, an optional keyword
 * tag (tag id 0 when omitted), and emits it as the operand of op. */
static void spec_tag1(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
    peg_arity(b, argc, 0, 1);
    Reserve r = reserve(b, 2);
    uint32_t tag = 0;
    if (argc) {
        tag = emit_tag(b, argv[0]);
    }
    emit_1(r, op, tag);
}

/* (position ?tag) / ($ ?tag) */
static void spec_position(Builder *b, int32_t argc, const Janet *argv) {
    spec_tag1(b, argc, argv, RULE_POSITION);
}

/* (line ?tag) */
static void spec_line(Builder *b, int32_t argc, const Janet *argv) {
    spec_tag1(b, argc, argv, RULE_LINE);
}

/* (column ?tag) */
static void spec_column(Builder *b, int32_t argc, const Janet *argv) {
    spec_tag1(b, argc, argv, RULE_COLUMN);
}

/* (backmatch ?tag): sets has_backref on the builder before emitting. */
static void spec_backmatch(Builder *b, int32_t argc, const Janet *argv) {
    b->has_backref = 1;
    spec_tag1(b, argc, argv, RULE_BACKMATCH);
}
|
|
|
|
|
|
|
|
/* (argument n ?tag): compile RULE_ARGUMENT with a non-negative integer
 * index and an optional keyword tag (0 when omitted). The tag is resolved
 * before the index is validated. */
static void spec_argument(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 1, 2);
    Reserve r = reserve(b, 3);
    uint32_t tag = 0;
    if (argc == 2) {
        tag = emit_tag(b, argv[1]);
    }
    int32_t which = peg_getnat(b, argv[0]);
    emit_2(r, RULE_ARGUMENT, which, tag);
}
|
|
|
|
|
|
|
|
/* (constant value ?tag): compile RULE_CONSTANT.
 *
 * argv[0] is stored in the builder's constant vector and its index becomes
 * the first operand. argv[1], when present, is a keyword tag (tag id 0
 * otherwise).
 *
 * Arity is checked with peg_arity so that a mismatch is reported through the
 * builder, like every other special in this file, rather than through the
 * generic janet_arity. NOTE(review): peg_panic's body is not visible here;
 * presumably it adds PEG error context and releases builder state. Confirm. */
static void spec_constant(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 1, 2);
    Reserve r = reserve(b, 3);
    uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
    emit_2(r, RULE_CONSTANT, emit_constant(b, argv[0]), tag);
}
|
|
|
|
|
|
|
|
/* (replace patt subst ?tag) / (/ patt subst ?tag): compile RULE_REPLACE
 * with the compiled pattern, the constant index of the substitution value,
 * and an optional keyword tag (0 when omitted). */
static void spec_replace(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 2, 3);
    Reserve r = reserve(b, 4);
    uint32_t subrule = peg_compile1(b, argv[0]);
    uint32_t subst_index = emit_constant(b, argv[1]);
    uint32_t tag = 0;
    if (argc == 3) {
        tag = emit_tag(b, argv[2]);
    }
    emit_3(r, RULE_REPLACE, subrule, subst_index, tag);
}
|
|
|
|
|
|
|
|
/* (cmt patt fun ?tag): compile RULE_MATCHTIME with the compiled pattern,
 * the constant index of the callback, and an optional keyword tag (0 when
 * omitted). The callback must be a function or a cfunction. */
static void spec_matchtime(Builder *b, int32_t argc, const Janet *argv) {
    peg_arity(b, argc, 2, 3);
    Reserve r = reserve(b, 4);
    uint32_t subrule = peg_compile1(b, argv[0]);

    Janet callback = argv[1];
    int callable = janet_checktype(callback, JANET_FUNCTION) ||
                   janet_checktype(callback, JANET_CFUNCTION);
    if (!callable) {
        peg_panicf(b, "expected function|cfunction, got %v", callback);
    }

    uint32_t tag = 0;
    if (argc == 3) {
        tag = emit_tag(b, argv[2]);
    }
    uint32_t callback_index = emit_constant(b, callback);
    emit_3(r, RULE_MATCHTIME, subrule, callback_index, tag);
}
|
|
|
|
|
2020-09-27 17:18:12 +00:00
|
|
|
#ifdef JANET_INT_TYPES
|
|
|
|
#define JANET_MAX_READINT_WIDTH 8
|
|
|
|
#else
|
|
|
|
#define JANET_MAX_READINT_WIDTH 6
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static void spec_readint(Builder *b, int32_t argc, const Janet *argv, uint32_t mask) {
|
|
|
|
peg_arity(b, argc, 1, 2);
|
|
|
|
Reserve r = reserve(b, 3);
|
2021-02-09 23:33:46 +00:00
|
|
|
uint32_t tag = (argc == 2) ? emit_tag(b, argv[1]) : 0;
|
2020-09-27 17:18:12 +00:00
|
|
|
int32_t width = peg_getnat(b, argv[0]);
|
|
|
|
if ((width < 0) || (width > JANET_MAX_READINT_WIDTH)) {
|
|
|
|
peg_panicf(b, "width must be between 0 and %d, got %d", JANET_MAX_READINT_WIDTH, width);
|
|
|
|
}
|
|
|
|
emit_2(r, RULE_READINT, mask | ((uint32_t) width), tag);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void spec_uint_le(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_readint(b, argc, argv, 0x0u);
|
|
|
|
}
|
|
|
|
static void spec_int_le(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_readint(b, argc, argv, 0x10u);
|
|
|
|
}
|
|
|
|
static void spec_uint_be(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_readint(b, argc, argv, 0x20u);
|
|
|
|
}
|
|
|
|
static void spec_int_be(Builder *b, int32_t argc, const Janet *argv) {
|
|
|
|
spec_readint(b, argc, argv, 0x30u);
|
|
|
|
}
|
|
|
|
|
2019-01-14 16:45:45 +00:00
|
|
|
/* Special compiler form */
|
2019-01-14 20:06:35 +00:00
|
|
|
typedef void (*Special)(Builder *b, int32_t argc, const Janet *argv);
|
2019-01-12 15:16:25 +00:00
|
|
|
typedef struct {
|
|
|
|
const char *name;
|
2019-01-14 04:47:11 +00:00
|
|
|
Special special;
|
|
|
|
} SpecialPair;
|
|
|
|
|
2019-01-15 21:04:47 +00:00
|
|
|
/* Keep in lexical order (vim :sort works well) */
|
2019-02-17 04:33:24 +00:00
|
|
|
static const SpecialPair peg_specials[] = {
|
2019-01-14 04:47:11 +00:00
|
|
|
{"!", spec_not},
|
2019-01-15 03:17:13 +00:00
|
|
|
{"$", spec_position},
|
2019-01-17 02:11:55 +00:00
|
|
|
{"%", spec_accumulate},
|
2019-01-14 04:47:11 +00:00
|
|
|
{"*", spec_sequence},
|
|
|
|
{"+", spec_choice},
|
2019-01-17 03:38:11 +00:00
|
|
|
{"->", spec_reference},
|
2019-01-14 04:47:11 +00:00
|
|
|
{"/", spec_replace},
|
2019-01-15 01:41:32 +00:00
|
|
|
{"<-", spec_capture},
|
2019-01-14 04:47:11 +00:00
|
|
|
{">", spec_look},
|
2019-01-15 19:08:03 +00:00
|
|
|
{"?", spec_opt},
|
2019-01-17 02:11:55 +00:00
|
|
|
{"accumulate", spec_accumulate},
|
2019-01-14 04:47:11 +00:00
|
|
|
{"any", spec_any},
|
|
|
|
{"argument", spec_argument},
|
|
|
|
{"at-least", spec_atleast},
|
|
|
|
{"at-most", spec_atmost},
|
2019-08-24 22:57:01 +00:00
|
|
|
{"backmatch", spec_backmatch},
|
2019-01-17 03:38:11 +00:00
|
|
|
{"backref", spec_reference},
|
2019-01-14 04:47:11 +00:00
|
|
|
{"between", spec_between},
|
|
|
|
{"capture", spec_capture},
|
|
|
|
{"choice", spec_choice},
|
2019-01-14 16:45:45 +00:00
|
|
|
{"cmt", spec_matchtime},
|
2020-11-27 00:32:56 +00:00
|
|
|
{"column", spec_column},
|
2019-01-14 04:47:11 +00:00
|
|
|
{"constant", spec_constant},
|
2019-01-17 23:10:04 +00:00
|
|
|
{"drop", spec_drop},
|
2019-01-15 21:04:47 +00:00
|
|
|
{"error", spec_error},
|
2019-01-14 04:47:11 +00:00
|
|
|
{"group", spec_group},
|
2019-01-15 01:41:32 +00:00
|
|
|
{"if", spec_if},
|
2019-01-14 04:47:11 +00:00
|
|
|
{"if-not", spec_ifnot},
|
2020-09-27 17:18:12 +00:00
|
|
|
{"int", spec_int_le},
|
|
|
|
{"int-be", spec_int_be},
|
2020-05-02 15:37:39 +00:00
|
|
|
{"lenprefix", spec_lenprefix},
|
2020-11-27 00:32:56 +00:00
|
|
|
{"line", spec_line},
|
2019-01-14 04:47:11 +00:00
|
|
|
{"look", spec_look},
|
|
|
|
{"not", spec_not},
|
2021-09-19 18:02:16 +00:00
|
|
|
{"number", spec_capture_number},
|
2019-01-15 19:08:03 +00:00
|
|
|
{"opt", spec_opt},
|
2019-01-14 04:47:11 +00:00
|
|
|
{"position", spec_position},
|
2019-01-18 00:28:42 +00:00
|
|
|
{"quote", spec_capture},
|
2019-01-14 04:47:11 +00:00
|
|
|
{"range", spec_range},
|
2020-01-15 01:58:03 +00:00
|
|
|
{"repeat", spec_repeat},
|
2019-01-14 04:47:11 +00:00
|
|
|
{"replace", spec_replace},
|
|
|
|
{"sequence", spec_sequence},
|
|
|
|
{"set", spec_set},
|
|
|
|
{"some", spec_some},
|
2020-06-11 02:18:50 +00:00
|
|
|
{"thru", spec_thru},
|
|
|
|
{"to", spec_to},
|
2020-09-27 17:18:12 +00:00
|
|
|
{"uint", spec_uint_le},
|
|
|
|
{"uint-be", spec_uint_be},
|
2021-02-26 23:25:09 +00:00
|
|
|
{"unref", spec_unref},
|
2019-01-12 00:22:24 +00:00
|
|
|
};
|
|
|
|
|
2019-01-14 04:47:11 +00:00
|
|
|
/* Compile a janet value into a rule and return the rule index. */
|
2019-02-17 04:33:24 +00:00
|
|
|
static uint32_t peg_compile1(Builder *b, Janet peg) {
|
2019-01-14 04:47:11 +00:00
|
|
|
|
|
|
|
/* Keep track of the form being compiled for error purposes */
|
|
|
|
Janet old_form = b->form;
|
2019-08-30 13:47:27 +00:00
|
|
|
JanetTable *old_grammar = b->grammar;
|
2019-01-14 04:47:11 +00:00
|
|
|
b->form = peg;
|
|
|
|
|
2019-08-30 13:47:27 +00:00
|
|
|
/* Resolve keyword references */
|
|
|
|
int i = JANET_RECURSION_GUARD;
|
|
|
|
JanetTable *grammar = old_grammar;
|
|
|
|
for (; i > 0 && janet_checktype(peg, JANET_KEYWORD); --i) {
|
2019-12-15 02:39:14 +00:00
|
|
|
Janet nextPeg = janet_table_get_ex(grammar, peg, &grammar);
|
|
|
|
if (!grammar || janet_checktype(nextPeg, JANET_NIL)) {
|
2021-05-21 02:57:22 +00:00
|
|
|
nextPeg = (b->default_grammar == NULL)
|
2021-05-28 20:12:05 +00:00
|
|
|
? janet_wrap_nil()
|
|
|
|
: janet_table_get(b->default_grammar, peg);
|
2019-12-15 02:39:14 +00:00
|
|
|
if (janet_checktype(nextPeg, JANET_NIL)) {
|
|
|
|
peg_panic(b, "unknown rule");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
peg = nextPeg;
|
2019-08-30 13:47:27 +00:00
|
|
|
b->form = peg;
|
|
|
|
b->grammar = grammar;
|
|
|
|
}
|
|
|
|
if (i == 0)
|
|
|
|
peg_panic(b, "reference chain too deep");
|
|
|
|
|
|
|
|
/* Check cache - for tuples we check only the local cache, as
|
|
|
|
* in a different grammar, the same tuple can compile to a different
|
|
|
|
* rule - for example, (+ :a :b) depends on whatever :a and :b are bound to. */
|
|
|
|
Janet check = janet_checktype(peg, JANET_TUPLE)
|
|
|
|
? janet_table_rawget(grammar, peg)
|
|
|
|
: janet_table_get(grammar, peg);
|
|
|
|
if (!janet_checktype(check, JANET_NIL)) {
|
|
|
|
b->form = old_form;
|
|
|
|
b->grammar = old_grammar;
|
|
|
|
return (uint32_t) janet_unwrap_number(check);
|
|
|
|
}
|
|
|
|
|
2019-01-14 04:47:11 +00:00
|
|
|
/* Check depth */
|
2019-08-30 13:47:27 +00:00
|
|
|
if (b->depth-- == 0)
|
2019-01-14 04:47:11 +00:00
|
|
|
peg_panic(b, "peg grammar recursed too deeply");
|
|
|
|
|
|
|
|
/* The final rule to return */
|
2019-01-14 20:06:35 +00:00
|
|
|
uint32_t rule = janet_v_count(b->bytecode);
|
2019-08-30 00:09:43 +00:00
|
|
|
|
2019-08-30 13:47:27 +00:00
|
|
|
/* Add to cache. Do not cache structs, as we don't yet know
|
|
|
|
* what rule they will return! We can just as effectively cache
|
|
|
|
* the structs main rule. */
|
|
|
|
if (!janet_checktype(peg, JANET_STRUCT)) {
|
|
|
|
JanetTable *which_grammar = grammar;
|
|
|
|
/* If we are a primitive pattern, add to the global cache (root grammar table) */
|
|
|
|
if (!janet_checktype(peg, JANET_TUPLE)) {
|
|
|
|
while (which_grammar->proto)
|
|
|
|
which_grammar = which_grammar->proto;
|
|
|
|
}
|
|
|
|
janet_table_put(which_grammar, peg, janet_wrap_number(rule));
|
|
|
|
}
|
|
|
|
|
2019-01-14 04:47:11 +00:00
|
|
|
switch (janet_type(peg)) {
|
2019-01-12 00:22:24 +00:00
|
|
|
default:
|
2019-04-06 15:38:00 +00:00
|
|
|
peg_panic(b, "unexpected peg source");
|
2019-01-14 04:47:11 +00:00
|
|
|
return 0;
|
2019-02-20 01:51:34 +00:00
|
|
|
case JANET_NUMBER: {
|
|
|
|
int32_t n = peg_getinteger(b, peg);
|
|
|
|
Reserve r = reserve(b, 2);
|
|
|
|
if (n < 0) {
|
|
|
|
emit_1(r, RULE_NOTNCHAR, -n);
|
|
|
|
} else {
|
|
|
|
emit_1(r, RULE_NCHAR, n);
|
2019-01-12 00:22:24 +00:00
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case JANET_STRING: {
|
|
|
|
const uint8_t *str = janet_unwrap_string(peg);
|
|
|
|
int32_t len = janet_string_length(str);
|
|
|
|
emit_bytes(b, RULE_LITERAL, len, str);
|
|
|
|
break;
|
|
|
|
}
|
2021-10-23 14:59:36 +00:00
|
|
|
case JANET_TABLE: {
|
|
|
|
/* Build grammar table */
|
|
|
|
JanetTable *new_grammar = janet_table_clone(janet_unwrap_table(peg));
|
|
|
|
new_grammar->proto = grammar;
|
|
|
|
b->grammar = grammar = new_grammar;
|
|
|
|
/* Run the main rule */
|
|
|
|
Janet main_rule = janet_table_rawget(grammar, janet_ckeywordv("main"));
|
|
|
|
if (janet_checktype(main_rule, JANET_NIL))
|
|
|
|
peg_panic(b, "grammar requires :main rule");
|
|
|
|
rule = peg_compile1(b, main_rule);
|
|
|
|
break;
|
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
case JANET_STRUCT: {
|
2019-08-30 13:47:27 +00:00
|
|
|
/* Build grammar table */
|
|
|
|
const JanetKV *st = janet_unwrap_struct(peg);
|
|
|
|
JanetTable *new_grammar = janet_table(2 * janet_struct_capacity(st));
|
|
|
|
for (int32_t i = 0; i < janet_struct_capacity(st); i++) {
|
|
|
|
if (janet_checktype(st[i].key, JANET_KEYWORD)) {
|
|
|
|
janet_table_put(new_grammar, st[i].key, st[i].value);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
new_grammar->proto = grammar;
|
|
|
|
b->grammar = grammar = new_grammar;
|
|
|
|
/* Run the main rule */
|
|
|
|
Janet main_rule = janet_table_rawget(grammar, janet_ckeywordv("main"));
|
|
|
|
if (janet_checktype(main_rule, JANET_NIL))
|
|
|
|
peg_panic(b, "grammar requires :main rule");
|
|
|
|
rule = peg_compile1(b, main_rule);
|
2019-02-20 01:51:34 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case JANET_TUPLE: {
|
|
|
|
const Janet *tup = janet_unwrap_tuple(peg);
|
|
|
|
int32_t len = janet_tuple_length(tup);
|
|
|
|
if (len == 0) peg_panic(b, "tuple in grammar must have non-zero length");
|
2020-06-11 16:23:43 +00:00
|
|
|
if (janet_checkint(tup[0])) {
|
|
|
|
int32_t n = janet_unwrap_integer(tup[0]);
|
|
|
|
if (n < 0) {
|
|
|
|
peg_panicf(b, "expected non-negative integer, got %d", n);
|
|
|
|
}
|
|
|
|
spec_repeat(b, len, tup);
|
|
|
|
break;
|
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
if (!janet_checktype(tup[0], JANET_SYMBOL))
|
|
|
|
peg_panicf(b, "expected grammar command, found %v", tup[0]);
|
|
|
|
const uint8_t *sym = janet_unwrap_symbol(tup[0]);
|
|
|
|
const SpecialPair *sp = janet_strbinsearch(
|
|
|
|
&peg_specials,
|
|
|
|
sizeof(peg_specials) / sizeof(SpecialPair),
|
|
|
|
sizeof(SpecialPair),
|
|
|
|
sym);
|
2019-02-22 17:10:27 +00:00
|
|
|
if (sp) {
|
|
|
|
sp->special(b, len - 1, tup + 1);
|
|
|
|
} else {
|
2019-02-20 01:51:34 +00:00
|
|
|
peg_panicf(b, "unknown special %S", sym);
|
2019-02-22 17:10:27 +00:00
|
|
|
}
|
2019-02-20 01:51:34 +00:00
|
|
|
break;
|
|
|
|
}
|
2019-01-12 00:22:24 +00:00
|
|
|
}
|
2019-01-14 04:47:11 +00:00
|
|
|
|
|
|
|
/* Increase depth again */
|
|
|
|
b->depth++;
|
|
|
|
b->form = old_form;
|
2019-08-30 13:47:27 +00:00
|
|
|
b->grammar = old_grammar;
|
2019-01-14 04:47:11 +00:00
|
|
|
return rule;
|
2019-01-12 00:22:24 +00:00
|
|
|
}
|
|
|
|
|
2019-01-14 04:47:11 +00:00
|
|
|
/*
|
|
|
|
* Post-Compilation
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int peg_mark(void *p, size_t size) {
|
|
|
|
(void) size;
|
2020-03-14 15:12:47 +00:00
|
|
|
JanetPeg *peg = (JanetPeg *)p;
|
2019-06-20 03:23:27 +00:00
|
|
|
if (NULL != peg->constants)
|
|
|
|
for (uint32_t i = 0; i < peg->num_constants; i++)
|
|
|
|
janet_mark(peg->constants[i]);
|
2019-01-14 04:47:11 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-06-18 03:40:02 +00:00
|
|
|
/* Marshal callback for a JanetPeg. Writes, in order: the bytecode length,
 * the constant count, the abstract header, every bytecode word as an
 * int32, and every constant as a Janet value. */
static void peg_marshal(void *p, JanetMarshalContext *ctx) {
    JanetPeg *peg = (JanetPeg *)p;

    /* Both lengths are written before the abstract value and payload. */
    janet_marshal_size(ctx, peg->bytecode_len);
    janet_marshal_int(ctx, (int32_t)peg->num_constants);
    janet_marshal_abstract(ctx, p);

    for (size_t w = 0; w < peg->bytecode_len; w++) {
        janet_marshal_int(ctx, (int32_t) peg->bytecode[w]);
    }
    for (uint32_t c = 0; c < peg->num_constants; c++) {
        janet_marshal_janet(ctx, peg->constants[c]);
    }
}
|
2019-01-14 04:47:11 +00:00
|
|
|
|
2019-02-24 18:43:38 +00:00
|
|
|
/* Round offset up to the nearest multiple of size (offset itself when it is
 * already a multiple). Used when several arrays share one memory chunk so
 * that each array starts correctly aligned. */
static size_t size_padded(size_t offset, size_t size) {
    const size_t bumped = offset + size - 1;
    const size_t excess = bumped % size;
    return bumped - excess;
}
|
|
|
|
|
2019-12-07 04:12:18 +00:00
|
|
|
static void *peg_unmarshal(JanetMarshalContext *ctx) {
|
|
|
|
size_t bytecode_len = janet_unmarshal_size(ctx);
|
|
|
|
uint32_t num_constants = (uint32_t) janet_unmarshal_int(ctx);
|
2019-06-18 03:40:02 +00:00
|
|
|
|
|
|
|
/* Calculate offsets. Should match those in make_peg */
|
2020-03-14 15:12:47 +00:00
|
|
|
size_t bytecode_start = size_padded(sizeof(JanetPeg), sizeof(uint32_t));
|
2019-12-07 04:12:18 +00:00
|
|
|
size_t bytecode_size = bytecode_len * sizeof(uint32_t);
|
2019-06-18 03:40:02 +00:00
|
|
|
size_t constants_start = size_padded(bytecode_start + bytecode_size, sizeof(Janet));
|
2020-01-03 04:02:57 +00:00
|
|
|
size_t total_size = constants_start + sizeof(Janet) * (size_t) num_constants;
|
2019-12-07 04:12:18 +00:00
|
|
|
|
|
|
|
/* DOS prevention? I.E. we could read bytecode and constants before
|
|
|
|
* hand so we don't allocate a ton of memory on bad, short input */
|
|
|
|
|
|
|
|
/* Allocate PEG */
|
|
|
|
char *mem = janet_unmarshal_abstract(ctx, total_size);
|
2020-03-14 15:12:47 +00:00
|
|
|
JanetPeg *peg = (JanetPeg *)mem;
|
2019-06-18 03:40:02 +00:00
|
|
|
uint32_t *bytecode = (uint32_t *)(mem + bytecode_start);
|
|
|
|
Janet *constants = (Janet *)(mem + constants_start);
|
2019-06-20 03:23:27 +00:00
|
|
|
peg->bytecode = NULL;
|
|
|
|
peg->constants = NULL;
|
2019-12-07 04:12:18 +00:00
|
|
|
peg->bytecode_len = bytecode_len;
|
|
|
|
peg->num_constants = num_constants;
|
2019-06-20 16:37:57 +00:00
|
|
|
|
2019-06-18 03:40:02 +00:00
|
|
|
for (size_t i = 0; i < peg->bytecode_len; i++)
|
|
|
|
bytecode[i] = (uint32_t) janet_unmarshal_int(ctx);
|
|
|
|
for (uint32_t j = 0; j < peg->num_constants; j++)
|
|
|
|
constants[j] = janet_unmarshal_janet(ctx);
|
|
|
|
|
2019-06-18 17:00:23 +00:00
|
|
|
/* After here, no panics except for the bad: label. */
|
|
|
|
|
|
|
|
/* Keep track at each index if an instruction was
|
|
|
|
* reference (0x01) or is in a main bytecode position
|
|
|
|
* (0x02). This lets us do a linear scan and not
|
|
|
|
* need to a depth first traversal. It is stricter
|
|
|
|
* than a dfs by not allowing certain kinds of unused
|
|
|
|
* bytecode. */
|
2019-06-19 13:45:56 +00:00
|
|
|
uint32_t blen = (int32_t) peg->bytecode_len;
|
2019-06-18 17:00:23 +00:00
|
|
|
uint32_t clen = peg->num_constants;
|
2021-03-23 10:00:48 +00:00
|
|
|
uint8_t *op_flags = janet_calloc(1, blen);
|
2019-06-18 17:00:23 +00:00
|
|
|
if (NULL == op_flags) {
|
|
|
|
JANET_OUT_OF_MEMORY;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* verify peg bytecode */
|
2021-01-06 01:51:00 +00:00
|
|
|
int32_t has_backref = 0;
|
2019-06-18 17:00:23 +00:00
|
|
|
uint32_t i = 0;
|
|
|
|
while (i < blen) {
|
|
|
|
uint32_t instr = bytecode[i];
|
|
|
|
uint32_t *rule = bytecode + i;
|
|
|
|
op_flags[i] |= 0x02;
|
|
|
|
switch (instr & 0x1F) {
|
|
|
|
case RULE_LITERAL:
|
|
|
|
i += 2 + ((rule[1] + 3) >> 2);
|
|
|
|
break;
|
|
|
|
case RULE_NCHAR:
|
|
|
|
case RULE_NOTNCHAR:
|
|
|
|
case RULE_RANGE:
|
|
|
|
case RULE_POSITION:
|
2020-11-27 00:32:56 +00:00
|
|
|
case RULE_LINE:
|
|
|
|
case RULE_COLUMN:
|
2021-01-06 01:51:00 +00:00
|
|
|
/* [1 word] */
|
|
|
|
i += 2;
|
|
|
|
break;
|
2019-08-24 22:57:01 +00:00
|
|
|
case RULE_BACKMATCH:
|
2019-06-18 17:00:23 +00:00
|
|
|
/* [1 word] */
|
|
|
|
i += 2;
|
2021-01-06 01:51:00 +00:00
|
|
|
has_backref = 1;
|
2019-06-18 17:00:23 +00:00
|
|
|
break;
|
|
|
|
case RULE_SET:
|
|
|
|
/* [8 words] */
|
|
|
|
i += 9;
|
|
|
|
break;
|
|
|
|
case RULE_LOOK:
|
|
|
|
/* [offset, rule] */
|
|
|
|
if (rule[2] >= blen) goto bad;
|
|
|
|
op_flags[rule[2]] |= 0x1;
|
|
|
|
i += 3;
|
|
|
|
break;
|
|
|
|
case RULE_CHOICE:
|
|
|
|
case RULE_SEQUENCE:
|
|
|
|
/* [len, rules...] */
|
|
|
|
{
|
|
|
|
uint32_t len = rule[1];
|
|
|
|
for (uint32_t j = 0; j < len; j++) {
|
|
|
|
if (rule[2 + j] >= blen) goto bad;
|
|
|
|
op_flags[rule[2 + j]] |= 0x1;
|
|
|
|
}
|
|
|
|
i += 2 + len;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case RULE_IF:
|
|
|
|
case RULE_IFNOT:
|
2020-05-02 15:37:39 +00:00
|
|
|
case RULE_LENPREFIX:
|
2019-06-18 17:00:23 +00:00
|
|
|
/* [rule_a, rule_b (b if not a)] */
|
|
|
|
if (rule[1] >= blen) goto bad;
|
|
|
|
if (rule[2] >= blen) goto bad;
|
|
|
|
op_flags[rule[1]] |= 0x01;
|
|
|
|
op_flags[rule[2]] |= 0x01;
|
|
|
|
i += 3;
|
|
|
|
break;
|
|
|
|
case RULE_BETWEEN:
|
|
|
|
/* [lo, hi, rule] */
|
|
|
|
if (rule[3] >= blen) goto bad;
|
|
|
|
op_flags[rule[3]] |= 0x01;
|
|
|
|
i += 4;
|
|
|
|
break;
|
|
|
|
case RULE_ARGUMENT:
|
2021-01-06 01:51:00 +00:00
|
|
|
/* [searchtag, tag] */
|
|
|
|
i += 3;
|
|
|
|
break;
|
2019-06-18 17:00:23 +00:00
|
|
|
case RULE_GETTAG:
|
|
|
|
/* [searchtag, tag] */
|
|
|
|
i += 3;
|
2021-01-06 01:51:00 +00:00
|
|
|
has_backref = 1;
|
2019-06-18 17:00:23 +00:00
|
|
|
break;
|
|
|
|
case RULE_CONSTANT:
|
|
|
|
/* [constant, tag] */
|
|
|
|
if (rule[1] >= clen) goto bad;
|
|
|
|
i += 3;
|
|
|
|
break;
|
2021-09-21 23:02:42 +00:00
|
|
|
case RULE_CAPTURE_NUM:
|
|
|
|
/* [rule, base, tag] */
|
|
|
|
if (rule[1] >= blen) goto bad;
|
|
|
|
op_flags[rule[1]] |= 0x01;
|
|
|
|
i += 4;
|
|
|
|
break;
|
2019-06-18 17:00:23 +00:00
|
|
|
case RULE_ACCUMULATE:
|
|
|
|
case RULE_GROUP:
|
|
|
|
case RULE_CAPTURE:
|
2021-02-26 23:25:09 +00:00
|
|
|
case RULE_UNREF:
|
2019-06-18 17:00:23 +00:00
|
|
|
/* [rule, tag] */
|
|
|
|
if (rule[1] >= blen) goto bad;
|
|
|
|
op_flags[rule[1]] |= 0x01;
|
|
|
|
i += 3;
|
|
|
|
break;
|
|
|
|
case RULE_REPLACE:
|
|
|
|
case RULE_MATCHTIME:
|
|
|
|
/* [rule, constant, tag] */
|
|
|
|
if (rule[1] >= blen) goto bad;
|
|
|
|
if (rule[2] >= clen) goto bad;
|
|
|
|
op_flags[rule[1]] |= 0x01;
|
2019-06-20 03:23:27 +00:00
|
|
|
i += 4;
|
2019-06-18 17:00:23 +00:00
|
|
|
break;
|
|
|
|
case RULE_ERROR:
|
|
|
|
case RULE_DROP:
|
|
|
|
case RULE_NOT:
|
2020-06-11 02:18:50 +00:00
|
|
|
case RULE_TO:
|
|
|
|
case RULE_THRU:
|
2019-06-18 17:00:23 +00:00
|
|
|
/* [rule] */
|
|
|
|
if (rule[1] >= blen) goto bad;
|
|
|
|
op_flags[rule[1]] |= 0x01;
|
|
|
|
i += 2;
|
|
|
|
break;
|
2020-09-27 17:18:12 +00:00
|
|
|
case RULE_READINT:
|
|
|
|
/* [ width | (endianess << 5) | (signedness << 6), tag ] */
|
|
|
|
if (rule[1] > JANET_MAX_READINT_WIDTH) goto bad;
|
|
|
|
i += 3;
|
|
|
|
break;
|
2019-06-18 17:00:23 +00:00
|
|
|
default:
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* last instruction cannot overflow */
|
|
|
|
if (i != blen) goto bad;
|
|
|
|
|
|
|
|
/* Make sure all referenced instructions are actually
|
|
|
|
* in instruction positions. */
|
|
|
|
for (i = 0; i < blen; i++)
|
|
|
|
if (op_flags[i] == 0x01) goto bad;
|
|
|
|
|
|
|
|
/* Good return */
|
2019-06-20 03:23:27 +00:00
|
|
|
peg->bytecode = bytecode;
|
|
|
|
peg->constants = constants;
|
2021-01-06 01:51:00 +00:00
|
|
|
peg->has_backref = has_backref;
|
2021-03-23 10:00:48 +00:00
|
|
|
janet_free(op_flags);
|
2019-12-07 04:12:18 +00:00
|
|
|
return peg;
|
2019-06-18 17:00:23 +00:00
|
|
|
|
|
|
|
bad:
|
2021-03-23 10:00:48 +00:00
|
|
|
janet_free(op_flags);
|
2019-06-18 17:00:23 +00:00
|
|
|
janet_panic("invalid peg bytecode");
|
2019-06-18 03:40:02 +00:00
|
|
|
}
|
|
|
|
|
2019-12-15 02:39:14 +00:00
|
|
|
static int cfun_peg_getter(JanetAbstract a, Janet key, Janet *out);
|
2021-01-12 05:14:07 +00:00
|
|
|
static Janet peg_next(void *p, Janet key);
|
2019-12-15 02:39:14 +00:00
|
|
|
|
2020-03-14 15:12:47 +00:00
|
|
|
const JanetAbstractType janet_peg_type = {
|
2019-06-18 03:40:02 +00:00
|
|
|
"core/peg",
|
|
|
|
NULL,
|
|
|
|
peg_mark,
|
2019-12-15 02:39:14 +00:00
|
|
|
cfun_peg_getter,
|
2021-01-12 05:14:07 +00:00
|
|
|
NULL, /* put */
|
2019-06-18 03:40:02 +00:00
|
|
|
peg_marshal,
|
|
|
|
peg_unmarshal,
|
2021-01-12 05:14:07 +00:00
|
|
|
NULL, /* tostring */
|
|
|
|
NULL, /* compare */
|
|
|
|
NULL, /* hash */
|
|
|
|
peg_next,
|
|
|
|
JANET_ATEND_NEXT
|
2019-06-18 03:40:02 +00:00
|
|
|
};
|
|
|
|
|
2020-03-14 15:12:47 +00:00
|
|
|
/* Convert Builder to JanetPeg (Janet Abstract Value) */
|
|
|
|
static JanetPeg *make_peg(Builder *b) {
|
|
|
|
size_t bytecode_start = size_padded(sizeof(JanetPeg), sizeof(uint32_t));
|
2019-01-14 04:47:11 +00:00
|
|
|
size_t bytecode_size = janet_v_count(b->bytecode) * sizeof(uint32_t);
|
2019-02-24 18:43:38 +00:00
|
|
|
size_t constants_start = size_padded(bytecode_start + bytecode_size, sizeof(Janet));
|
2019-01-14 04:47:11 +00:00
|
|
|
size_t constants_size = janet_v_count(b->constants) * sizeof(Janet);
|
2019-02-24 18:43:38 +00:00
|
|
|
size_t total_size = constants_start + constants_size;
|
2020-03-14 15:12:47 +00:00
|
|
|
char *mem = janet_abstract(&janet_peg_type, total_size);
|
|
|
|
JanetPeg *peg = (JanetPeg *)mem;
|
2019-02-24 18:43:38 +00:00
|
|
|
peg->bytecode = (uint32_t *)(mem + bytecode_start);
|
|
|
|
peg->constants = (Janet *)(mem + constants_start);
|
2019-01-14 04:47:11 +00:00
|
|
|
peg->num_constants = janet_v_count(b->constants);
|
2020-01-29 05:38:52 +00:00
|
|
|
safe_memcpy(peg->bytecode, b->bytecode, bytecode_size);
|
|
|
|
safe_memcpy(peg->constants, b->constants, constants_size);
|
2019-06-18 03:40:02 +00:00
|
|
|
peg->bytecode_len = janet_v_count(b->bytecode);
|
2021-01-06 01:51:00 +00:00
|
|
|
peg->has_backref = b->has_backref;
|
2019-01-14 04:47:11 +00:00
|
|
|
return peg;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Compiler entry point */
|
2020-03-14 15:12:47 +00:00
|
|
|
static JanetPeg *compile_peg(Janet x) {
|
2019-01-14 04:47:11 +00:00
|
|
|
Builder builder;
|
|
|
|
builder.grammar = janet_table(0);
|
2021-01-23 19:54:02 +00:00
|
|
|
builder.default_grammar = NULL;
|
|
|
|
{
|
|
|
|
Janet default_grammarv = janet_dyn("peg-grammar");
|
|
|
|
if (janet_checktype(default_grammarv, JANET_TABLE)) {
|
|
|
|
builder.default_grammar = janet_unwrap_table(default_grammarv);
|
|
|
|
}
|
|
|
|
}
|
2019-01-17 03:38:11 +00:00
|
|
|
builder.tags = janet_table(0);
|
2019-01-14 04:47:11 +00:00
|
|
|
builder.constants = NULL;
|
|
|
|
builder.bytecode = NULL;
|
2019-01-17 03:38:11 +00:00
|
|
|
builder.nexttag = 1;
|
2019-01-14 04:47:11 +00:00
|
|
|
builder.form = x;
|
|
|
|
builder.depth = JANET_RECURSION_GUARD;
|
2021-01-06 01:51:00 +00:00
|
|
|
builder.has_backref = 0;
|
2019-02-17 04:33:24 +00:00
|
|
|
peg_compile1(&builder, x);
|
2020-03-14 15:12:47 +00:00
|
|
|
JanetPeg *peg = make_peg(&builder);
|
2019-01-14 04:47:11 +00:00
|
|
|
builder_cleanup(&builder);
|
|
|
|
return peg;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * C Functions
 */
|
|
|
|
|
2021-07-25 19:54:25 +00:00
|
|
|
JANET_CORE_FN(cfun_peg_compile,
|
2021-07-26 02:47:52 +00:00
|
|
|
"(peg/compile peg)",
|
|
|
|
"Compiles a peg source data structure into a <core/peg>. This will speed up matching "
|
|
|
|
"if the same peg will be used multiple times. Will also use `(dyn :peg-grammar)` to suppliment "
|
|
|
|
"the grammar of the peg for otherwise undefined peg keywords.") {
|
2019-01-14 04:47:11 +00:00
|
|
|
janet_fixarity(argc, 1);
|
2020-03-14 15:12:47 +00:00
|
|
|
JanetPeg *peg = compile_peg(argv[0]);
|
2019-01-14 04:47:11 +00:00
|
|
|
return janet_wrap_abstract(peg);
|
|
|
|
}
|
2019-01-12 00:22:24 +00:00
|
|
|
|
2020-06-30 00:13:06 +00:00
|
|
|
/* Common data for peg cfunctions */
|
|
|
|
typedef struct {
|
2020-03-14 15:12:47 +00:00
|
|
|
JanetPeg *peg;
|
2020-06-30 00:13:06 +00:00
|
|
|
PegState s;
|
|
|
|
JanetByteView bytes;
|
2020-07-02 02:26:11 +00:00
|
|
|
JanetByteView repl;
|
2020-06-30 00:13:06 +00:00
|
|
|
int32_t start;
|
|
|
|
} PegCall;
|
|
|
|
|
|
|
|
/* Initialize state for peg cfunctions */
|
2020-07-02 02:26:11 +00:00
|
|
|
static PegCall peg_cfun_init(int32_t argc, Janet *argv, int get_replace) {
|
2020-06-30 00:13:06 +00:00
|
|
|
PegCall ret;
|
2020-07-02 02:26:11 +00:00
|
|
|
int32_t min = get_replace ? 3 : 2;
|
|
|
|
janet_arity(argc, get_replace, -1);
|
2019-01-14 04:47:11 +00:00
|
|
|
if (janet_checktype(argv[0], JANET_ABSTRACT) &&
|
2020-03-14 15:12:47 +00:00
|
|
|
janet_abstract_type(janet_unwrap_abstract(argv[0])) == &janet_peg_type) {
|
2020-06-30 00:13:06 +00:00
|
|
|
ret.peg = janet_unwrap_abstract(argv[0]);
|
2019-01-14 04:47:11 +00:00
|
|
|
} else {
|
2020-06-30 00:13:06 +00:00
|
|
|
ret.peg = compile_peg(argv[0]);
|
2019-01-14 04:47:11 +00:00
|
|
|
}
|
2020-07-02 02:26:11 +00:00
|
|
|
if (get_replace) {
|
|
|
|
ret.repl = janet_getbytes(argv, 1);
|
|
|
|
ret.bytes = janet_getbytes(argv, 2);
|
|
|
|
} else {
|
|
|
|
ret.bytes = janet_getbytes(argv, 1);
|
|
|
|
}
|
|
|
|
if (argc > min) {
|
|
|
|
ret.start = janet_gethalfrange(argv, min, ret.bytes.len, "offset");
|
|
|
|
ret.s.extrac = argc - min - 1;
|
|
|
|
ret.s.extrav = janet_tuple_n(argv + min + 1, argc - min - 1);
|
2019-01-12 16:04:47 +00:00
|
|
|
} else {
|
2020-06-30 00:13:06 +00:00
|
|
|
ret.start = 0;
|
|
|
|
ret.s.extrac = 0;
|
|
|
|
ret.s.extrav = NULL;
|
|
|
|
}
|
|
|
|
ret.s.mode = PEG_MODE_NORMAL;
|
|
|
|
ret.s.text_start = ret.bytes.bytes;
|
|
|
|
ret.s.text_end = ret.bytes.bytes + ret.bytes.len;
|
|
|
|
ret.s.depth = JANET_RECURSION_GUARD;
|
|
|
|
ret.s.captures = janet_array(0);
|
2021-01-06 01:51:00 +00:00
|
|
|
ret.s.tagged_captures = janet_array(0);
|
2020-06-30 00:13:06 +00:00
|
|
|
ret.s.scratch = janet_buffer(10);
|
|
|
|
ret.s.tags = janet_buffer(10);
|
|
|
|
ret.s.constants = ret.peg->constants;
|
|
|
|
ret.s.bytecode = ret.peg->bytecode;
|
2020-11-27 00:32:56 +00:00
|
|
|
ret.s.linemap = NULL;
|
|
|
|
ret.s.linemaplen = -1;
|
2021-01-06 01:51:00 +00:00
|
|
|
ret.s.has_backref = ret.peg->has_backref;
|
2020-06-30 00:13:06 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2020-07-02 02:26:11 +00:00
|
|
|
/* Put a PegCall back into its pre-match state so the same call data can be
 * reused for another attempt: the recursion budget is restored and the
 * capture, tag and scratch storage is emptied (counts reset, storage kept). */
static void peg_call_reset(PegCall *c) {
    PegState *state = &c->s;
    state->depth = JANET_RECURSION_GUARD;
    state->captures->count = 0;
    state->tagged_captures->count = 0;
    state->scratch->count = 0;
    state->tags->count = 0;
}
|
|
|
|
|
2021-07-25 19:54:25 +00:00
|
|
|
JANET_CORE_FN(cfun_peg_match,
|
2021-07-26 02:47:52 +00:00
|
|
|
"(peg/match peg text &opt start & args)",
|
|
|
|
"Match a Parsing Expression Grammar to a byte string and return an array of captured values. "
|
|
|
|
"Returns nil if text does not match the language defined by peg. The syntax of PEGs is documented on the Janet website.") {
|
2020-07-02 02:26:11 +00:00
|
|
|
PegCall c = peg_cfun_init(argc, argv, 0);
|
2020-06-30 00:13:06 +00:00
|
|
|
const uint8_t *result = peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + c.start);
|
|
|
|
return result ? janet_wrap_array(c.s.captures) : janet_wrap_nil();
|
|
|
|
}
|
|
|
|
|
2021-07-25 19:54:25 +00:00
|
|
|
JANET_CORE_FN(cfun_peg_find,
|
2021-07-26 02:47:52 +00:00
|
|
|
"(peg/find peg text &opt start & args)",
|
|
|
|
"Find first index where the peg matches in text. Returns an integer, or nil if not found.") {
|
2020-07-02 02:26:11 +00:00
|
|
|
PegCall c = peg_cfun_init(argc, argv, 0);
|
2020-06-30 00:13:06 +00:00
|
|
|
for (int32_t i = c.start; i < c.bytes.len; i++) {
|
2020-07-02 02:26:11 +00:00
|
|
|
peg_call_reset(&c);
|
2020-06-30 00:13:06 +00:00
|
|
|
if (peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + i))
|
|
|
|
return janet_wrap_integer(i);
|
|
|
|
}
|
|
|
|
return janet_wrap_nil();
|
|
|
|
}
|
|
|
|
|
2021-07-25 19:54:25 +00:00
|
|
|
JANET_CORE_FN(cfun_peg_find_all,
|
2021-07-26 02:47:52 +00:00
|
|
|
"(peg/find-all peg text &opt start & args)",
|
|
|
|
"Find all indexes where the peg matches in text. Returns an array of integers.") {
|
2020-07-02 02:26:11 +00:00
|
|
|
PegCall c = peg_cfun_init(argc, argv, 0);
|
2020-06-30 00:13:06 +00:00
|
|
|
JanetArray *ret = janet_array(0);
|
|
|
|
for (int32_t i = c.start; i < c.bytes.len; i++) {
|
2020-07-02 02:26:11 +00:00
|
|
|
peg_call_reset(&c);
|
2020-06-30 00:13:06 +00:00
|
|
|
if (peg_rule(&c.s, c.s.bytecode, c.bytes.bytes + i))
|
|
|
|
janet_array_push(ret, janet_wrap_integer(i));
|
2019-01-12 16:04:47 +00:00
|
|
|
}
|
2020-06-30 00:13:06 +00:00
|
|
|
return janet_wrap_array(ret);
|
2019-01-12 00:22:24 +00:00
|
|
|
}
|
|
|
|
|
2020-07-02 02:26:11 +00:00
|
|
|
/* Shared implementation of peg/replace and peg/replace-all.
 *
 * Scans the text from the start offset, and wherever the PEG matches copies
 * the unmatched text before the match followed by the replacement bytes
 * into a new buffer. When only_one is nonzero the scan stops after the
 * first match. Whatever text follows the last match is appended unchanged.
 * Returns the new buffer. */
static Janet cfun_peg_replace_generic(int32_t argc, Janet *argv, int only_one) {
    PegCall call = peg_cfun_init(argc, argv, 1);
    JanetBuffer *out = janet_buffer(0);
    const uint8_t *text = call.bytes.bytes;
    const int32_t text_len = call.bytes.len;
    int32_t copied = 0; /* text before this index is already accounted for */
    int32_t pos = call.start;

    while (pos < text_len) {
        peg_call_reset(&call);
        const uint8_t *match_end = peg_rule(&call.s, call.s.bytecode, text + pos);
        if (NULL == match_end) {
            pos++;
            continue;
        }

        /* Flush the text skipped over since the previous match */
        if (copied < pos) {
            janet_buffer_push_bytes(out, text + copied, (pos - copied));
        }
        janet_buffer_push_bytes(out, call.repl.bytes, call.repl.len);

        int32_t resume = (int32_t)(match_end - text);
        copied = resume;
        /* An empty match must still advance, otherwise we would loop forever */
        pos = (resume == pos) ? (pos + 1) : resume;

        if (only_one) break;
    }

    /* Tail of the text after the last match */
    if (copied < text_len) {
        janet_buffer_push_bytes(out, text + copied, (text_len - copied));
    }
    return janet_wrap_buffer(out);
}
|
|
|
|
|
2021-07-25 19:54:25 +00:00
|
|
|
JANET_CORE_FN(cfun_peg_replace_all,
|
2021-07-26 02:47:52 +00:00
|
|
|
"(peg/replace-all peg repl text &opt start & args)",
|
|
|
|
"Replace all matches of peg in text with repl, returning a new buffer. The peg does not need to make captures to do replacement.") {
|
2020-07-02 02:26:11 +00:00
|
|
|
return cfun_peg_replace_generic(argc, argv, 0);
|
|
|
|
}
|
|
|
|
|
2021-07-25 19:54:25 +00:00
|
|
|
JANET_CORE_FN(cfun_peg_replace,
|
2021-07-26 02:47:52 +00:00
|
|
|
"(peg/replace peg repl text &opt start & args)",
|
|
|
|
"Replace first match of peg in text with repl, returning a new buffer. The peg does not need to make captures to do replacement. "
|
|
|
|
"If no matches are found, returns the input string in a new buffer.") {
|
2020-07-02 02:26:11 +00:00
|
|
|
return cfun_peg_replace_generic(argc, argv, 1);
|
|
|
|
}
|
|
|
|
|
2020-06-30 00:13:06 +00:00
|
|
|
static JanetMethod peg_methods[] = {
|
|
|
|
{"match", cfun_peg_match},
|
|
|
|
{"find", cfun_peg_find},
|
|
|
|
{"find-all", cfun_peg_find_all},
|
2020-07-02 02:26:11 +00:00
|
|
|
{"replace", cfun_peg_replace},
|
|
|
|
{"replace-all", cfun_peg_replace_all},
|
2020-06-30 00:13:06 +00:00
|
|
|
{NULL, NULL}
|
|
|
|
};
|
|
|
|
|
2019-12-15 02:39:14 +00:00
|
|
|
/* Getter hook for PEG abstracts: looks a keyword key up in peg_methods and
 * stores the result through out. Non-keyword keys yield 0; otherwise the
 * result of janet_getmethod is returned. The abstract itself is not
 * consulted. */
static int cfun_peg_getter(JanetAbstract a, Janet key, Janet *out) {
    (void) a;
    if (janet_checktype(key, JANET_KEYWORD)) {
        return janet_getmethod(janet_unwrap_keyword(key), peg_methods, out);
    }
    return 0;
}
|
|
|
|
|
2021-01-12 05:14:07 +00:00
|
|
|
/* Iteration hook for PEG abstracts: returns whatever janet_nextmethod
 * reports for key in peg_methods. The abstract itself is not consulted. */
static Janet peg_next(void *p, Janet key) {
    (void) p;
    Janet following = janet_nextmethod(peg_methods, key);
    return following;
}
|
|
|
|
|
2019-01-12 00:22:24 +00:00
|
|
|
/* Load the peg module */
|
|
|
|
void janet_lib_peg(JanetTable *env) {
|
2021-07-25 19:54:25 +00:00
|
|
|
JanetRegExt cfuns[] = {
|
|
|
|
JANET_CORE_REG("peg/compile", cfun_peg_compile),
|
|
|
|
JANET_CORE_REG("peg/match", cfun_peg_match),
|
|
|
|
JANET_CORE_REG("peg/find", cfun_peg_find),
|
|
|
|
JANET_CORE_REG("peg/find-all", cfun_peg_find_all),
|
|
|
|
JANET_CORE_REG("peg/replace", cfun_peg_replace),
|
|
|
|
JANET_CORE_REG("peg/replace-all", cfun_peg_replace_all),
|
|
|
|
JANET_REG_END
|
|
|
|
};
|
|
|
|
janet_core_cfuns_ext(env, NULL, cfuns);
|
2020-03-14 15:12:47 +00:00
|
|
|
janet_register_abstract_type(&janet_peg_type);
|
2019-01-12 00:22:24 +00:00
|
|
|
}
|
2019-02-18 02:22:03 +00:00
|
|
|
|
|
|
|
#endif /* ifdef JANET_PEG */
|