From ff0d3a0081b4735ec71a024010c13148402ea920 Mon Sep 17 00:00:00 2001 From: Calvin Rose Date: Sun, 13 Jan 2019 23:47:11 -0500 Subject: [PATCH] Compile pegs to bytecode with (peg/compile). Peg performance is improved, and peg syntax has been expanded with a few more keywords. --- src/core/peg.c | 1231 ++++++++++++++++++++++++++++++--------------- src/core/vector.h | 1 - test/suite3.janet | 43 +- 3 files changed, 855 insertions(+), 420 deletions(-) diff --git a/src/core/peg.c b/src/core/peg.c index 54e66d2c..1f3d67d9 100644 --- a/src/core/peg.c +++ b/src/core/peg.c @@ -23,478 +23,888 @@ #include #include #include "util.h" +#include "vector.h" -/* Potential opcodes for peg vm. - * These are not yet implemented, but efficiently express the current semantics - * of the current implementation. +/* + * Runtime + */ + +/* opcodes for peg vm */ typedef enum { - POP_LITERAL, [len, bytes...] - POP_NCHAR, [n] - POP_RANGE, [lo | hi << 16 (1 word)] - POP_SET, [bitmap (8 words)] - POP_LOOK, [offset, rule] - POP_CHOICE, [len, rules...] - POP_SEQUENCE, [len, rules...] - POP_IFNOT, [a, b (a if not b)] - POP_NOT, [a] - POP_ATLEAST, [n, rule] - POP_BETWEEN, [lo, hi, rule] - POP_CAPTURE, [rule] - POP_POSITION, [] - POP_SUBSTITUTE, [rule] - POP_GROUP, [rule] - POP_CONSTANT, [constant] - POP_REPLACE, [constant] - POP_REPINDEX, [capture index] - POP_ARGUMENT [argument index] + RULE_LITERAL, /* [len, bytes...] */ + RULE_NCHAR, /* [n] */ + RULE_NOTNCHAR, /* [n] */ + RULE_RANGE, /* [lo | hi << 16 (1 word)] */ + RULE_SET, /* [bitmap (8 words)] */ + RULE_LOOK, /* [offset, rule] */ + RULE_CHOICE, /* [len, rules...] */ + RULE_SEQUENCE, /* [len, rules...] 
*/ + RULE_IFNOT, /* [rule_a, rule_b (a if not b)] */ + RULE_NOT, /* [rule] */ + RULE_BETWEEN, /* [lo, hi, rule] */ + RULE_CAPTURE, /* [rule] */ + RULE_POSITION, /* [] */ + RULE_ARGUMENT, /* [argument-index] */ + RULE_REPINDEX, /* [capture-index] */ + RULE_BACKINDEX, /* [capture-index] */ + RULE_CONSTANT, /* [constant] */ + RULE_SUBSTITUTE, /* [rule] */ + RULE_GROUP, /* [rule] */ + RULE_REPLACE, /* [rule, constant] */ } Opcode; -*/ - -/* TODO - * - Compilation - compile peg to binary form - one grammar, patterns reference each other by index - * and bytecode "opcodes" identify primitive patterns and pattern "constructors". Main pattern is - * pattern index 0. The logic of patterns would not change much, but we could elide arity checking, - * expensive keyword lookups, and unused patterns. We could also combine certain pattern types into - * more efficient types. */ /* Hold captured patterns and match state */ typedef struct { const uint8_t *text_start; const uint8_t *text_end; const uint8_t *subst_end; - JanetTable *grammar; + const uint32_t *bytecode; + const Janet *constants; JanetArray *captures; JanetBuffer *scratch; const Janet *extrav; int32_t extrac; int32_t depth; enum { - PEG_NORMAL, - PEG_SUBSTITUTE, - PEG_NOCAPTURE + PEG_MODE_NORMAL, + PEG_MODE_SUBSTITUTE, + PEG_MODE_NOCAPTURE } mode; -} State; +} PegState; -/* Forward declaration */ -static int32_t match(State *s, Janet peg, const uint8_t *text); +/* Allow backtrack with captures. We need + * to save state at branches, and then reload + * if one branch fails and try a new branch. */ +typedef struct { + const uint8_t *subst_end; + int32_t cap; + int32_t scratch; +} CapState; -/* Special matcher form */ -typedef int32_t (*Matcher)(State *s, int32_t argc, const Janet *argv, const uint8_t *text); - -/* A "Matcher" is a function that is used to match a pattern at an anchored position. It takes some - * optional arguments, and returns either the number of bytes matched, or -1 for no match. 
It can also - * append values to the capture array of State *s, panic on bad arguments, and call match recursively. */ - -/* - * Primitive Pattern Types - */ - -/* Match a character range */ -static int32_t match_range(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - if (s->text_end <= text) - return -1; - for (int32_t i = 0; i < argc; i++) { - const uint8_t *range = janet_getstring(argv, i); - int32_t length = janet_string_length(range); - if (length != 2) janet_panicf("arguments to range must have length 2"); - uint8_t lo = range[0]; - uint8_t hi = range[1]; - if (text[0] >= lo && text[0] <= hi) return 1; - } - return -1; +/* Save the current capture state */ +static CapState cap_save(PegState *s) { + CapState cs; + cs.subst_end = s->subst_end; + cs.scratch = s->scratch->count; + cs.cap = s->captures->count; + return cs; } -/* Match 1 of any character in argv[0] */ -static int32_t match_set(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - janet_fixarity(argc, 1); - const uint8_t *set = janet_getstring(argv, 0); - int32_t len = janet_string_length(set); - if (s->text_end <= text) return -1; - for (int32_t i = 0; i < len; i++) - if (set[i] == text[0]) return 1; - return -1; +/* Load a saved capture state in the case of failure */ +static void cap_load(PegState *s, CapState cs) { + s->subst_end = cs.subst_end; + s->scratch->count = cs.scratch; + s->captures->count = cs.cap; } -/* - * Look ahead/behind - */ - -/* Match 0 length if match argv[0] */ -static int32_t match_lookahead(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - janet_fixarity(argc, 1); - return match(s, argv[0], text) >= 0 ? 
0 : -1; -} - -/* Match 0 length if match argv[0] at text + offset */ -static int32_t match_lookoffset(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - janet_fixarity(argc, 2); - text += janet_getinteger(argv, 0); - if (text < s->text_start || text > s->text_end) - return -1; - return match(s, argv[1], text) >= 0 ? 0 : -1; -} - -/* - * Combining Pattern Types - */ - -/* Match the first of argv[0], argv[1], argv[2], ... */ -static int32_t match_choice(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - for (int32_t i = 0; i < argc; i++) { - int32_t result = match(s, argv[i], text); - if (result >= 0) return result; - } - return -1; -} - -/* Match argv[0] then argv[1] then argv[2] ... Fail if any match fails. */ -static int32_t match_sequence(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - int32_t traversed = 0; - for (int32_t i = 0; i < argc; i++) { - int32_t result = match(s, argv[i], text + traversed); - if (result < 0) return -1; - traversed += result; - } - return traversed; -} - -/* Match argv[0] if not argv[1] */ -static int32_t match_minus(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - janet_fixarity(argc, 2); - if (match(s, argv[1], text) < 0) - return match(s, argv[0], text); - return -1; -} - -/* Match zero length if not match argv[0] */ -static int32_t match_not(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - janet_fixarity(argc, 1); - if (match(s, argv[0], text) < 0) - return 0; - return -1; -} - -/* Match at least argv[0] repetitions of argv[1]. Will match as many repetitions as possible. 
*/ -static int32_t match_atleast(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - janet_fixarity(argc, 2); - int32_t n = janet_getinteger(argv, 0); - int32_t captured = 0; - int32_t total_length = 0; - int32_t result; - /* Greedy match until match fails */ - while ((result = match(s, argv[1], text + total_length)) > 0) { - captured++; - total_length += result; - } - return captured >= n ? total_length : -1; -} - -/* Match at most argv[0] repetitions of argv[1]. Will match as many repetitions as possible. */ -static int32_t match_atmost(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - janet_fixarity(argc, 2); - int32_t n = janet_getinteger(argv, 0); - int32_t captured = 0; - int32_t total_length = 0; - int32_t result; - /* Greedy match until match fails or n captured */ - while (captured < n && (result = match(s, argv[1], text + total_length)) > 0) { - captured++; - total_length += result; - } - /* always matches */ - return total_length; -} - -/* Match between argv[0] and argv[1] repetitions of argv[2]. Will match as many repetitions as possible. */ -static int32_t match_between(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - janet_fixarity(argc, 3); - int32_t lo = janet_getinteger(argv, 0); - int32_t hi = janet_getinteger(argv, 1); - int32_t captured = 0; - int32_t total_length = 0; - int32_t result; - /* Greedy match until match fails or n captured */ - while (captured < hi && (result = match(s, argv[2], text + total_length)) > 0) { - captured++; - total_length += result; - } - /* always matches */ - return captured >= lo ? 
total_length : -1; -} - -/* - * Captures - */ - -static void push_capture(State *s, Janet capture, const uint8_t *text, int32_t nbytes) { - if (s->mode == PEG_SUBSTITUTE) { - /* Substitution mode, append as string to scratch buffer */ - /* But first append in-between text */ +/* Add a capture */ +static void pushcap(PegState *s, + Janet capture, + const uint8_t *text, + const uint8_t *result) { + if (s->mode == PEG_MODE_SUBSTITUTE) { janet_buffer_push_bytes(s->scratch, s->subst_end, text - s->subst_end); janet_to_string_b(s->scratch, capture); - s->subst_end = text + nbytes; - } else if (s->mode == PEG_NORMAL) { - /* Normal mode, append to captures */ + s->subst_end = result; + } else if (s->mode == PEG_MODE_NORMAL) { janet_array_push(s->captures, capture); } } -/* Capture a value */ -static int32_t match_capture(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - janet_fixarity(argc, 1); - int oldmode = s->mode; - /* We can't have overlapping captures during substitution, so we can - * turn off the child captures if subsituting */ - if (s->mode == PEG_SUBSTITUTE) s->mode = PEG_NOCAPTURE; - int32_t result = match(s, argv[0], text); - s->mode = oldmode; - if (result < 0) return -1; - push_capture(s, janet_stringv(text, result), text, result); - return result; -} +/* Prevent stack overflow */ +#define down1(s) do { \ + if (0 == --((s)->depth)) janet_panic("peg/match recursed too deeply"); \ +} while (0) +#define up1(s) ((s)->depth++) -/* Capture position */ -static int32_t match_position(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - janet_fixarity(argc, 0); - (void) argv; - push_capture(s, janet_wrap_number(text - s->text_start), text, 0); - return 0; -} - -/* Capture group */ -static int32_t match_group(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - janet_fixarity(argc, 1); - int32_t old_count = s->captures->count; - - int oldmode = s->mode; - if (oldmode != PEG_NOCAPTURE) s->mode = PEG_NORMAL; - int32_t 
result = match(s, argv[0], text); - s->mode = oldmode; - if (result < 0) return -1; - - if (oldmode != PEG_NOCAPTURE) { - /* Collect sub-captures into an array by popping new values off of the capture stack, - * and then putting them in a new array. Then, push the new array back onto the capture stack. */ - int32_t num_sub_captures = s->captures->count - old_count; - JanetArray *sub_captures = janet_array(num_sub_captures); - memcpy(sub_captures->data, s->captures->data + old_count, sizeof(Janet) * num_sub_captures); - sub_captures->count = num_sub_captures; - s->captures->count = old_count; - push_capture(s, janet_wrap_array(sub_captures), text, result); - } - - return result; -} - -/* Capture a constant */ -static int32_t match_capture_constant(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - janet_fixarity(argc, 1); - push_capture(s, argv[0], text, 0); - return 0; -} - -/* Capture nth extra argument to peg/match */ -static int32_t match_capture_arg(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - janet_fixarity(argc, 1); - int32_t n = janet_getargindex(argv, 0, s->extrac, "n"); - push_capture(s, s->extrav[n], text, 0); - return 0; -} - -/* Capture replace */ -static int32_t match_replace(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { - janet_fixarity(argc, 2); - - int oldmode = s->mode; - int32_t old_count = s->captures->count; - if (oldmode == PEG_SUBSTITUTE) s->mode = PEG_NORMAL; - int32_t result = match(s, argv[0], text); - s->mode = oldmode; - - if (result < 0) return -1; - if (oldmode == PEG_NOCAPTURE) return result; - - Janet capture; - switch (janet_type(argv[1])) { +/* Evaluate a peg rule */ +static const uint8_t *peg_rule( + PegState *s, + const uint32_t *rule, + const uint8_t *text) { +tail: + switch(*rule & 0x1F) { default: - capture = argv[1]; - break; - case JANET_STRUCT: - capture = janet_struct_get(janet_unwrap_struct(argv[1]), janet_stringv(text, result)); - break; - case JANET_TABLE: - 
capture = janet_table_get(janet_unwrap_table(argv[1]), janet_stringv(text, result)); - break; - case JANET_CFUNCTION: + janet_panic("unexpected opcode"); + return NULL; + case RULE_LITERAL: { - janet_array_push(s->captures, janet_stringv(text, result)); - JanetCFunction cfunc = janet_unwrap_cfunction(argv[1]); - capture = cfunc(s->captures->count - old_count, s->captures->data + old_count); - break; + uint32_t len = rule[1]; + if (text + len > s->text_end) return NULL; + return memcmp(text, rule + 2, len) ? NULL : text + len; } - case JANET_FUNCTION: + case RULE_NCHAR: { - janet_array_push(s->captures, janet_stringv(text, result)); - capture = janet_call(janet_unwrap_function(argv[1]), - s->captures->count - old_count, s->captures->data + old_count); - break; + uint32_t n = rule[1]; + return (text + n > s->text_end) ? NULL : text + n; } - case JANET_NUMBER: + case RULE_NOTNCHAR: { - int32_t index = janet_getargindex(argv, 1, s->captures->count, "capture"); - capture = s->captures->data[index]; - break; + uint32_t n = rule[1]; + return (text + n > s->text_end) ? text : NULL; + } + case RULE_RANGE: + { + uint8_t lo = rule[1] & 0xFF; + uint8_t hi = (rule[1] >> 16) & 0xFF; + return (text < s->text_end && + text[0] >= lo && + text[0] <= hi) + ? text + 1 + : NULL; + } + case RULE_SET: + { + /* Check bounds before touching text[0]; at end of input there is no byte to test */ + if (text >= s->text_end) return NULL; + uint32_t word = rule[1 + (text[0] >> 5)]; + uint32_t mask = (uint32_t)1 << (text[0] & 0x1F); + return (word & mask) + ? text + 1 + : NULL; + } + case RULE_LOOK: + { + text += ((int32_t *)rule)[1]; + if (text < s->text_start || text > s->text_end) return NULL; + int oldmode = s->mode; + s->mode = PEG_MODE_NOCAPTURE; + down1(s); + const uint8_t *result = peg_rule(s, s->bytecode + rule[2], text); + up1(s); + s->mode = oldmode; + return result ?
text : NULL; + } + case RULE_CHOICE: + { + uint32_t len = rule[1]; + const uint32_t *args = rule + 2; + if (len == 0) return NULL; + down1(s); + CapState cs = cap_save(s); + for (uint32_t i = 0; i < len - 1; i++) { + const uint8_t *result = peg_rule(s, s->bytecode + args[i], text); + if (result) { + up1(s); + return result; + } + cap_load(s, cs); + } + up1(s); + rule = s->bytecode + args[len - 1]; + goto tail; + } + case RULE_SEQUENCE: + { + uint32_t len = rule[1]; + const uint32_t *args = rule + 2; + if (len == 0) return text; + down1(s); + for (uint32_t i = 0; text && i < len - 1; i++) + text = peg_rule(s, s->bytecode + args[i], text); + up1(s); + if (!text) return NULL; + rule = s->bytecode + args[len - 1]; + goto tail; + } + case RULE_IFNOT: + { + const uint32_t *rule_a = s->bytecode + rule[1]; + const uint32_t *rule_b = s->bytecode + rule[2]; + int oldmode = s->mode; + s->mode = PEG_MODE_NOCAPTURE; + down1(s); + const uint8_t *result = peg_rule(s, rule_b, text); + up1(s); + s->mode = oldmode; + if (result) return NULL; + rule = rule_a; + goto tail; + } + case RULE_NOT: + { + const uint32_t *rule_a = s->bytecode + rule[1]; + int oldmode = s->mode; + s->mode = PEG_MODE_NOCAPTURE; + down1(s); + const uint8_t *result = peg_rule(s, rule_a, text); + up1(s); + s->mode = oldmode; + return (result) ? NULL : text; + } + case RULE_BETWEEN: + { + uint32_t lo = rule[1]; + uint32_t hi = rule[2]; + const uint32_t *rule_a = s->bytecode + rule[3]; + uint32_t captured = 0; + const uint8_t *next_text; + CapState cs = cap_save(s); + down1(s); + while (captured < hi) { + /* Snapshot per attempt so a failed or empty repetition leaves no captures behind */ + CapState cs2 = cap_save(s); + next_text = peg_rule(s, rule_a, text); + /* Stop on failure, and on a zero-width match, which would otherwise + * repeat without progress until hi (possibly UINT32_MAX) is reached */ + if (!next_text || next_text == text) { + cap_load(s, cs2); + break; + } + captured++; + text = next_text; + } + up1(s); + if (captured < lo) { + cap_load(s, cs); + return NULL; + } + return text; + } + case RULE_POSITION: + { + pushcap(s, janet_wrap_number(text - s->text_start), text, text); + return text; + } + case RULE_ARGUMENT: + { + int32_t index = ((int32_t *)rule)[1]; + Janet capture = (index >= s->extrac) ?
janet_wrap_nil() : s->extrav[index]; + pushcap(s, capture, text, text); + return text; + } + case RULE_REPINDEX: + { + int32_t index = ((int32_t *)rule)[1]; + if (index >= s->captures->count) + janet_panic("invalid capture index"); + Janet capture = s->captures->data[index]; + pushcap(s, capture, text, text); + return text; + } + case RULE_BACKINDEX: + { + int32_t index = ((int32_t *)rule)[1]; + if (index >= s->captures->count) + janet_panic("invalid capture index"); + Janet capture = s->captures->data[s->captures->count - 1 - index]; + pushcap(s, capture, text, text); + return text; + } + case RULE_CONSTANT: + { + pushcap(s, s->constants[rule[1]], text, text); + return text; + } + case RULE_CAPTURE: + { + int oldmode = s->mode; + if (oldmode == PEG_MODE_NOCAPTURE) { + rule = s->bytecode + rule[1]; + goto tail; + } + if (oldmode == PEG_MODE_SUBSTITUTE) s->mode = PEG_MODE_NOCAPTURE; + down1(s); + const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text); + up1(s); + s->mode = oldmode; + if (!result) return NULL; + pushcap(s, janet_stringv(text, result - text), text, result); + return result; + } + case RULE_SUBSTITUTE: + case RULE_GROUP: + case RULE_REPLACE: + { + /* In no-capture mode, all captures simply become their matching pattern */ + int oldmode = s->mode; + if (oldmode == PEG_MODE_NOCAPTURE) { + rule = s->bytecode + rule[1]; + goto tail; + } + + /* Save previous state. Will use this to reload state before + * pushing grammar. Each of these rules pushes exactly 1 new + * capture, regardless of the sub rule. */ + CapState cs = cap_save(s); + + /* Set sub mode as needed. 
Modes affect how captures are recorded (pushed to stack, + * pushed to byte buffer, or ignored) */ + if (rule[0] == RULE_GROUP) s->mode = PEG_MODE_NORMAL; + if (rule[0] == RULE_REPLACE) s->mode = PEG_MODE_NORMAL; + if (rule[0] == RULE_SUBSTITUTE) { + s->mode = PEG_MODE_SUBSTITUTE; + s->subst_end = text; + } + + down1(s); + const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text); + up1(s); + s->mode = oldmode; + if (!result) return NULL; + + /* The replacement capture */ + Janet cap; + + /* Figure out what to push based on opcode */ + if (rule[0] == RULE_GROUP) { + int32_t num_sub_captures = s->captures->count - cs.cap; + JanetArray *sub_captures = janet_array(num_sub_captures); + memcpy(sub_captures->data, + s->captures->data + cs.cap, + sizeof(Janet) * num_sub_captures); + sub_captures->count = num_sub_captures; + cap = janet_wrap_array(sub_captures); + + } else if (rule[0] == RULE_SUBSTITUTE) { + janet_buffer_push_bytes(s->scratch, s->subst_end, result - s->subst_end); + cap = janet_stringv(s->scratch->data + cs.scratch, + s->scratch->count - cs.scratch); + + } else { /* RULE_REPLACE */ + Janet constant = s->constants[rule[2]]; + switch (janet_type(constant)) { + default: + cap = constant; + break; + case JANET_STRUCT: + cap = janet_struct_get(janet_unwrap_struct(constant), + janet_stringv(text, result - text)); + break; + case JANET_TABLE: + cap = janet_table_get(janet_unwrap_table(constant), + janet_stringv(text, result - text)); + break; + case JANET_CFUNCTION: + janet_array_push(s->captures, + janet_stringv(text, result - text)); + JanetCFunction cfunc = janet_unwrap_cfunction(constant); + cap = cfunc(s->captures->count - cs.cap, + s->captures->data + cs.cap); + break; + case JANET_FUNCTION: + /* The matched text is passed as the last argument, after the sub-captures */ + janet_array_push(s->captures, + janet_stringv(text, result - text)); + cap = janet_call(janet_unwrap_function(constant), + s->captures->count - cs.cap, + s->captures->data + cs.cap); + break; + } + } + + /* Reset old
state and then push capture */ + cap_load(s, cs); + pushcap(s, cap, text, result); + return result; } } - s->captures->count = old_count; - push_capture(s, capture, text, result); - return result; } -/* Substitution capture */ -static int32_t match_substitute(State *s, int32_t argc, const Janet *argv, const uint8_t *text) { +/* + * Compilation + */ + +typedef struct { + JanetTable *grammar; + JanetTable *memoized; + Janet *constants; + uint32_t *bytecode; + Janet form; + int depth; +} Builder; + +/* Forward declaration to allow recursion */ +static uint32_t compile1(Builder *b, Janet peg); + +/* + * Emission + */ + +static uint32_t emit_constant(Builder *b, Janet c) { + uint32_t cindex = (uint32_t) janet_v_count(b->constants); + janet_v_push(b->constants, c); + return cindex; +} + +/* Emit a rule in the builder. Returns the index of the new rule */ +static uint32_t emit_rule(Builder *b, uint32_t op, int32_t n, const uint32_t *body) { + uint32_t next_rule = janet_v_count(b->bytecode); + janet_v_push(b->bytecode, op); + for (int32_t i = 0; i < n; i++) + janet_v_push(b->bytecode, body[i]); + return next_rule; +} + +/* For RULE_LITERAL */ +static uint32_t emit_bytes(Builder *b, uint32_t op, int32_t len, const uint8_t *bytes) { + uint32_t next_rule = janet_v_count(b->bytecode); + janet_v_push(b->bytecode, op); + janet_v_push(b->bytecode, len); + int32_t words = ((len + 3) >> 2); + for (int32_t i = 0; i < words; i++) + janet_v_push(b->bytecode, 0); + memcpy(b->bytecode + next_rule + 2, bytes, len); + return next_rule; +} + +/* For fixed arity rules of arities 1, 2, and 3 */ +static uint32_t emit_1(Builder *b, uint32_t op, uint32_t arg) { + return emit_rule(b, op, 1, &arg); +} +static uint32_t emit_2(Builder *b, uint32_t op, uint32_t arg1, uint32_t arg2) { + uint32_t arr[2] = {arg1, arg2}; + return emit_rule(b, op, 2, arr); +} +static uint32_t emit_3(Builder *b, uint32_t op, uint32_t arg1, uint32_t arg2, uint32_t arg3) { + uint32_t arr[3] = {arg1, arg2, arg3}; + return 
emit_rule(b, op, 3, arr); +} + +/* + * Errors + */ + +static void builder_cleanup(Builder *b) { + janet_v_free(b->constants); + janet_v_free(b->bytecode); +} + +static void peg_panic(Builder *b, const char *msg) { + builder_cleanup(b); + janet_panicf("grammar error in %p, %s", b->form, msg); +} + +#define peg_panicf(b,...) peg_panic((b), (const char *) janet_formatc(__VA_ARGS__)) + +static void peg_fixarity(Builder *b, int32_t argc, int32_t arity) { + if (argc != arity) { + peg_panicf(b, "expected %d argument%s, got %d%", + arity, + arity == 1 ? "" : "s", + argc); + } +} + +static void peg_arity(Builder *b, int32_t arity, int32_t min, int32_t max) { + if (min >= 0 && arity < min) + peg_panicf(b, "arity mismatch, expected at least %d, got %d", min, arity); + if (max >= 0 && arity > max) + peg_panicf(b, "arity mismatch, expected at most %d, got %d", max, arity); +} + +static const uint8_t *peg_getset(Builder *b, Janet x) { + if (!janet_checktype(x, JANET_STRING)) + peg_panicf(b, "expected string for character set"); + const uint8_t *str = janet_unwrap_string(x); + return str; +} + +static const uint8_t *peg_getrange(Builder *b, Janet x) { + if (!janet_checktype(x, JANET_STRING)) + peg_panicf(b, "expected string for character range"); + const uint8_t *str = janet_unwrap_string(x); + if (janet_string_length(str) != 2) + peg_panicf(b, "expected string to have length 2, got %v", x); + if (str[1] < str[0]) + peg_panicf(b, "range %v is empty", x); + return str; +} + +static int32_t peg_getinteger(Builder *b, Janet x) { + if (!janet_checkint(x)) + peg_panicf(b, "expected integer, got %v", x); + return janet_unwrap_integer(x); +} + +static int32_t peg_getnat(Builder *b, Janet x) { + int32_t i = peg_getinteger(b, x); + if (i < 0) + peg_panicf(b, "expected non-negative integer, got %v", x); + return i; +} + +/* + * Specials + */ + +/* Special matcher form */ +typedef uint32_t (*Special)(Builder *b, int32_t argc, const Janet *argv); + +static void bitmap_set(uint32_t *bitmap, 
uint8_t c) { + bitmap[c >> 5] |= ((uint32_t)1) << (c & 0x1F); +} + +static uint32_t spec_range(Builder *b, int32_t argc, const Janet *argv) { + peg_arity(b, argc, 1, -1); + if (argc == 1) { + const uint8_t *str = peg_getrange(b, argv[0]); + uint32_t arg = str[0] | (str[1] << 16); + return emit_1(b, RULE_RANGE, arg); + } else { + /* Compile as a set */ + uint32_t bitmap[8] = {0}; + for (int32_t i = 0; i < argc; i++) { + const uint8_t *str = peg_getrange(b, argv[i]); + for (uint32_t c = str[0]; c <= str[1]; c++) + bitmap_set(bitmap, c); + } + return emit_rule(b, RULE_SET, 8, bitmap); + } +} + +static uint32_t spec_set(Builder *b, int32_t argc, const Janet *argv) { + peg_fixarity(b, argc, 1); + const uint8_t *str = peg_getset(b, argv[0]); + uint32_t bitmap[8] = {0}; + for (int32_t i = 0; i < janet_string_length(str); i++) + bitmap_set(bitmap, str[i]); + return emit_rule(b, RULE_SET, 8, bitmap); +} + +static uint32_t spec_look(Builder *b, int32_t argc, const Janet *argv) { + peg_arity(b, argc, 1, 2); + int32_t rulearg = argc == 2 ? 1 : 0; + int32_t offset = argc == 2 ? peg_getinteger(b, argv[0]) : 0; + uint32_t subrule = compile1(b, argv[rulearg]); + return emit_2(b, RULE_LOOK, (uint32_t) offset, subrule); +} + +/* Rule of the form [len, rules...] 
*/ +static uint32_t spec_variadic(Builder *b, int32_t argc, const Janet *argv, uint32_t op) { + uint32_t rule = janet_v_count(b->bytecode); + janet_v_push(b->bytecode, op); + janet_v_push(b->bytecode, argc); + for (int32_t i = 0; i < argc; i++) + janet_v_push(b->bytecode, 0); + for (int32_t i = 0; i < argc; i++) { + uint32_t rulei = compile1(b, argv[i]); + b->bytecode[rule + 2 + i] = rulei; + } + return rule; +} + +static uint32_t spec_choice(Builder *b, int32_t argc, const Janet *argv) { + return spec_variadic(b, argc, argv, RULE_CHOICE); +} +static uint32_t spec_sequence(Builder *b, int32_t argc, const Janet *argv) { + return spec_variadic(b, argc, argv, RULE_SEQUENCE); +} + +static uint32_t spec_ifnot(Builder *b, int32_t argc, const Janet *argv) { + peg_fixarity(b, argc, 2); + uint32_t rule_a = compile1(b, argv[0]); + uint32_t rule_b = compile1(b, argv[1]); + return emit_2(b, RULE_IFNOT, rule_a, rule_b); +} + +/* Rule of the form [rule] */ +static uint32_t spec_onerule(Builder *b, int32_t argc, const Janet *argv, uint32_t op) { + peg_fixarity(b, argc, 1); + uint32_t rule = compile1(b, argv[0]); + return emit_1(b, op, rule); +} + +static uint32_t spec_not(Builder *b, int32_t argc, const Janet *argv) { + return spec_onerule(b, argc, argv, RULE_NOT); +} +static uint32_t spec_capture(Builder *b, int32_t argc, const Janet *argv) { + return spec_onerule(b, argc, argv, RULE_CAPTURE); +} +static uint32_t spec_substitute(Builder *b, int32_t argc, const Janet *argv) { + return spec_onerule(b, argc, argv, RULE_SUBSTITUTE); +} +static uint32_t spec_group(Builder *b, int32_t argc, const Janet *argv) { + return spec_onerule(b, argc, argv, RULE_GROUP); +} + +static uint32_t spec_exponent(Builder *b, int32_t argc, const Janet *argv) { + peg_fixarity(b, argc, 2); + int32_t n = peg_getinteger(b, argv[1]); + uint32_t subrule = compile1(b, argv[0]); + if (n < 0) { + return emit_3(b, RULE_BETWEEN, 0, -n, subrule); + } else { + return emit_3(b, RULE_BETWEEN, n, UINT32_MAX, subrule); 
+ } +} + +static uint32_t spec_between(Builder *b, int32_t argc, const Janet *argv) { + peg_fixarity(b, argc, 3); + int32_t lo = peg_getnat(b, argv[0]); + int32_t hi = peg_getnat(b, argv[1]); + uint32_t subrule = compile1(b, argv[2]); + return emit_3(b, RULE_BETWEEN, lo, hi, subrule); +} + +static uint32_t spec_position(Builder *b, int32_t argc, const Janet *argv) { + peg_fixarity(b, argc, 0); + (void) argv; + return emit_rule(b, RULE_POSITION, 0, NULL); +} + +static uint32_t spec_reference(Builder *b, int32_t argc, const Janet *argv) { + peg_fixarity(b, argc, 1); + int32_t index = peg_getinteger(b, argv[0]); + if (index < 0) { + return emit_1(b, RULE_BACKINDEX, -index); + } else { + return emit_1(b, RULE_REPINDEX, index); + } +} + +static uint32_t spec_argument(Builder *b, int32_t argc, const Janet *argv) { + peg_fixarity(b, argc, 1); + int32_t index = peg_getinteger(b, argv[0]); + if (index < 0) + peg_panicf(b, "argument index must be natural number, got %v", argv[0]); + return emit_1(b, RULE_ARGUMENT, index); +} + +static uint32_t spec_constant(Builder *b, int32_t argc, const Janet *argv) { - janet_fixarity(argc, 1); + /* Use the builder-aware check: it frees the builder's vectors before panicking */ + peg_fixarity(b, argc, 1); - - /* Save old scratch state */ - int32_t old_count = s->scratch->count; - const uint8_t *old_subst_end = s->subst_end; - - /* Prepare for collecting in scratch */ - s->subst_end = text; - - int oldmode = s->mode; - if (oldmode != PEG_NOCAPTURE) s->mode = PEG_SUBSTITUTE; - int32_t result = match(s, argv[0], text); - s->mode = oldmode; - - if (result < 0) return -1; - - if (oldmode != PEG_NOCAPTURE) { - /* Push remaining text to scratch buffer */ - janet_buffer_push_bytes(s->scratch, s->subst_end, text - s->subst_end + result); - /* Pop last section of scratch buffer and push a string capture */ - janet_array_push(s->captures, - janet_stringv(s->scratch->data + old_count, s->scratch->count - old_count)); - } - - /* Reset scratch buffer and subst_end */ - s->scratch->count = old_count; - s->subst_end = old_subst_end; - - return result; + return
emit_1(b, RULE_CONSTANT, emit_constant(b, argv[0])); } -/* Lookup for special forms */ +static uint32_t spec_replace(Builder *b, int32_t argc, const Janet *argv) { + peg_fixarity(b, argc, 2); + uint32_t subrule = compile1(b, argv[0]); + uint32_t constant = emit_constant(b, argv[1]); + return emit_2(b, RULE_REPLACE, subrule, constant); +} + +/* For some and any, really just short-hand for (^ rule n) */ +static uint32_t spec_repeater(Builder *b, int32_t argc, const Janet *argv, int32_t min) { + peg_fixarity(b, argc, 1); + uint32_t subrule = compile1(b, argv[0]); + return emit_3(b, RULE_BETWEEN, min, UINT32_MAX, subrule); +} + +static uint32_t spec_some(Builder *b, int32_t argc, const Janet *argv) { + return spec_repeater(b, argc, argv, 1); +} +static uint32_t spec_any(Builder *b, int32_t argc, const Janet *argv) { + return spec_repeater(b, argc, argv, 0); +} + +static uint32_t spec_atleast(Builder *b, int32_t argc, const Janet *argv) { + peg_fixarity(b, argc, 2); + int32_t n = peg_getnat(b, argv[0]); + uint32_t subrule = compile1(b, argv[1]); + return emit_3(b, RULE_BETWEEN, n, UINT32_MAX, subrule); +} + +static uint32_t spec_atmost(Builder *b, int32_t argc, const Janet *argv) { + peg_fixarity(b, argc, 2); + int32_t n = peg_getnat(b, argv[0]); + uint32_t subrule = compile1(b, argv[1]); + return emit_3(b, RULE_BETWEEN, 0, n, subrule); +} + +/* Special matcher form */ +typedef uint32_t (*Special)(Builder *b, int32_t argc, const Janet *argv); typedef struct { const char *name; - Matcher matcher; -} MatcherPair; -static const MatcherPair specials[] = { - {"*", match_sequence}, - {"+", match_choice}, - {"-", match_minus}, - {"/", match_replace}, - {"<-", match_capture}, - {"<-arg", match_capture_arg}, - {"<-c", match_capture_constant}, - {"<-g", match_group}, - {"<-p", match_position}, - {"<-s", match_substitute}, - {">", match_lookahead}, - {">>", match_lookoffset}, - {"at-least", match_atleast}, - {"at-most", match_atmost}, - {"between", match_between}, - {"not", 
match_not}, - {"range", match_range}, - {"set", match_set} + Special special; +} SpecialPair; + +static const SpecialPair specials[] = { + {"!", spec_not}, + {"*", spec_sequence}, + {"+", spec_choice}, + {"-", spec_ifnot}, + {"/", spec_replace}, + {">", spec_look}, + {"^", spec_exponent}, + {"any", spec_any}, + {"argument", spec_argument}, + {"at-least", spec_atleast}, + {"at-most", spec_atmost}, + {"backref", spec_reference}, + {"between", spec_between}, + {"capture", spec_capture}, + {"choice", spec_choice}, + {"constant", spec_constant}, + {"group", spec_group}, + {"if-not", spec_ifnot}, + {"look", spec_look}, + {"not", spec_not}, + {"position", spec_position}, + {"range", spec_range}, + {"replace", spec_replace}, + {"sequence", spec_sequence}, + {"set", spec_set}, + {"some", spec_some}, + {"substitute", spec_substitute}, + {"|", spec_substitute}, }; -/* Check if the string matches the pattern at the given point. Returns a negative number - * if no match, else the number of characters matched against. */ -static int32_t match(State *s, Janet peg, const uint8_t *text) { - switch(janet_type(peg)) { +/* Compile a janet value into a rule and return the rule index. 
*/ +static uint32_t compile1(Builder *b, Janet peg) { + + /* Check for already compiled rules */ + Janet check = janet_table_get(b->memoized, peg); + if (!janet_checktype(check, JANET_NIL)) { + uint32_t rule = janet_unwrap_number(check); + return rule; + } + + /* Keep track of the form being compiled for error purposes */ + Janet old_form = b->form; + b->form = peg; + + /* Check depth */ + if (b->depth-- == 0) { + peg_panic(b, "peg grammar recursed too deeply"); + } + + /* The final rule to return */ + uint32_t rule; + + switch (janet_type(peg)) { default: - janet_panicf("unexpected element in peg: %v", peg); - return -1; + peg_panicf(b, "unexpected peg source"); + return 0; case JANET_NUMBER: - /* Match n characters */ { if (!janet_checkint(peg)) - janet_panicf("numbers in peg must be integers, got %v", peg); + peg_panicf(b, "expected integer"); int32_t n = janet_unwrap_integer(peg); - if (n < 0) /* Invert pattern */ - return (text - n > s->text_end) ? 0 : -1; - return (text + n > s->text_end) ? -1 : n; + if (n < 0) { + rule = emit_1(b, RULE_NOTNCHAR, -n); + } else { + rule = emit_1(b, RULE_NCHAR, n); + } + break; } case JANET_STRING: - /* Match a sequence of bytes */ { const uint8_t *str = janet_unwrap_string(peg); int32_t len = janet_string_length(str); - if (text + len > s->text_end) return -1; - return memcmp(text, str, len) ? 
-1 : len; - } - case JANET_TUPLE: - /* Match a special command */ - { - const Janet *items; - int32_t len; - janet_indexed_view(peg, &items, &len); - janet_arity(len, 1, -1); - if (!janet_checktype(items[0], JANET_SYMBOL)) - janet_panicf("expected symbol for name of command"); - const uint8_t *sym = janet_unwrap_symbol(items[0]); - const MatcherPair *mp = janet_strbinsearch( - &specials, - sizeof(specials)/sizeof(MatcherPair), - sizeof(MatcherPair), - sym); - if (!mp) janet_panicf("unknown special form %p", peg); - if (s->depth-- == 0) - janet_panic("recursed too deeply"); - - /* Save old state in case of failure */ - int32_t old_capture_count = s->captures->count; - int32_t old_scratch_count = s->scratch->count; - const uint8_t *old_subst_end = s->subst_end; - int32_t result = mp->matcher(s, len - 1, items + 1, text); - - /* Reset old state on failure */ - if (result < 0) { - s->captures->count = old_capture_count; - s->scratch->count = old_scratch_count; - s->subst_end = old_subst_end; - } - s->depth++; - return result; + rule = emit_bytes(b, RULE_LITERAL, len, str); + break; } case JANET_KEYWORD: - /* Look up a rule */ - return match(s, janet_table_get(s->grammar, peg), text); + { + Janet check = janet_table_get(b->grammar, peg); + if (janet_checktype(check, JANET_NIL)) + peg_panicf(b, "unknown rule"); + rule = compile1(b, check); + /* We don't want to memoize references, as they will become invalid + * if we go out of scope */ + b->depth++; + b->form = old_form; + return rule; + } case JANET_STRUCT: - /* Specify a grammar */ { JanetTable *grammar = janet_struct_to_table(janet_unwrap_struct(peg)); - grammar->proto = s->grammar; - - /* Run main rule with grammar set */ - s->grammar = grammar; - int32_t result = match(s, janet_table_get(grammar, janet_ckeywordv("main")), text); - s->grammar = grammar->proto; - - return result; + grammar->proto = b->grammar; + b->grammar = grammar; + Janet main_rule = janet_table_get(grammar, janet_ckeywordv("main")); + if 
(janet_checktype(main_rule, JANET_NIL)) + peg_panicf(b, "grammar requires :main rule"); + rule = compile1(b, main_rule); + b->grammar = grammar->proto; + break; + } + case JANET_TUPLE: + { + const Janet *tup = janet_unwrap_tuple(peg); + int32_t len = janet_tuple_length(tup); + if (len == 0) peg_panic(b, "tuple in grammar must have non-zero length"); + if (!janet_checktype(tup[0], JANET_SYMBOL)) + peg_panicf(b, "expected grammar command, found %v", tup[0]); + const uint8_t *sym = janet_unwrap_symbol(tup[0]); + const SpecialPair *sp = janet_strbinsearch( + &specials, + sizeof(specials)/sizeof(SpecialPair), + sizeof(SpecialPair), + sym); + if (!sp) + peg_panicf(b, "unknown special %S", sym); + rule = sp->special(b, len - 1, tup + 1); + break; } } + + /* Add rule to memoized table */ + janet_table_put(b->memoized, peg, janet_wrap_number(rule)); + + /* Increase depth again */ + b->depth++; + b->form = old_form; + + return rule; } -/* C Functions */ +/* + * Post-Compilation + */ + +typedef struct { + uint32_t *bytecode; + Janet *constants; + uint32_t main_rule; + uint32_t num_constants; +} Peg; + +static int peg_mark(void *p, size_t size) { + (void) size; + Peg *peg = (Peg *)p; + for (uint32_t i = 0; i < peg->num_constants; i++) + janet_mark(peg->constants[i]); + return 0; +} + +static JanetAbstractType peg_type = { + "core/peg", + NULL, + peg_mark +}; + +/* Convert Builder to Peg (Janet Abstract Value) */ +static Peg *make_peg(Builder *b, uint32_t main_rule) { + size_t bytecode_size = janet_v_count(b->bytecode) * sizeof(uint32_t); + size_t constants_size = janet_v_count(b->constants) * sizeof(Janet); + size_t total_size = bytecode_size + constants_size + sizeof(Peg); + char *mem = janet_abstract(&peg_type, total_size); + Peg *peg = (Peg *)mem; + peg->bytecode = (uint32_t *)(mem + sizeof(Peg)); + peg->constants = (Janet *)(mem + sizeof(Peg) + bytecode_size); + peg->num_constants = janet_v_count(b->constants); + peg->main_rule = main_rule; + memcpy(peg->bytecode, 
b->bytecode, bytecode_size); + memcpy(peg->constants, b->constants, constants_size); + return peg; +} + +/* Compiler entry point */ +static Peg *compile_peg(Janet x) { + Builder builder; + builder.grammar = janet_table(0); + builder.memoized = janet_table(0); + builder.constants = NULL; + builder.bytecode = NULL; + builder.form = x; + builder.depth = JANET_RECURSION_GUARD; + uint32_t main_rule = compile1(&builder, x); + Peg *peg = make_peg(&builder, main_rule); + builder_cleanup(&builder); + return peg; +} + +/* + * C Functions + */ + +static Janet cfun_compile(int32_t argc, Janet *argv) { + janet_fixarity(argc, 1); + Peg *peg = compile_peg(argv[0]); + return janet_wrap_abstract(peg); +} static Janet cfun_match(int32_t argc, Janet *argv) { janet_arity(argc, 2, -1); + Peg *peg; + if (janet_checktype(argv[0], JANET_ABSTRACT) && + janet_abstract_type(janet_unwrap_abstract(argv[0])) == &peg_type) { + peg = janet_unwrap_abstract(argv[0]); + } else { + peg = compile_peg(argv[0]); + } JanetByteView bytes = janet_getbytes(argv, 1); int32_t start; - State s; + PegState s; if (argc > 2) { start = janet_gethalfrange(argv, 2, bytes.len, "offset"); s.extrac = argc - 3; @@ -504,18 +914,25 @@ static Janet cfun_match(int32_t argc, Janet *argv) { s.extrac = 0; s.extrav = NULL; } - s.mode = PEG_NORMAL; + s.mode = PEG_MODE_NORMAL; s.text_start = bytes.bytes; s.text_end = bytes.bytes + bytes.len; s.depth = JANET_RECURSION_GUARD; - s.grammar = NULL; s.captures = janet_array(0); - s.scratch = janet_buffer(0); - int32_t result = match(&s, argv[0], bytes.bytes + start); - return result >= 0 ? janet_wrap_array(s.captures) : janet_wrap_nil(); + s.scratch = janet_buffer(10); + + s.constants = peg->constants; + s.bytecode = peg->bytecode; + const uint8_t *result = peg_rule(&s, s.bytecode + peg->main_rule, bytes.bytes + start); + return result ? 
janet_wrap_array(s.captures) : janet_wrap_nil(); } static const JanetReg cfuns[] = { + {"peg/compile", cfun_compile, + "(peg/compile peg)\n\n" + "Compiles a peg source data structure into a . This will speed up matching " + "if the same peg will be used multiple times." + }, {"peg/match", cfun_match, "(peg/match peg text [,start=0])\n\n" "Match a Parsing Expression Grammar to a byte string and return an array of captured values. " diff --git a/src/core/vector.h b/src/core/vector.h index dd2db3a7..169c3a48 100644 --- a/src/core/vector.h +++ b/src/core/vector.h @@ -38,7 +38,6 @@ #define janet_v_push(v, x) (janet_v__maybegrow(v, 1), (v)[janet_v__cnt(v)++] = (x)) #define janet_v_pop(v) (janet_v_count(v) ? janet_v__cnt(v)-- : 0) #define janet_v_count(v) (((v) != NULL) ? janet_v__cnt(v) : 0) -#define janet_v_add(v, n) (janet_v__maybegrow(v, n), janet_v_cnt(v) += (n), &(v)[janet_v__cnt(v) - (n)]) #define janet_v_last(v) ((v)[janet_v__cnt(v) - 1]) #define janet_v_empty(v) (((v) != NULL) ? (janet_v__cnt(v) = 0) : 0) #define janet_v_copy(v) (janet_v_copymem((v), sizeof(*(v)))) diff --git a/test/suite3.janet b/test/suite3.janet index 64c436f0..2c8168b3 100644 --- a/test/suite3.janet +++ b/test/suite3.janet @@ -179,9 +179,15 @@ # Simple pattern -(check-match '(* (at-least 1 (range "az" "AZ")) (not 1)) "hello" true) -(check-match '(* (at-least 1 (range "az" "AZ")) (not 1)) "hello world" false) -(check-match '(* (at-least 1 (range "az" "AZ")) (not 1)) "1he11o" false) +(check-match '(* (some (range "az" "AZ")) -1) "hello" true) +(check-match '(* (some (range "az" "AZ")) -1) "hello world" false) +(check-match '(* (some (range "az" "AZ")) -1) "1he11o" false) + +# Pre compile + +(def pegleg (peg/compile '{:item "abc" :main (* :item "," :item -1)})) + +(peg/match pegleg "abc,abc") # IP address @@ -189,9 +195,14 @@ '{:d (range "09") :0-4 (range "04") :0-5 (range "05") - :block (+ (* "25" :0-5) (* "2" :0-4 :d) (* "1" :d :d) (between 1 2 :d)) - :main (* :block (between 3 3 (* "." 
:block)) -1)}) + :byte (+ + (* "25" :0-5) + (* "2" :0-4 :d) + (* "1" :d :d) + (between 1 2 :d)) + :main (* :byte "." :byte "." :byte "." :byte)}) +(check-match ip-address "10.240.250.250" true) (check-match ip-address "0.0.0.0" true) (check-match ip-address "1.2.3.4" true) (check-match ip-address "256.2.3.4" false) @@ -199,7 +210,10 @@ # Substitution test with peg -(def grammar '(<-s (at-least 0 (+ (/ "dog" "purple panda") 1)))) +(file/flush stderr) +(file/flush stdout) + +(def grammar '(| (any (+ (/ "dog" "purple panda") 1)))) (defn try-grammar [text] (assert (= (string/replace-all "dog" "purple panda" text) (0 (peg/match grammar text))) text)) @@ -212,11 +226,12 @@ (try-grammar "i have a dog called doug the dogggg") # Peg CSV test + (def csv '{:field (+ - (* `"` (<-s (at-least 0 (+ (- 1 `"`) (/ `""` `"`)))) `"`) - (<- (at-least 0 (- 1 (set ",\n"))))) - :main (* :field (at-least 0 (* "," :field)) (+ "\n" -1))}) + (* `"` (| (any (+ (- 1 `"`) (/ `""` `"`)))) `"`) + (| (any (- 1 (set ",\n"))))) + :main (* :field (any (* "," :field)) (+ "\n" -1))}) (defn check-csv [str res] @@ -228,18 +243,22 @@ # Nested Captures -(def grmr '(<- (* (<- "a") (<- 1) (<- "c")))) +(def grmr '(capture (* (capture "a") (capture 1) (capture "c")))) (check-deep grmr "abc" @["a" "b" "c" "abc"]) (check-deep grmr "acc" @["a" "c" "c" "acc"]) # Functions in grammar -(def grmr-triple ~(<-s (at-least 0 (/ 1 ,(fn [x] (string x x x)))))) +(def grmr-triple ~(| (any (/ 1 ,(fn [x] (string x x x)))))) (check-deep grmr-triple "abc" @["aaabbbccc"]) (check-deep grmr-triple "" @[""]) (check-deep grmr-triple " " @[" "]) -(def counter ~(/ (<-g (at-least 0 (<- 1))) ,length)) +(def counter ~(/ (group (^ (capture 1) 0)) ,length)) (check-deep counter "abcdefg" @[7]) +# Capture Backtracking + +(check-deep '(+ (* (capture "c") "d") "ce") "ce" @[]) + (end-suite)