mirror of
https://github.com/janet-lang/janet
synced 2025-01-12 00:20:26 +00:00
add a new (sub) PEG special
(sub) will first match one pattern, then match another pattern against the text that the first pattern advanced over.
This commit is contained in:
parent
772f4c26e8
commit
ea75086300
@ -39,6 +39,10 @@
|
||||
typedef struct {
|
||||
const uint8_t *text_start;
|
||||
const uint8_t *text_end;
|
||||
/* text_end will be restricted in a (sub) rule, but
|
||||
outer_text_end will always contain the real end of
|
||||
input, which we need to generate a line mapping */
|
||||
const uint8_t *outer_text_end;
|
||||
const uint32_t *bytecode;
|
||||
const Janet *constants;
|
||||
JanetArray *captures;
|
||||
@ -114,12 +118,12 @@ static LineCol get_linecol_from_position(PegState *s, int32_t position) {
|
||||
/* Generate if not made yet */
|
||||
if (s->linemaplen < 0) {
|
||||
int32_t newline_count = 0;
|
||||
for (const uint8_t *c = s->text_start; c < s->text_end; c++) {
|
||||
for (const uint8_t *c = s->text_start; c < s->outer_text_end; c++) {
|
||||
if (*c == '\n') newline_count++;
|
||||
}
|
||||
int32_t *mem = janet_smalloc(sizeof(int32_t) * newline_count);
|
||||
size_t index = 0;
|
||||
for (const uint8_t *c = s->text_start; c < s->text_end; c++) {
|
||||
for (const uint8_t *c = s->text_start; c < s->outer_text_end; c++) {
|
||||
if (*c == '\n') mem[index++] = (int32_t)(c - s->text_start);
|
||||
}
|
||||
s->linemaplen = newline_count;
|
||||
@ -179,7 +183,7 @@ static const uint8_t *peg_rule(
|
||||
const uint32_t *rule,
|
||||
const uint8_t *text) {
|
||||
tail:
|
||||
switch (*rule & 0x1F) {
|
||||
switch (*rule) {
|
||||
default:
|
||||
janet_panic("unexpected opcode");
|
||||
return NULL;
|
||||
@ -482,6 +486,30 @@ tail:
|
||||
return result;
|
||||
}
|
||||
|
||||
/* (sub window subpattern): first match the window rule (rule[1]) at the
 * current position, then match the subpattern rule (rule[2]) from that same
 * starting position with the end of input clamped to where the window
 * stopped. Yields NULL if either rule fails. */
case RULE_SUB: {
|
||||
const uint8_t *text_start = text;
|
||||
const uint32_t *rule_window = s->bytecode + rule[1];
|
||||
const uint32_t *rule_subpattern = s->bytecode + rule[2];
|
||||
down1(s);
|
||||
const uint8_t *window_end = peg_rule(s, rule_window, text);
|
||||
up1(s);
|
||||
if (!window_end) {
|
||||
return NULL;
|
||||
}
|
||||
/* Temporarily make the window's end the visible end of input, so the
 * subpattern cannot look past what the window matched. The previous end
 * is restored right after the nested call returns. */
const uint8_t *saved_end = s->text_end;
|
||||
s->text_end = window_end;
|
||||
down1(s);
|
||||
const uint8_t *next_text = peg_rule(s, rule_subpattern, text_start);
|
||||
up1(s);
|
||||
s->text_end = saved_end;
|
||||
|
||||
/* next_text is only tested for success; how far the subpattern advanced
 * is not used. */
if (!next_text) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* On success the match advances to the end of the window, not to the end
 * of the subpattern's match. */
return window_end;
|
||||
}
|
||||
|
||||
case RULE_REPLACE:
|
||||
case RULE_MATCHTIME: {
|
||||
uint32_t tag = rule[3];
|
||||
@ -1107,6 +1135,14 @@ static void spec_matchtime(Builder *b, int32_t argc, const Janet *argv) {
|
||||
emit_3(r, RULE_MATCHTIME, subrule, cindex, tag);
|
||||
}
|
||||
|
||||
/* Compiler for the (sub window subpattern) PEG special.
 * Called with exactly two pattern arguments in mind: argv[0] is the window
 * pattern and argv[1] is the subpattern. Each is compiled with
 * peg_compile1, then a RULE_SUB instruction is emitted whose two operands
 * are the values peg_compile1 returned for them.
 * NOTE(review): peg_fixarity presumably raises a compile error when argc is
 * not 2 -- confirm against its definition. */
static void spec_sub(Builder *b, int32_t argc, const Janet *argv) {
|
||||
peg_fixarity(b, argc, 2);
|
||||
/* Three slots: the opcode plus its two rule operands. The slots are
 * reserved before the sub-patterns are compiled. */
Reserve r = reserve(b, 3);
|
||||
uint32_t subrule1 = peg_compile1(b, argv[0]);
|
||||
uint32_t subrule2 = peg_compile1(b, argv[1]);
|
||||
emit_2(r, RULE_SUB, subrule1, subrule2);
|
||||
}
|
||||
|
||||
#ifdef JANET_INT_TYPES
|
||||
#define JANET_MAX_READINT_WIDTH 8
|
||||
#else
|
||||
@ -1190,6 +1226,7 @@ static const SpecialPair peg_specials[] = {
|
||||
{"sequence", spec_sequence},
|
||||
{"set", spec_set},
|
||||
{"some", spec_some},
|
||||
{"sub", spec_sub},
|
||||
{"thru", spec_thru},
|
||||
{"to", spec_to},
|
||||
{"uint", spec_uint_le},
|
||||
@ -1431,7 +1468,7 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
|
||||
uint32_t instr = bytecode[i];
|
||||
uint32_t *rule = bytecode + i;
|
||||
op_flags[i] |= 0x02;
|
||||
switch (instr & 0x1F) {
|
||||
switch (instr) {
|
||||
case RULE_LITERAL:
|
||||
i += 2 + ((rule[1] + 3) >> 2);
|
||||
break;
|
||||
@ -1524,6 +1561,14 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
|
||||
op_flags[rule[1]] |= 0x01;
|
||||
i += 4;
|
||||
break;
|
||||
/* Validate a RULE_SUB instruction: opcode followed by two rule operands. */
case RULE_SUB:
|
||||
/* [rule, rule] */
|
||||
/* Both operands are used as indices into op_flags, so reject any that
 * are not below blen before touching it. */
if (rule[1] >= blen) goto bad;
|
||||
if (rule[2] >= blen) goto bad;
|
||||
/* NOTE(review): flag 0x01 presumably marks the index as the target of a
 * rule reference -- confirm against the pass that consumes op_flags. */
op_flags[rule[1]] |= 0x01;
|
||||
op_flags[rule[2]] |= 0x01;
|
||||
/* Step over the opcode word and its two operands. */
i += 3;
|
||||
break;
|
||||
case RULE_ERROR:
|
||||
case RULE_DROP:
|
||||
case RULE_NOT:
|
||||
@ -1677,6 +1722,7 @@ static PegCall peg_cfun_init(int32_t argc, Janet *argv, int get_replace) {
|
||||
ret.s.mode = PEG_MODE_NORMAL;
|
||||
ret.s.text_start = ret.bytes.bytes;
|
||||
ret.s.text_end = ret.bytes.bytes + ret.bytes.len;
|
||||
ret.s.outer_text_end = ret.s.text_end;
|
||||
ret.s.depth = JANET_RECURSION_GUARD;
|
||||
ret.s.captures = janet_array(0);
|
||||
ret.s.tagged_captures = janet_array(0);
|
||||
|
@ -2140,7 +2140,8 @@ typedef enum {
|
||||
RULE_LINE, /* [tag] */
|
||||
RULE_COLUMN, /* [tag] */
|
||||
RULE_UNREF, /* [rule, tag] */
|
||||
RULE_CAPTURE_NUM /* [rule, tag] */
|
||||
RULE_CAPTURE_NUM, /* [rule, tag] */
|
||||
RULE_SUB /* [rule, rule] */
|
||||
} JanetPegOpcod;
|
||||
|
||||
typedef struct {
|
||||
|
@ -263,6 +263,8 @@
|
||||
(marshpeg '(if-not "abcdf" 123))
|
||||
(marshpeg ~(cmt "abcdf" ,identity))
|
||||
(marshpeg '(group "abc"))
|
||||
(marshpeg '(sub "abcdf" "abc"))
|
||||
(marshpeg '(* (sub 1 1)))
|
||||
|
||||
# Peg swallowing errors
|
||||
# 159651117
|
||||
@ -660,5 +662,53 @@
|
||||
(peg/match '(if (not (* (constant 7) "a")) "hello") "hello")
|
||||
@[]) "peg if not")
|
||||
|
||||
# Test helper: run `peg` against `input` with peg/match and assert that the
# result is deep= to `expected`. `name` is passed to assert as the label for
# the check.
(defn test [name peg input expected]
|
||||
(assert (deep= (peg/match peg input) expected) name))
|
||||
|
||||
(test "sub: matches the same input twice"
|
||||
~(sub "abcd" "abc")
|
||||
"abcdef"
|
||||
@[])
|
||||
|
||||
(test "sub: second pattern cannot match more than the first pattern"
|
||||
~(sub "abcd" "abcde")
|
||||
"abcdef"
|
||||
nil)
|
||||
|
||||
(test "sub: fails if first pattern fails"
|
||||
~(sub "x" "abc")
|
||||
"abcdef"
|
||||
nil)
|
||||
|
||||
(test "sub: fails if second pattern fails"
|
||||
~(sub "abc" "x")
|
||||
"abcdef"
|
||||
nil)
|
||||
|
||||
(test "sub: keeps captures from both patterns"
|
||||
~(sub '"abcd" '"abc")
|
||||
"abcdef"
|
||||
@["abcd" "abc"])
|
||||
|
||||
(test "sub: second pattern can reference captures from first"
|
||||
~(* (constant 5 :tag) (sub (capture "abc" :tag) (backref :tag)))
|
||||
"abcdef"
|
||||
@[5 "abc" "abc"])
|
||||
|
||||
(test "sub: second pattern can't see past what the first pattern matches"
|
||||
~(sub "abc" (* "abc" -1))
|
||||
"abcdef"
|
||||
@[])
|
||||
|
||||
(test "sub: positions inside second match are still relative to the entire input"
|
||||
~(* "one\ntw" (sub "o" (* ($) (line) (column))))
|
||||
"one\ntwo\nthree\n"
|
||||
@[6 2 3])
|
||||
|
||||
(test "sub: advances to the end of the first pattern's match"
|
||||
~(* (sub "abc" "ab") "d")
|
||||
"abcdef"
|
||||
@[])
|
||||
|
||||
(end-suite)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user