1
0
mirror of https://github.com/janet-lang/janet synced 2024-12-31 19:00:26 +00:00

add (til) PEG special

(til sep subpattern) is a specialized (sub) that behaves like
(sub (to sep) subpattern), but advances over the input like (thru sep).
This commit is contained in:
Ian Henry 2024-12-04 21:17:10 -08:00
parent 5d1bd8a932
commit 952906279c
No known key found for this signature in database
3 changed files with 82 additions and 0 deletions

View File

@ -544,6 +544,42 @@ tail:
return window_end;
}
/* RULE_TIL [rule, rule]: rule[1] is the terminus pattern and rule[2] is the
 * subpattern. Locates the first position at or after `text` where the
 * terminus matches, runs the subpattern against only the input that precedes
 * that position, and on success returns the END of the terminus match (so
 * the caller advances past the terminus). Returns NULL if either the
 * terminus is never found or the subpattern fails. */
case RULE_TIL: {
const uint32_t *rule_terminus = s->bytecode + rule[1];
const uint32_t *rule_subpattern = s->bytecode + rule[2];
/* terminus_start walks forward from the current position; terminus_end
 * stays NULL until the terminus pattern matches somewhere. */
const uint8_t *terminus_start = text;
const uint8_t *terminus_end = NULL;
/* NOTE(review): down1/up1 bracket every nested peg_rule call in this case;
 * presumably recursion-depth accounting - confirm against their
 * definitions, which are outside this view. */
down1(s);
/* `<=` rather than `<`: the position s->text_end itself is also tried. */
while (terminus_start <= s->text_end) {
/* The capture state is saved before each attempt and reloaded after it
 * unconditionally - even on a successful match - so the terminus pattern
 * contributes no captures to the result. */
CapState cs2 = cap_save(s);
terminus_end = peg_rule(s, rule_terminus, terminus_start);
cap_load(s, cs2);
if (terminus_end) {
break;
}
terminus_start++;
}
up1(s);
/* Terminus not found at any tried position: the whole rule fails. */
if (!terminus_end) {
return NULL;
}
/* Temporarily move the end of input to where the terminus begins, so the
 * subpattern cannot see the terminus or anything after it. The original
 * end is restored right after the subpattern returns, match or not. */
const uint8_t *saved_end = s->text_end;
s->text_end = terminus_start;
down1(s);
/* The subpattern starts at the original position `text`, not at the
 * terminus. Only success/failure is checked; where it stopped is unused. */
const uint8_t *matched = peg_rule(s, rule_subpattern, text);
up1(s);
s->text_end = saved_end;
if (!matched) {
return NULL;
}
/* Advance to the end of the terminus, not the end of the subpattern. */
return terminus_end;
}
case RULE_SPLIT: {
const uint8_t *saved_end = s->text_end;
const uint32_t *rule_separator = s->bytecode + rule[1];
@ -1227,6 +1263,14 @@ static void spec_sub(Builder *b, int32_t argc, const Janet *argv) {
emit_2(r, RULE_SUB, subrule1, subrule2);
}
/* Compile the (til terminus subpattern) special form.
 *
 * Expects exactly two arguments (checked via peg_fixarity): argv[0] is the
 * terminus pattern and argv[1] is the subpattern. Space for this rule is
 * reserved BEFORE the children are compiled, then filled in with RULE_TIL
 * and the two compiled child rule indices, matching the [rule, rule]
 * operand layout. */
static void spec_til(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 2);

    /* Three entries: the opcode plus its two rule operands. */
    Reserve slot = reserve(b, 3);

    /* Children are compiled in argument order: terminus first. */
    const uint32_t terminus_rule = peg_compile1(b, argv[0]);
    const uint32_t subpattern_rule = peg_compile1(b, argv[1]);

    emit_2(slot, RULE_TIL, terminus_rule, subpattern_rule);
}
static void spec_split(Builder *b, int32_t argc, const Janet *argv) {
peg_fixarity(b, argc, 2);
Reserve r = reserve(b, 3);
@ -1323,6 +1367,7 @@ static const SpecialPair peg_specials[] = {
{"split", spec_split},
{"sub", spec_sub},
{"thru", spec_thru},
{"til", spec_til},
{"to", spec_to},
{"uint", spec_uint_le},
{"uint-be", spec_uint_be},
@ -1657,6 +1702,7 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
i += 4;
break;
case RULE_SUB:
case RULE_TIL:
case RULE_SPLIT:
/* [rule, rule] */
if (rule[1] >= blen) goto bad;

View File

@ -2180,6 +2180,7 @@ typedef enum {
RULE_UNREF, /* [rule, tag] */
RULE_CAPTURE_NUM, /* [rule, tag] */
RULE_SUB, /* [rule, rule] */
RULE_TIL, /* [rule, rule] */
RULE_SPLIT, /* [rule, rule] */
RULE_NTH, /* [nth, rule, tag] */
RULE_ONLY_TAGS, /* [rule] */

View File

@ -713,6 +713,41 @@
"abcdef"
@[])
# Tests for the (til terminus subpattern) PEG special.
# NOTE(review): the `test` helper is defined outside this view; its arguments
# appear to be (name peg input expected), where expected is the capture array
# on a successful match or nil on failure - confirm against its definition.

# "d" is found in the input and "abc" matches the text before it; neither
# pattern captures, so the result is an empty capture array.
(test "til: basic matching"
~(til "d" "abc")
"abcdef"
@[])
# -1 (end of input) succeeds right after "abc": the subpattern's input ends
# where the terminus "d" begins, so "def" is invisible to it.
(test "til: second pattern can't see past the first occurrence of first pattern"
~(til "d" (* "abc" -1))
"abcdef"
@[])
# "x" never occurs in the input, so there is no terminus and the match fails.
(test "til: fails if first pattern fails"
~(til "x" "abc")
"abcdef"
nil)
# The terminus "abc" is found, but the subpattern "x" does not match.
(test "til: fails if second pattern fails"
~(til "abc" "x")
"abcdef"
nil)
# Both patterns are captures, yet only the subpattern's "abc" is returned;
# the terminus capture "d" is dropped.
(test "til: discards captures from initial pattern"
~(til '"d" '"abc")
"abcdef"
@["abc"])
# (til 0 ...) is entered after "one\ntw" has been consumed. The position,
# line and column captures report 6, 2 and 3 - coordinates within the whole
# input string rather than within the restricted region.
(test "til: positions inside second match are still relative to the entire input"
~(* "one\ntw" (til 0 (* ($) (line) (column))))
"one\ntwo\nthree\n"
@[6 2 3])
# The subpattern "ab" stops before "c", but til still advances past the
# terminus "d", so the following "e" matches.
(test "til: advances to the end of the first pattern's first occurrence"
~(* (til "d" "ab") "e")
"abcdef"
@[])
(test "split: basic functionality"
~(split "," '1)
"a,b,c"