From 952906279c8ad7370c11a363955fa1a852adea63 Mon Sep 17 00:00:00 2001
From: Ian Henry
Date: Wed, 4 Dec 2024 21:17:10 -0800
Subject: [PATCH] add (til) PEG special

(til sep subpattern) is a specialized (sub) that behaves like
(sub (to sep) subpattern), but advances over the input like (thru sep).
---
 src/core/peg.c       | 46 ++++++++++++++++++++++++++++++++++++++++++++
 src/include/janet.h  |  1 +
 test/suite-peg.janet | 35 +++++++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+)

diff --git a/src/core/peg.c b/src/core/peg.c
index 24d1d320..2f2ebe0f 100644
--- a/src/core/peg.c
+++ b/src/core/peg.c
@@ -544,6 +544,42 @@ tail:
             return window_end;
         }
 
+        case RULE_TIL: { /* (til terminus subpattern): locate the terminus, then match subpattern on the text before it */
+            const uint32_t *rule_terminus = s->bytecode + rule[1]; /* operand 1: the pattern searched for */
+            const uint32_t *rule_subpattern = s->bytecode + rule[2]; /* operand 2: the pattern run on the text preceding the terminus */
+
+            const uint8_t *terminus_start = text; /* candidate position at which the terminus begins */
+            const uint8_t *terminus_end = NULL; /* remains NULL until the terminus matches somewhere */
+            down1(s); /* NOTE(review): down1/up1 are defined outside this patch; they look like paired depth bookkeeping around the nested peg_rule calls -- confirm */
+            while (terminus_start <= s->text_end) { /* <= so the terminus is also attempted at text_end itself */
+                CapState cs2 = cap_save(s);
+                terminus_end = peg_rule(s, rule_terminus, terminus_start);
+                cap_load(s, cs2); /* reloaded on success as well as failure; the accompanying tests expect terminus captures to be discarded */
+                if (terminus_end) {
+                    break;
+                }
+                terminus_start++; /* no match at this position; retry one byte later */
+            }
+            up1(s);
+
+            if (!terminus_end) { /* the terminus did not match at any position up to text_end */
+                return NULL;
+            }
+
+            const uint8_t *saved_end = s->text_end;
+            s->text_end = terminus_start; /* the subpattern sees the input as ending where the terminus starts */
+            down1(s);
+            const uint8_t *matched = peg_rule(s, rule_subpattern, text); /* the subpattern starts from the original position */
+            up1(s);
+            s->text_end = saved_end; /* restore the real end before any return below */
+
+            if (!matched) {
+                return NULL;
+            }
+
+            return terminus_end; /* advance to the end of the terminus match, not to where the subpattern stopped */
+        }
+
         case RULE_SPLIT: {
             const uint8_t *saved_end = s->text_end;
             const uint32_t *rule_separator = s->bytecode + rule[1];
@@ -1227,6 +1263,14 @@ static void spec_sub(Builder *b, int32_t argc, const Janet *argv) {
     emit_2(r, RULE_SUB, subrule1, subrule2);
 }
 
+static void spec_til(Builder *b, int32_t argc, const Janet *argv) { /* compiles the (til terminus subpattern) special into a RULE_TIL instruction */
+    peg_fixarity(b, argc, 2); /* NOTE(review): presumably rejects any argument count other than 2 -- helper is outside this patch */
+    Reserve r = reserve(b, 3); /* 3 slots, consistent with rule[0], rule[1], rule[2] as read by the RULE_TIL case */
+    uint32_t subrule1 = peg_compile1(b, argv[0]); /* first argument: the terminus pattern */
+    uint32_t subrule2 = peg_compile1(b, argv[1]); /* second argument: the subpattern */
+    emit_2(r, RULE_TIL, subrule1, subrule2);
+}
+
 static void spec_split(Builder *b, int32_t argc, const Janet *argv) {
     peg_fixarity(b, argc, 2);
     Reserve r = reserve(b, 3);
@@ -1323,6 +1367,7 @@ static const SpecialPair peg_specials[] = {
     {"split", spec_split},
     {"sub", spec_sub},
     {"thru", spec_thru},
+    {"til", spec_til}, /* NOTE(review): neighbouring entries appear alphabetically ordered; confirm whether lookup depends on that */
     {"to", spec_to},
     {"uint", spec_uint_le},
     {"uint-be", spec_uint_be},
@@ -1657,6 +1702,7 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
                 i += 4;
                 break;
             case RULE_SUB:
+            case RULE_TIL: /* shares the two-rule operand validation used for RULE_SUB and RULE_SPLIT */
             case RULE_SPLIT:
                 /* [rule, rule] */
                 if (rule[1] >= blen) goto bad;
diff --git a/src/include/janet.h b/src/include/janet.h
index 8ec75e4f..974ee837 100644
--- a/src/include/janet.h
+++ b/src/include/janet.h
@@ -2180,6 +2180,7 @@ typedef enum {
     RULE_UNREF,        /* [rule, tag] */
     RULE_CAPTURE_NUM,  /* [rule, tag] */
     RULE_SUB,          /* [rule, rule] */
+    RULE_TIL,          /* [rule, rule] */
     RULE_SPLIT,        /* [rule, rule] */
     RULE_NTH,          /* [nth, rule, tag] */
     RULE_ONLY_TAGS,    /* [rule] */
diff --git a/test/suite-peg.janet b/test/suite-peg.janet
index ac426cfc..3f97125d 100644
--- a/test/suite-peg.janet
+++ b/test/suite-peg.janet
@@ -713,6 +713,41 @@
   "abcdef"
   @[])
 
+(test "til: basic matching"
+  ~(til "d" "abc")
+  "abcdef"
+  @[])
+
+(test "til: second pattern can't see past the first occurrence of first pattern"
+  ~(til "d" (* "abc" -1))
+  "abcdef"
+  @[])
+
+(test "til: fails if first pattern fails"
+  ~(til "x" "abc")
+  "abcdef"
+  nil)
+
+(test "til: fails if second pattern fails"
+  ~(til "abc" "x")
+  "abcdef"
+  nil)
+
+(test "til: discards captures from initial pattern"
+  ~(til '"d" '"abc")
+  "abcdef"
+  @["abc"])
+
+(test "til: positions inside second match are still relative to the entire input"
+  ~(* "one\ntw" (til 0 (* ($) (line) (column))))
+  "one\ntwo\nthree\n"
+  @[6 2 3])
+
+(test "til: advances to the end of the first pattern's first occurrence"
+  ~(* (til "d" "ab") "e")
+  "abcdef"
+  @[])
+
 (test "split: basic functionality"
   ~(split "," '1)
   "a,b,c"