diff --git a/CHANGELOG.md b/CHANGELOG.md
index f65f9ec1..35784cd0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 All notable changes to this project will be documented in this file.
 
 ## Unreleased - ???
+- Add `lenprefix` combinator to PEGs.
 - Add `%M`, `%m`, `%N`, and `%n` formatters to formatting functions. These are the same
   as `%Q`, `%q`, `%P`, and `%p`, but will not truncate long values.
 - Add `fiber/root`.
diff --git a/src/core/peg.c b/src/core/peg.c
index d08b1770..d732d157 100644
--- a/src/core/peg.c
+++ b/src/core/peg.c
@@ -413,6 +413,38 @@ tail:
             return NULL;
         }
 
+        case RULE_LENPREFIX: { /* [rule_a, rule_b]: repeat rule_b as many times as rule_a's first capture says */
+            int oldmode = s->mode;
+            s->mode = PEG_MODE_NORMAL; /* presumably so the length capture lands in s->captures, where it is read back below */
+            const uint8_t *next_text;
+            CapState cs = cap_save(s);
+            down1(s);
+            next_text = peg_rule(s, s->bytecode + rule[1], text);
+            up1(s);
+            s->mode = oldmode; /* restore before the failure check so a failed length rule cannot leak the forced mode */
+            if (NULL == next_text) return NULL;
+            int32_t num_sub_captures = s->captures->count - cs.cap;
+            Janet lencap;
+            if (num_sub_captures <= 0 || /* fail when rule_a captured nothing or its first capture is not an integer */
+                    (lencap = s->captures->data[cs.cap], !janet_checkint(lencap))) {
+                cap_load(s, cs);
+                return NULL;
+            }
+            int32_t nrep = janet_unwrap_integer(lencap); /* nrep <= 0 runs no iterations and succeeds at next_text */
+            /* drop captures from len pattern */
+            cap_load(s, cs);
+            for (int32_t i = 0; i < nrep; i++) {
+                down1(s);
+                next_text = peg_rule(s, s->bytecode + rule[2], next_text);
+                up1(s);
+                if (NULL == next_text) {
+                    cap_load(s, cs); /* roll back captures pushed by the iterations that did match */
+                    return NULL;
+                }
+            }
+            return next_text;
+        }
+
     }
 }
 
@@ -657,6 +689,9 @@ static void spec_if(Builder *b, int32_t argc, const Janet *argv) {
 static void spec_ifnot(Builder *b, int32_t argc, const Janet *argv) {
     spec_branch(b, argc, argv, RULE_IFNOT);
 }
+static void spec_lenprefix(Builder *b, int32_t argc, const Janet *argv) {
+    spec_branch(b, argc, argv, RULE_LENPREFIX);
+}
 
 static void spec_between(Builder *b, int32_t argc, const Janet *argv) {
     peg_fixarity(b, argc, 3);
@@ -847,6 +882,7 @@ static const SpecialPair peg_specials[] = {
     {"group", spec_group},
     {"if", spec_if},
     {"if-not", spec_ifnot},
+    {"lenprefix", spec_lenprefix},
     {"look", spec_look},
     {"not", spec_not},
     {"opt", spec_opt},
@@ -1100,6 +1136,7 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
                 break;
             case RULE_IF:
             case RULE_IFNOT:
+            case RULE_LENPREFIX:
                 /* [rule_a, rule_b (b if not a)] */
                 if (rule[1] >= blen) goto bad;
                 if (rule[2] >= blen) goto bad;
diff --git a/src/include/janet.h b/src/include/janet.h
index 8a432f3b..c803aac9 100644
--- a/src/include/janet.h
+++ b/src/include/janet.h
@@ -1576,6 +1576,7 @@ typedef enum {
     RULE_ERROR,        /* [rule] */
     RULE_DROP,         /* [rule] */
     RULE_BACKMATCH,    /* [tag] */
+    RULE_LENPREFIX,    /* [rule_a, rule_b (repeat rule_b rule_a times)] */
 } JanetPegOpcode;
 
 typedef struct {
diff --git a/test/suite8.janet b/test/suite8.janet
index a6af0ce3..d2556709 100644
--- a/test/suite8.janet
+++ b/test/suite8.janet
@@ -252,4 +252,33 @@ neldb\0\0\0\xD8\x05printG\x01\0\xDE\xDE\xDE'\x03\0marshal_tes/\x02
 (assert (< [1 2 3] [1 2 3 -1]) "tuple comparison 5")
 (assert (> [1 2 3] [1 2]) "tuple comparison 6")
 
+# Lenprefix rule
+
+(def peg (peg/compile ~(* (lenprefix (/ (* '(any (if-not ":" 1)) ":") ,scan-number) 1) -1)))
+
+(assert (peg/match peg "5:abcde") "lenprefix 1")
+(assert (not (peg/match peg "5:abcdef")) "lenprefix 2")
+(assert (not (peg/match peg "5:abcd")) "lenprefix 3")
+
+# Packet capture
+
+(def peg2
+  (peg/compile
+    ~{# capture packet length in tag :header-len
+      :packet-header (* (/ ':d+ ,scan-number :header-len) ":")
+
+      # capture n bytes from a backref :header-len
+      :packet-body '(lenprefix (-> :header-len) 1)
+
+      # header, followed by body, and drop the :header-len capture
+      :packet (/ (* :packet-header :packet-body) ,|$1)
+
+      # any exact sequence of packets (no extra characters)
+      :main (* (any :packet) -1)}))
+
+(assert (deep= @["a" "bb" "ccc"] (peg/match peg2 "1:a2:bb3:ccc")) "lenprefix 4")
+(assert (deep= @["a" "bb" "cccccc"] (peg/match peg2 "1:a2:bb6:cccccc")) "lenprefix 5")
+(assert (= nil (peg/match peg2 "1:a2:bb:5:cccccc")) "lenprefix 6")
+(assert (= nil (peg/match peg2 "1:a2:bb:7:cccccc")) "lenprefix 7")
+
 (end-suite)