Add lenprefix combinator to pegs.

This lets peg match n repeitions of a pattern, where
n is supplied from other parsed input and is not a constant.
This commit is contained in:
Calvin Rose 2020-05-02 10:37:39 -05:00
parent 761ea65d81
commit 8b5bcaee3c
4 changed files with 68 additions and 0 deletions

View File

@ -2,6 +2,7 @@
All notable changes to this project will be documented in this file.
## Unreleased - ???
- Add `lenprefix` combinator to PEGs.
- Add `%M`, `%m`, `%N`, and `%n` formatters to formatting functions. These are the
same as `%Q`, `%q`, `%P`, and `%p`, but will not truncate long values.
- Add `fiber/root`.

View File

@ -413,6 +413,38 @@ tail:
return NULL;
}
case RULE_LENPREFIX: {
int oldmode = s->mode;
s->mode = PEG_MODE_NORMAL;
const uint8_t *next_text;
CapState cs = cap_save(s);
down1(s);
next_text = peg_rule(s, s->bytecode + rule[1], text);
up1(s);
if (NULL == next_text) return NULL;
s->mode = oldmode;
int32_t num_sub_captures = s->captures->count - cs.cap;
Janet lencap;
if (num_sub_captures <= 0 ||
(lencap = s->captures->data[cs.cap], !janet_checkint(lencap))) {
cap_load(s, cs);
return NULL;
}
int32_t nrep = janet_unwrap_integer(lencap);
/* drop captures from len pattern */
cap_load(s, cs);
for (int32_t i = 0; i < nrep; i++) {
down1(s);
next_text = peg_rule(s, s->bytecode + rule[2], next_text);
up1(s);
if (NULL == next_text) {
cap_load(s, cs);
return NULL;
}
}
return next_text;
}
}
}
@ -657,6 +689,9 @@ static void spec_if(Builder *b, int32_t argc, const Janet *argv) {
static void spec_ifnot(Builder *b, int32_t argc, const Janet *argv) {
spec_branch(b, argc, argv, RULE_IFNOT);
}
static void spec_lenprefix(Builder *b, int32_t argc, const Janet *argv) {
spec_branch(b, argc, argv, RULE_LENPREFIX);
}
static void spec_between(Builder *b, int32_t argc, const Janet *argv) {
peg_fixarity(b, argc, 3);
@ -847,6 +882,7 @@ static const SpecialPair peg_specials[] = {
{"group", spec_group},
{"if", spec_if},
{"if-not", spec_ifnot},
{"lenprefix", spec_lenprefix},
{"look", spec_look},
{"not", spec_not},
{"opt", spec_opt},
@ -1100,6 +1136,7 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
break;
case RULE_IF:
case RULE_IFNOT:
case RULE_LENPREFIX:
/* [rule_a, rule_b (b if not a)] */
if (rule[1] >= blen) goto bad;
if (rule[2] >= blen) goto bad;

View File

@ -1576,6 +1576,7 @@ typedef enum {
RULE_ERROR, /* [rule] */
RULE_DROP, /* [rule] */
RULE_BACKMATCH, /* [tag] */
RULE_LENPREFIX, /* [rule_a, rule_b (repeat rule_b rule_a times)] */
} JanetPegOpcode;
typedef struct {

View File

@ -252,4 +252,33 @@ neldb\0\0\0\xD8\x05printG\x01\0\xDE\xDE\xDE'\x03\0marshal_tes/\x02
(assert (< [1 2 3] [1 2 3 -1]) "tuple comparison 5")
(assert (> [1 2 3] [1 2]) "tuple comparison 6")
# Lenprefix rule
(def peg (peg/compile ~(* (lenprefix (/ (* '(any (if-not ":" 1)) ":") ,scan-number) 1) -1)))
(assert (peg/match peg "5:abcde") "lenprefix 1")
(assert (not (peg/match peg "5:abcdef")) "lenprefix 2")
(assert (not (peg/match peg "5:abcd")) "lenprefix 3")
# Packet capture
(def peg2
(peg/compile
~{# capture packet length in tag :header-len
:packet-header (* (/ ':d+ ,scan-number :header-len) ":")
# capture n bytes from a backref :header-len
:packet-body '(lenprefix (-> :header-len) 1)
# header, followed by body, and drop the :header-len capture
:packet (/ (* :packet-header :packet-body) ,|$1)
# any exact seqence of packets (no extra characters)
:main (* (any :packet) -1)}))
(assert (deep= @["a" "bb" "ccc"] (peg/match peg2 "1:a2:bb3:ccc")) "lenprefix 4")
(assert (deep= @["a" "bb" "cccccc"] (peg/match peg2 "1:a2:bb6:cccccc")) "lenprefix 5")
(assert (= nil (peg/match peg2 "1:a2:bb:5:cccccc")) "lenprefix 6")
(assert (= nil (peg/match peg2 "1:a2:bb:7:cccccc")) "lenprefix 7")
(end-suite)