Add Peg combinators 'to' and 'thru'.

Inpsired by the REBOL operators of the same name, these
combinators match bytes up to or inculding a given pattern.
(to patt) is (almost) equalivalent to (any (if-not patt 1)), and
(thru patt) is equivalent to (* (to patt) patt). The one difference
is that if the end of the input is reached and patt is not
matched, the entire pattern does not match.
This commit is contained in:
Calvin Rose 2020-06-10 21:18:50 -05:00
parent 2595c8a853
commit 6d096551f0
5 changed files with 50 additions and 2 deletions

View File

@ -23,7 +23,7 @@ static int num_array_gc(void *p, size_t s) {
return 0;
}
int num_array_get(void *p, Janet key, Janet *out);
int num_array_get(void *p, Janet key, Janet *out);
void num_array_put(void *p, Janet key, Janet value);
static const JanetAbstractType num_array_type = {

View File

@ -213,7 +213,7 @@ static int64_t compare_double_double(double x, double y) {
static int64_t compare_int64_double(int64_t x, double y) {
if (isnan(y)) {
return 0; // clojure and python do this
} else if ((y > ((double) -MAX_INT_IN_DBL)) && (y < ((double) MAX_INT_IN_DBL))) {
} else if ((y > ((double) - MAX_INT_IN_DBL)) && (y < ((double) MAX_INT_IN_DBL))) {
double dx = (double) x;
return compare_double_double(dx, y);
} else if (y > ((double) INT64_MAX)) {

View File

@ -206,6 +206,29 @@ tail:
return (result) ? NULL : text;
}
case RULE_THRU:
case RULE_TO: {
const uint32_t *rule_a = s->bytecode + rule[1];
const uint8_t *next_text;
CapState cs = cap_save(s);
down1(s);
while (text < s->text_end) {
CapState cs2 = cap_save(s);
next_text = peg_rule(s, rule_a, text);
if (next_text) {
if (rule[0] == RULE_TO) cap_load(s, cs2);
break;
}
text++;
}
up1(s);
if (text >= s->text_end) {
cap_load(s, cs);
return NULL;
}
return rule[0] == RULE_TO ? text : next_text;
}
case RULE_BETWEEN: {
uint32_t lo = rule[1];
uint32_t hi = rule[2];
@ -765,6 +788,12 @@ static void spec_error(Builder *b, int32_t argc, const Janet *argv) {
static void spec_drop(Builder *b, int32_t argc, const Janet *argv) {
spec_onerule(b, argc, argv, RULE_DROP);
}
static void spec_to(Builder *b, int32_t argc, const Janet *argv) {
spec_onerule(b, argc, argv, RULE_TO);
}
static void spec_thru(Builder *b, int32_t argc, const Janet *argv) {
spec_onerule(b, argc, argv, RULE_THRU);
}
/* Rule of the form [rule, tag] */
static void spec_cap1(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
@ -895,6 +924,8 @@ static const SpecialPair peg_specials[] = {
{"sequence", spec_sequence},
{"set", spec_set},
{"some", spec_some},
{"thru", spec_thru},
{"to", spec_to},
};
/* Compile a janet value into a rule and return the rule index. */
@ -1180,6 +1211,8 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
case RULE_ERROR:
case RULE_DROP:
case RULE_NOT:
case RULE_TO:
case RULE_THRU:
/* [rule] */
if (rule[1] >= blen) goto bad;
op_flags[rule[1]] |= 0x01;

View File

@ -1582,6 +1582,8 @@ typedef enum {
RULE_ERROR, /* [rule] */
RULE_DROP, /* [rule] */
RULE_BACKMATCH, /* [tag] */
RULE_TO, /* [rule] */
RULE_THRU, /* [rule] */
RULE_LENPREFIX, /* [rule_a, rule_b (repeat rule_b rule_a times)] */
} JanetPegOpcode;

View File

@ -288,4 +288,17 @@ neldb\0\0\0\xD8\x05printG\x01\0\xDE\xDE\xDE'\x03\0marshal_tes/\x02
# Issue #412
(assert (peg/match '(* "a" (> -1 "a") "b") "abc") "lookhead does not move cursor")
(def peg3
~{:main (* "(" (thru ")"))})
(def peg4 (peg/compile ~(* (thru "(") '(to ")"))))
(assert (peg/match peg3 "(12345)") "peg thru 1")
(assert (not (peg/match peg3 " (12345)")) "peg thru 2")
(assert (not (peg/match peg3 "(12345")) "peg thru 3")
(assert (= "abc" (0 (peg/match peg4 "123(abc)"))) "peg thru/to 1")
(assert (= "abc" (0 (peg/match peg4 "(abc)"))) "peg thru/to 2")
(assert (not (peg/match peg4 "123(abc")) "peg thru/to 3")
(end-suite)