mirror of
https://github.com/janet-lang/janet
synced 2025-01-10 15:40:30 +00:00
add a new (split) PEG special
This works similarly to string/split, but the separator is a PEG.
This commit is contained in:
parent
e8ed961572
commit
61f38fab37
@ -39,7 +39,7 @@
|
|||||||
typedef struct {
|
typedef struct {
|
||||||
const uint8_t *text_start;
|
const uint8_t *text_start;
|
||||||
const uint8_t *text_end;
|
const uint8_t *text_end;
|
||||||
/* text_end will be restricted in a (sub) rule, but
|
/* text_end can be restricted by some rules, but
|
||||||
outer_text_end will always contain the real end of
|
outer_text_end will always contain the real end of
|
||||||
input, which we need to generate a line mapping */
|
input, which we need to generate a line mapping */
|
||||||
const uint8_t *outer_text_end;
|
const uint8_t *outer_text_end;
|
||||||
@ -510,6 +510,44 @@ tail:
|
|||||||
return window_end;
|
return window_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case RULE_SPLIT: {
    /*
     * (split separator subpattern)
     *   rule[1] - bytecode offset of the separator rule
     *   rule[2] - bytecode offset of the subpattern rule
     *
     * Repeatedly scans forward for the next position where the separator
     * matches, then runs the subpattern on the chunk of text in front of
     * that separator. While the subpattern runs, s->text_end is clamped
     * to the start of the separator, so the chunk boundary is the end of
     * input as far as the subpattern is concerned. The text after the
     * last separator (or the whole input when the separator never
     * matches) is the final chunk.
     *
     * The capture state is restored after every separator attempt, so
     * only the subpattern contributes captures.
     *
     * Returns NULL as soon as the subpattern fails on any chunk;
     * otherwise returns the end of the input.
     */
    const uint8_t *saved_end = s->text_end;
    const uint32_t *rule_separator = s->bytecode + rule[1];
    const uint32_t *rule_subpattern = s->bytecode + rule[2];

    const uint8_t *separator_end = NULL;
    /* Set when the previous separator match consumed no input. */
    int last_separator_was_empty = 0;

    do {
        const uint8_t *chunk_start = text;
        separator_end = NULL;

        /*
         * A zero-width separator leaves `text` where it was. Searching
         * from the same position again would find the same empty match
         * and loop forever, so begin the next search one byte further
         * on. The chunk itself still starts at chunk_start, so no input
         * is skipped.
         */
        if (last_separator_was_empty) {
            text++;
        }

        /* Look for the next separator; stops with `text` at its start. */
        CapState cs = cap_save(s);
        down1(s);
        while (text <= saved_end) {
            separator_end = peg_rule(s, rule_separator, text);
            /* Throw away anything the separator attempt captured. */
            cap_load(s, cs);
            if (separator_end) {
                break;
            }
            text++;
        }
        up1(s);

        if (separator_end) {
            last_separator_was_empty = (separator_end == text);
            /* The chunk ends where the separator begins. */
            s->text_end = text;
            text = separator_end;
        }

        /* Match the subpattern against the chunk, then lift the clamp. */
        down1(s);
        const uint8_t *subpattern_end = peg_rule(s, rule_subpattern, chunk_start);
        up1(s);
        s->text_end = saved_end;

        if (!subpattern_end) {
            return NULL;
        }
    } while (separator_end);

    return saved_end;
}
|
||||||
|
|
||||||
case RULE_REPLACE:
|
case RULE_REPLACE:
|
||||||
case RULE_MATCHTIME: {
|
case RULE_MATCHTIME: {
|
||||||
uint32_t tag = rule[3];
|
uint32_t tag = rule[3];
|
||||||
@ -1143,6 +1181,14 @@ static void spec_sub(Builder *b, int32_t argc, const Janet *argv) {
|
|||||||
emit_2(r, RULE_SUB, subrule1, subrule2);
|
emit_2(r, RULE_SUB, subrule1, subrule2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Compile the (split separator subpattern) special.
 *
 * Requires exactly two arguments. argv[0] is compiled as the separator
 * rule and argv[1] as the subpattern rule; the result is emitted as a
 * RULE_SPLIT instruction with those two rule references as operands.
 */
static void spec_split(Builder *b, int32_t argc, const Janet *argv) {
    peg_fixarity(b, argc, 2);

    /* Reserve the instruction's three words before compiling the operands. */
    Reserve slot = reserve(b, 3);

    const uint32_t separator_rule = peg_compile1(b, argv[0]);
    const uint32_t subpattern_rule = peg_compile1(b, argv[1]);

    emit_2(slot, RULE_SPLIT, separator_rule, subpattern_rule);
}
|
||||||
|
|
||||||
#ifdef JANET_INT_TYPES
|
#ifdef JANET_INT_TYPES
|
||||||
#define JANET_MAX_READINT_WIDTH 8
|
#define JANET_MAX_READINT_WIDTH 8
|
||||||
#else
|
#else
|
||||||
@ -1226,6 +1272,7 @@ static const SpecialPair peg_specials[] = {
|
|||||||
{"sequence", spec_sequence},
|
{"sequence", spec_sequence},
|
||||||
{"set", spec_set},
|
{"set", spec_set},
|
||||||
{"some", spec_some},
|
{"some", spec_some},
|
||||||
|
{"split", spec_split},
|
||||||
{"sub", spec_sub},
|
{"sub", spec_sub},
|
||||||
{"thru", spec_thru},
|
{"thru", spec_thru},
|
||||||
{"to", spec_to},
|
{"to", spec_to},
|
||||||
@ -1562,6 +1609,7 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
|
|||||||
i += 4;
|
i += 4;
|
||||||
break;
|
break;
|
||||||
case RULE_SUB:
|
case RULE_SUB:
|
||||||
|
case RULE_SPLIT:
|
||||||
/* [rule, rule] */
|
/* [rule, rule] */
|
||||||
if (rule[1] >= blen) goto bad;
|
if (rule[1] >= blen) goto bad;
|
||||||
if (rule[2] >= blen) goto bad;
|
if (rule[2] >= blen) goto bad;
|
||||||
|
@ -2141,7 +2141,8 @@ typedef enum {
|
|||||||
RULE_COLUMN, /* [tag] */
|
RULE_COLUMN, /* [tag] */
|
||||||
RULE_UNREF, /* [rule, tag] */
|
RULE_UNREF, /* [rule, tag] */
|
||||||
RULE_CAPTURE_NUM, /* [rule, tag] */
|
RULE_CAPTURE_NUM, /* [rule, tag] */
|
||||||
RULE_SUB /* [rule, rule] */
|
RULE_SUB, /* [rule, rule] */
|
||||||
|
RULE_SPLIT /* [rule, rule] */
|
||||||
} JanetPegOpcod;
|
} JanetPegOpcod;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -265,6 +265,7 @@
|
|||||||
(marshpeg '(group "abc"))
|
(marshpeg '(group "abc"))
|
||||||
(marshpeg '(sub "abcdf" "abc"))
|
(marshpeg '(sub "abcdf" "abc"))
|
||||||
(marshpeg '(* (sub 1 1)))
|
(marshpeg '(* (sub 1 1)))
|
||||||
|
(marshpeg '(split "," (+ "a" "b" "c")))
|
||||||
|
|
||||||
# Peg swallowing errors
|
# Peg swallowing errors
|
||||||
# 159651117
|
# 159651117
|
||||||
@ -710,5 +711,50 @@
|
|||||||
"abcdef"
|
"abcdef"
|
||||||
@[])
|
@[])
|
||||||
|
|
||||||
|
(test "split: basic functionality"
|
||||||
|
~(split "," '1)
|
||||||
|
"a,b,c"
|
||||||
|
@["a" "b" "c"])
|
||||||
|
|
||||||
|
(test "split: drops captures from separator pattern"
|
||||||
|
~(split '"," '1)
|
||||||
|
"a,b,c"
|
||||||
|
@["a" "b" "c"])
|
||||||
|
|
||||||
|
(test "split: can match empty subpatterns"
|
||||||
|
~(split "," ':w*)
|
||||||
|
",a,,bar,,,c,,"
|
||||||
|
@["" "a" "" "bar" "" "" "c" "" ""])
|
||||||
|
|
||||||
|
(test "split: subpattern is limited to only text before the separator"
|
||||||
|
~(split "," '(to -1))
|
||||||
|
"a,,bar,c"
|
||||||
|
@["a" "" "bar" "c"])
|
||||||
|
|
||||||
|
(test "split: fails if any subpattern fails"
|
||||||
|
~(split "," '"a")
|
||||||
|
"a,a,b"
|
||||||
|
nil)
|
||||||
|
|
||||||
|
(test "split: separator does not have to match anything"
|
||||||
|
~(split "x" '(to -1))
|
||||||
|
"a,a,b"
|
||||||
|
@["a,a,b"])
|
||||||
|
|
||||||
|
(test "split: always consumes entire input"
|
||||||
|
~(split 1 '"")
|
||||||
|
"abc"
|
||||||
|
@["" "" "" ""])
|
||||||
|
|
||||||
|
(test "split: separator can be an arbitrary PEG"
|
||||||
|
~(split :s+ '(to -1))
|
||||||
|
"a b c"
|
||||||
|
@["a" "b" "c"])
|
||||||
|
|
||||||
|
(test "split: does not advance past the end of the input"
|
||||||
|
~(* (split "," ':w+) 0)
|
||||||
|
"a,b,c"
|
||||||
|
@["a" "b" "c"])
|
||||||
|
|
||||||
(end-suite)
|
(end-suite)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user