1
0
mirror of https://github.com/janet-lang/janet synced 2024-06-18 11:19:56 +00:00

Address #641 - add undef combinator.

The (undef rule :tag) combinator lets a user "scope" tagged captures.
After the rule has matched, all captures with tag :tag can no longer be
refered to by their tag. However, such captures from outside
rule are kept as is. If no tag is given, all tagged captures from rule
are unreferenced. Note that this doesn't `drop` the captures, merely
removes their association with the tag. This means subsequent calls to
`backref` and `backmatch` will no longer "see" these tagged captures.
This commit is contained in:
Calvin Rose 2021-02-26 17:25:09 -06:00
parent a5f237993d
commit 96513665d6
3 changed files with 46 additions and 4 deletions

View File

@ -596,6 +596,30 @@ tail:
return text + width;
}
case RULE_UNREF: {
int32_t tcap = s->tags->count;
down1(s);
const uint8_t *result = peg_rule(s, s->bytecode + rule[1], text);
up1(s);
if (!result) return NULL;
int32_t final_tcap = s->tags->count;
/* Truncate tagged captures to not include items of the given tag */
int32_t w = tcap;
/* If no tag is given, drop ALL tagged captures */
if (rule[2]) {
for (int32_t i = tcap; i < final_tcap; i++) {
if (s->tags->data[i] != (0xFF & rule[2])) {
s->tags->data[w] = s->tags->data[i];
s->tagged_captures->data[w] = s->tagged_captures->data[i];
w++;
}
}
}
s->tags->count = w;
s->tagged_captures->count = w;
return result;
}
}
}
@ -919,15 +943,15 @@ static void spec_error(Builder *b, int32_t argc, const Janet *argv) {
spec_onerule(b, argc, argv, RULE_ERROR);
}
}
static void spec_drop(Builder *b, int32_t argc, const Janet *argv) {
spec_onerule(b, argc, argv, RULE_DROP);
}
static void spec_to(Builder *b, int32_t argc, const Janet *argv) {
spec_onerule(b, argc, argv, RULE_TO);
}
static void spec_thru(Builder *b, int32_t argc, const Janet *argv) {
spec_onerule(b, argc, argv, RULE_THRU);
}
static void spec_drop(Builder *b, int32_t argc, const Janet *argv) {
spec_onerule(b, argc, argv, RULE_DROP);
}
/* Rule of the form [rule, tag] */
static void spec_cap1(Builder *b, int32_t argc, const Janet *argv, uint32_t op) {
@ -947,6 +971,9 @@ static void spec_accumulate(Builder *b, int32_t argc, const Janet *argv) {
static void spec_group(Builder *b, int32_t argc, const Janet *argv) {
spec_cap1(b, argc, argv, RULE_GROUP);
}
static void spec_unref(Builder *b, int32_t argc, const Janet *argv) {
spec_cap1(b, argc, argv, RULE_UNREF);
}
static void spec_reference(Builder *b, int32_t argc, const Janet *argv) {
peg_arity(b, argc, 1, 2);
@ -1104,6 +1131,7 @@ static const SpecialPair peg_specials[] = {
{"to", spec_to},
{"uint", spec_uint_le},
{"uint-be", spec_uint_be},
{"unref", spec_unref},
};
/* Compile a janet value into a rule and return the rule index. */
@ -1392,6 +1420,7 @@ static void *peg_unmarshal(JanetMarshalContext *ctx) {
case RULE_ACCUMULATE:
case RULE_GROUP:
case RULE_CAPTURE:
case RULE_UNREF:
/* [rule, tag] */
if (rule[1] >= blen) goto bad;
op_flags[rule[1]] |= 0x01;

View File

@ -1841,7 +1841,8 @@ typedef enum {
RULE_LENPREFIX, /* [rule_a, rule_b (repeat rule_b rule_a times)] */
RULE_READINT, /* [(signedness << 4) | (endianess << 5) | bytewidth, tag] */
RULE_LINE, /* [tag] */
RULE_COLUMN /* [tag] */
RULE_COLUMN, /* [tag] */
RULE_UNREF /* [rule, tag] */
} JanetPegOpcod;
typedef struct {

View File

@ -481,4 +481,16 @@
(check-deep '(to "b") "aaaa" nil)
(check-deep '(thru "b") "aaaa" nil)
# unref
(def grammar
(peg/compile
~{:main (* :tagged -1)
:tagged (unref (replace (* :open-tag :value :close-tag) ,struct))
:open-tag (* (constant :tag) "<" (capture :w+ :tag-name) ">")
:value (* (constant :value) (group (any (+ :tagged :untagged))))
:close-tag (* "</" (backmatch :tag-name) ">")
:untagged (capture (any (if-not "<" 1)))}))
(check-deep grammar "<p><em>foobar</em></p>" @[{:tag "p" :value @[{:tag "em" :value @["foobar"]}]}])
(check-deep grammar "<p>foobar</p>" @[{:tag "p" :value @["foobar"]}])
(end-suite)