mirror of
https://github.com/janet-lang/janet
synced 2025-01-13 00:50:26 +00:00
Add some preliminary capturing ability to PEGs.
This commit is contained in:
parent
40845b5c1b
commit
1efca2ebe7
@ -43,6 +43,7 @@ typedef struct {
|
||||
const uint8_t *text_start;
|
||||
const uint8_t *text_end;
|
||||
JanetTable *grammar;
|
||||
JanetArray *captures;
|
||||
int flags;
|
||||
} State;
|
||||
|
||||
@ -56,6 +57,10 @@ typedef struct {
|
||||
Matcher matcher;
|
||||
} MatcherPair;
|
||||
|
||||
/*
|
||||
* Primitive Pattern Types
|
||||
*/
|
||||
|
||||
/* Match a character range */
|
||||
int32_t match_range(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
if (s->text_end <= text)
|
||||
@ -82,6 +87,10 @@ int32_t match_set(State *s, int32_t argc, const Janet *argv, const uint8_t *text
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Combining Pattern Types
|
||||
*/
|
||||
|
||||
/* Match the first of argv[0], argv[1], argv[2], ... */
|
||||
int32_t match_choice(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
for (int32_t i = 0; i < argc; i++) {
|
||||
@ -173,11 +182,57 @@ int32_t match_between(State *s, int32_t argc, const Janet *argv, const uint8_t *
|
||||
return captured >= lo ? total_length : -1;
|
||||
}
|
||||
|
||||
/* Capture a value */
|
||||
int32_t match_capture(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
janet_fixarity(argc, 1);
|
||||
int32_t result = match(s, argv[0], text);
|
||||
if (result < 0) return -1;
|
||||
janet_array_push(s->captures, janet_stringv(text, result));
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Capture position */
|
||||
int32_t match_position(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
janet_fixarity(argc, 0);
|
||||
(void) argv;
|
||||
janet_array_push(s->captures, janet_wrap_number(text - s->text_start));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Capture group */
|
||||
int32_t match_group(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
janet_fixarity(argc, 1);
|
||||
int32_t old_count = s->captures->count;
|
||||
int32_t result = match(s, argv[0], text);
|
||||
if (result < 0) return -1;
|
||||
/* Collect sub-captures into an array by popping new values off of the capture stack,
|
||||
* and then putting them in a new array. Then, push hte new array back onto the capture stack. */
|
||||
int32_t num_sub_captures = s->captures->count - old_count;
|
||||
JanetArray *sub_captures = janet_array(num_sub_captures);
|
||||
memcpy(sub_captures->data, s->captures->data + old_count, sizeof(Janet) * num_sub_captures);
|
||||
sub_captures->count = num_sub_captures;
|
||||
s->captures->count = old_count;
|
||||
janet_array_push(s->captures, janet_wrap_array(sub_captures));
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Capture a constant */
|
||||
int32_t match_capture_constant(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||
(void) text;
|
||||
janet_fixarity(argc, 1);
|
||||
janet_array_push(s->captures, argv[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Lookup for special forms */
|
||||
static const MatcherPair specials[] = {
|
||||
{"*", match_sequence},
|
||||
{"+", match_choice},
|
||||
{"-", match_minus},
|
||||
{"<-", match_capture},
|
||||
{"<-c", match_capture_constant},
|
||||
{"<-p", match_position},
|
||||
{"<-g", match_group},
|
||||
{">", match_lookahead},
|
||||
{"at-least", match_atleast},
|
||||
{"at-most", match_atmost},
|
||||
@ -228,8 +283,11 @@ static int32_t match(State *s, Janet peg, const uint8_t *text) {
|
||||
if (!mp) janet_panicf("unknown special form %v", peg);
|
||||
if (s->depth-- == 0)
|
||||
janet_panic("recursed too deeply");
|
||||
int32_t old_capture_count = s->captures->count;
|
||||
int32_t result = mp->matcher(s, len - 1, items + 1, text);
|
||||
s->depth++;
|
||||
if (result < 0)
|
||||
s->captures->count = old_capture_count;
|
||||
return result;
|
||||
}
|
||||
case JANET_KEYWORD:
|
||||
@ -261,8 +319,9 @@ static Janet cfun_match(int32_t argc, Janet *argv) {
|
||||
s.text_end = bytes.bytes + bytes.len;
|
||||
s.depth = JANET_RECURSION_GUARD;
|
||||
s.grammar = NULL;
|
||||
s.captures = janet_array(10);
|
||||
int32_t result = match(&s, argv[0], bytes.bytes);
|
||||
return janet_wrap_boolean(result >= 0);
|
||||
return result >= 0 ? janet_wrap_array(s.captures) : janet_wrap_nil();
|
||||
}
|
||||
|
||||
static const JanetReg cfuns[] = {
|
||||
|
Loading…
Reference in New Issue
Block a user