mirror of
https://github.com/janet-lang/janet
synced 2025-05-01 23:14:15 +00:00
Add some preliminary capturing ability to PEGs.
This commit is contained in:
parent
40845b5c1b
commit
1efca2ebe7
@ -43,6 +43,7 @@ typedef struct {
|
|||||||
const uint8_t *text_start;
|
const uint8_t *text_start;
|
||||||
const uint8_t *text_end;
|
const uint8_t *text_end;
|
||||||
JanetTable *grammar;
|
JanetTable *grammar;
|
||||||
|
JanetArray *captures;
|
||||||
int flags;
|
int flags;
|
||||||
} State;
|
} State;
|
||||||
|
|
||||||
@ -56,6 +57,10 @@ typedef struct {
|
|||||||
Matcher matcher;
|
Matcher matcher;
|
||||||
} MatcherPair;
|
} MatcherPair;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Primitive Pattern Types
|
||||||
|
*/
|
||||||
|
|
||||||
/* Match a character range */
|
/* Match a character range */
|
||||||
int32_t match_range(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
int32_t match_range(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||||
if (s->text_end <= text)
|
if (s->text_end <= text)
|
||||||
@ -82,6 +87,10 @@ int32_t match_set(State *s, int32_t argc, const Janet *argv, const uint8_t *text
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Combining Pattern Types
|
||||||
|
*/
|
||||||
|
|
||||||
/* Match the first of argv[0], argv[1], argv[2], ... */
|
/* Match the first of argv[0], argv[1], argv[2], ... */
|
||||||
int32_t match_choice(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
int32_t match_choice(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||||
for (int32_t i = 0; i < argc; i++) {
|
for (int32_t i = 0; i < argc; i++) {
|
||||||
@ -173,11 +182,57 @@ int32_t match_between(State *s, int32_t argc, const Janet *argv, const uint8_t *
|
|||||||
return captured >= lo ? total_length : -1;
|
return captured >= lo ? total_length : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Capture a value */
|
||||||
|
int32_t match_capture(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||||
|
janet_fixarity(argc, 1);
|
||||||
|
int32_t result = match(s, argv[0], text);
|
||||||
|
if (result < 0) return -1;
|
||||||
|
janet_array_push(s->captures, janet_stringv(text, result));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Capture position */
|
||||||
|
int32_t match_position(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||||
|
janet_fixarity(argc, 0);
|
||||||
|
(void) argv;
|
||||||
|
janet_array_push(s->captures, janet_wrap_number(text - s->text_start));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Capture group */
|
||||||
|
int32_t match_group(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||||
|
janet_fixarity(argc, 1);
|
||||||
|
int32_t old_count = s->captures->count;
|
||||||
|
int32_t result = match(s, argv[0], text);
|
||||||
|
if (result < 0) return -1;
|
||||||
|
/* Collect sub-captures into an array by popping new values off of the capture stack,
|
||||||
|
* and then putting them in a new array. Then, push hte new array back onto the capture stack. */
|
||||||
|
int32_t num_sub_captures = s->captures->count - old_count;
|
||||||
|
JanetArray *sub_captures = janet_array(num_sub_captures);
|
||||||
|
memcpy(sub_captures->data, s->captures->data + old_count, sizeof(Janet) * num_sub_captures);
|
||||||
|
sub_captures->count = num_sub_captures;
|
||||||
|
s->captures->count = old_count;
|
||||||
|
janet_array_push(s->captures, janet_wrap_array(sub_captures));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Capture a constant */
|
||||||
|
int32_t match_capture_constant(State *s, int32_t argc, const Janet *argv, const uint8_t *text) {
|
||||||
|
(void) text;
|
||||||
|
janet_fixarity(argc, 1);
|
||||||
|
janet_array_push(s->captures, argv[0]);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Lookup for special forms */
|
/* Lookup for special forms */
|
||||||
static const MatcherPair specials[] = {
|
static const MatcherPair specials[] = {
|
||||||
{"*", match_sequence},
|
{"*", match_sequence},
|
||||||
{"+", match_choice},
|
{"+", match_choice},
|
||||||
{"-", match_minus},
|
{"-", match_minus},
|
||||||
|
{"<-", match_capture},
|
||||||
|
{"<-c", match_capture_constant},
|
||||||
|
{"<-p", match_position},
|
||||||
|
{"<-g", match_group},
|
||||||
{">", match_lookahead},
|
{">", match_lookahead},
|
||||||
{"at-least", match_atleast},
|
{"at-least", match_atleast},
|
||||||
{"at-most", match_atmost},
|
{"at-most", match_atmost},
|
||||||
@ -228,8 +283,11 @@ static int32_t match(State *s, Janet peg, const uint8_t *text) {
|
|||||||
if (!mp) janet_panicf("unknown special form %v", peg);
|
if (!mp) janet_panicf("unknown special form %v", peg);
|
||||||
if (s->depth-- == 0)
|
if (s->depth-- == 0)
|
||||||
janet_panic("recursed too deeply");
|
janet_panic("recursed too deeply");
|
||||||
|
int32_t old_capture_count = s->captures->count;
|
||||||
int32_t result = mp->matcher(s, len - 1, items + 1, text);
|
int32_t result = mp->matcher(s, len - 1, items + 1, text);
|
||||||
s->depth++;
|
s->depth++;
|
||||||
|
if (result < 0)
|
||||||
|
s->captures->count = old_capture_count;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
case JANET_KEYWORD:
|
case JANET_KEYWORD:
|
||||||
@ -261,8 +319,9 @@ static Janet cfun_match(int32_t argc, Janet *argv) {
|
|||||||
s.text_end = bytes.bytes + bytes.len;
|
s.text_end = bytes.bytes + bytes.len;
|
||||||
s.depth = JANET_RECURSION_GUARD;
|
s.depth = JANET_RECURSION_GUARD;
|
||||||
s.grammar = NULL;
|
s.grammar = NULL;
|
||||||
|
s.captures = janet_array(10);
|
||||||
int32_t result = match(&s, argv[0], bytes.bytes);
|
int32_t result = match(&s, argv[0], bytes.bytes);
|
||||||
return janet_wrap_boolean(result >= 0);
|
return result >= 0 ? janet_wrap_array(s.captures) : janet_wrap_nil();
|
||||||
}
|
}
|
||||||
|
|
||||||
static const JanetReg cfuns[] = {
|
static const JanetReg cfuns[] = {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user